diff -r 2dde5a7439fd Lib/test/test_urlparse.py
--- a/Lib/test/test_urlparse.py	Thu Aug 30 14:56:13 2012 +0000
+++ b/Lib/test/test_urlparse.py	Thu Aug 30 13:16:37 2012 -0500
@@ -531,6 +531,12 @@
         self.assertEqual(urlparse.urlparse("http://www.python.org:80"),
                 ('http','www.python.org:80','','','',''))
 
+    def test_mutablereturntype(self):
+        # the return type is now mutable; the change must survive urlunparse()
+        tmp = urlparse.urlparse('http://www.example.com/foo/bar/?cheese=whiz#hahaha')
+        tmp.netloc = 'www.python.com'
+        self.assertEqual(urlparse.urlunparse(tmp), "http://www.python.com/foo/bar/?cheese=whiz#hahaha")
+
 def test_main():
     test_support.run_unittest(UrlParseTestCase)
 
diff -r 2dde5a7439fd Lib/urlparse.py
--- a/Lib/urlparse.py	Thu Aug 30 14:56:13 2012 +0000
+++ b/Lib/urlparse.py	Thu Aug 30 13:16:37 2012 -0500
@@ -39,7 +39,7 @@
 uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
                'imap', 'wais', 'file', 'mms', 'https', 'shttp',
                'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
-               'svn', 'svn+ssh', 'sftp','nfs','git', 'git+ssh']
+               'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
 uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
                'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
                'mms', '', 'sftp']
@@ -63,6 +63,7 @@
 MAX_CACHE_SIZE = 20
 _parse_cache = {}
 
+
 def clear_cache():
     """Clear the parse cache."""
     _parse_cache.clear()
@@ -113,19 +114,52 @@
                 return port
         return None
 
-from collections import namedtuple
+    def __iter__(self):
+        for attr in self.__slots__:
+            yield getattr(self, attr)
 
-class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin):
+    def __getitem__(self, k):
+        # this is messy because we need to handle slices
+        slots = self.__slots__[k]
+        if hasattr(slots, '__iter__'):
+            return [self.__getattribute__(slot) for slot in slots]
 
-    __slots__ = ()
+        return self.__getattribute__(slots)
+
+    def __cmp__(self, y):
+        return cmp(tuple(self), tuple(y))
+
+
+class SplitResult(ResultMixin):
+
+    # only allow these attributes
+    __slots__ = ('scheme', 'netloc', 'path', 'query', 'fragment')
+
+    def __init__(self, scheme=None, netloc=None, path=None, query=None, fragment=None):
+        # this constructor should be compatible with the old namedtuple-based one
+        self.scheme = scheme
+        self.netloc = netloc
+        self.path = path
+        self.query = query
+        self.fragment = fragment
 
     def geturl(self):
         return urlunsplit(self)
 
-class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin):
+class ParseResult(ResultMixin):
 
-    __slots__ = ()
+    # only allow these attributes
+    __slots__ = ('scheme', 'netloc', 'path', 'params', 'query', 'fragment')
+
+    def __init__(self, scheme=None, netloc=None, path=None, params=None, query=None, fragment=None):
+        # this constructor should be compatible with the old namedtuple-based one
+        self.scheme = scheme
+        self.netloc = netloc
+        self.path = path
+        self.params = params
+        self.query = query
+        self.fragment = fragment
 
     def geturl(self):
         return urlunparse(self)
 
@@ -145,14 +179,16 @@
         params = ''
     return ParseResult(scheme, netloc, url, params, query, fragment)
 
+
 def _splitparams(url):
-    if '/'  in url:
+    if '/' in url:
         i = url.find(';', url.rfind('/'))
         if i < 0:
             return url, ''
     else:
         i = url.find(';')
-    return url[:i], url[i+1:]
+    return url[:i], url[i + 1:]
+
 
 def _splitnetloc(url, start=0):
     delim = len(url)   # position of end of domain part of url, default is end
@@ -162,6 +198,7 @@
         delim = min(delim, wdelim)     # use earliest delim position
     return url[start:delim], url[delim:]   # return (domain, rest)
 
+
 def urlsplit(url, scheme='', allow_fragments=True):
     """Parse a URL into 5 components:
     <scheme>://<netloc>/<path>?<query>#<fragment>
@@ -173,14 +210,14 @@
     cached = _parse_cache.get(key, None)
     if cached:
         return cached
-    if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
+    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
         clear_cache()
     netloc = query = fragment = ''
     i = url.find(':')
     if i > 0:
-        if url[:i] == 'http': # optimize the common case
+        if url[:i] == 'http':  # optimize the common case
             scheme = url[:i].lower()
-            url = url[i+1:]
+            url = url[i + 1:]
             if url[:2] == '//':
                 netloc, url = _splitnetloc(url, 2)
                 if (('[' in netloc and ']' not in netloc) or
@@ -199,7 +236,7 @@
         else:
             # make sure "url" is not actually a port number (in which case
             # "scheme" is really part of the path)
-            rest = url[i+1:]
+            rest = url[i + 1:]
             if not rest or any(c not in '0123456789' for c in rest):
                 # not a port number
                 scheme, url = url[:i].lower(), rest
@@ -217,6 +254,7 @@
     _parse_cache[key] = v
     return v
 
+
 def urlunparse(data):
     """Put a parsed URL back together again.  This may result in a
     slightly different, but equivalent URL, if the URL that was parsed
@@ -227,6 +265,7 @@
         url = "%s;%s" % (url, params)
     return urlunsplit((scheme, netloc, url, query, fragment))
 
+
 def urlunsplit(data):
     """Combine the elements of a tuple as returned by urlsplit() into a
     complete URL as a string. The data argument can be any five-item iterable.
@@ -245,6 +284,7 @@
         url = url + '#' + fragment
     return url
 
+
 def urljoin(base, url, allow_fragments=True):
     """Join a base URL and a possibly relative URL to form an absolute
     interpretation of the latter."""
@@ -284,10 +324,10 @@
         n = len(segments) - 1
         while i < n:
             if (segments[i] == '..'
-                and segments[i-1] not in ('', '..')):
-                del segments[i-1:i+1]
+                and segments[i - 1] not in ('', '..')):
+                del segments[i - 1:i + 1]
                 break
-            i = i+1
+            i = i + 1
         else:
             break
     if segments == ['', '..']:
@@ -297,6 +337,7 @@
     return urlunparse((scheme, netloc, '/'.join(segments),
                        params, query, fragment))
 
+
 def urldefrag(url):
     """Removes any existing fragment from URL.
 
@@ -317,9 +358,10 @@
 # update it also in urllib. This code duplication does not existin in Python3.
 
 _hexdig = '0123456789ABCDEFabcdef'
-_hextochr = dict((a+b, chr(int(a+b,16)))
+_hextochr = dict((a + b, chr(int(a + b, 16)))
                  for a in _hexdig for b in _hexdig)
 
+
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
     res = s.split('%')
@@ -336,6 +378,7 @@
             s += unichr(int(item[:2], 16)) + item[2:]
     return s
 
+
 def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
     """Parse a query given as a string argument.
 
@@ -362,6 +405,7 @@
             dict[name] = [value]
     return dict
 
+
 def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
     """Parse a query given as a string argument.
 
@@ -389,7 +433,7 @@
         nv = name_value.split('=', 1)
         if len(nv) != 2:
             if strict_parsing:
-                raise ValueError, "bad query field: %r" % (name_value,)
+                raise ValueError("bad query field: %r" % (name_value,))
             # Handle case of a control-name with no equal sign
             if keep_blank_values:
                 nv.append('')
diff -r 2dde5a7439fd Misc/ACKS
--- a/Misc/ACKS	Thu Aug 30 14:56:13 2012 +0000
+++ b/Misc/ACKS	Thu Aug 30 13:16:37 2012 -0500
@@ -872,6 +872,7 @@
 Frank J. Tobin
 R Lindsay Todd
 Bennett Todd
+Ben Toews
 Matias Torchinsky
 Sandro Tosi
 Richard Townsend
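
For reference, a minimal usage sketch of the behaviour the patch is meant to enable (illustrative only: it assumes the patched Lib/urlparse.py above is the urlparse module actually imported, and the example URL is arbitrary):

    import urlparse

    # Parsing works as before; the result still unpacks and indexes like the
    # old namedtuple-based ParseResult, via the new __iter__/__getitem__.
    parts = urlparse.urlparse('http://www.example.com/foo/bar/?cheese=whiz#hahaha')
    scheme, netloc, path, params, query, fragment = parts
    print parts[0:2]    # slicing now returns a list: ['http', 'www.example.com']

    # The new behaviour: components are plain writable attributes instead of
    # read-only namedtuple fields, so a result can be edited and re-built.
    parts.netloc = 'www.python.com'
    print urlparse.urlunparse(parts)
    # -> http://www.python.com/foo/bar/?cheese=whiz#hahaha

One visible difference worth noting in review: slicing returns a list rather than a tuple, because __getitem__ rebuilds the selected attributes by hand.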