Rework unquote() in Lib/urllib.py and Lib/urlparse.py.

Both copies of unquote() now collect the decoded pieces in a list and
return ''.join(res) instead of growing the result string with +=.
Unicode input is split into ASCII and non-ASCII runs: each ASCII run is
percent-decoded as a byte string and decoded back as latin1, while the
non-ASCII runs are passed through unchanged.  The old
"except UnicodeDecodeError" fallback is removed.  Both modules gain
"import re" and a module-level _asciire pattern; urlparse additionally
gains its own _is_unicode() helper.  The urllib hunks call _is_unicode()
without adding it, so it is assumed to exist there already (not visible
in this patch).

diff -r b1bbe519770b Lib/urllib.py
--- a/Lib/urllib.py	Wed Feb 13 12:05:14 2013 +0000
+++ b/Lib/urllib.py	Wed Feb 13 22:59:32 2013 +0200
@@ -28,6 +28,7 @@
 import time
 import sys
 import base64
+import re
 
 from urlparse import urljoin as basejoin
 
@@ -1198,22 +1199,35 @@
 _hexdig = '0123456789ABCDEFabcdef'
 _hextochr = dict((a + b, chr(int(a + b, 16)))
                  for a in _hexdig for b in _hexdig)
+_asciire = re.compile('([\x00-\x7f]+)')
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
-    res = s.split('%')
+    if _is_unicode(s):
+        if '%' not in s:
+            return s
+        bits = _asciire.split(s)
+        res = [bits[0]]
+        append = res.append
+        for i in range(1, len(bits), 2):
+            append(unquote(str(bits[i])).decode('latin1'))
+            append(bits[i + 1])
+        return ''.join(res)
+
+    bits = s.split('%')
     # fastpath
-    if len(res) == 1:
+    if len(bits) == 1:
         return s
-    s = res[0]
-    for item in res[1:]:
+    res = [bits[0]]
+    append = res.append
+    for item in bits[1:]:
         try:
-            s += _hextochr[item[:2]] + item[2:]
+            append(_hextochr[item[:2]])
+            append(item[2:])
         except KeyError:
-            s += '%' + item
-        except UnicodeDecodeError:
-            s += unichr(int(item[:2], 16)) + item[2:]
-    return s
+            append('%')
+            append(item)
+    return ''.join(res)
 
 def unquote_plus(s):
     """unquote('%7e/abc+def') -> '~/abc def'"""
diff -r b1bbe519770b Lib/urlparse.py
--- a/Lib/urlparse.py	Wed Feb 13 12:05:14 2013 +0000
+++ b/Lib/urlparse.py	Wed Feb 13 22:59:32 2013 +0200
@@ -28,6 +28,8 @@
 
 """
 
+import re
+
 __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
            "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"]
 
@@ -311,6 +313,15 @@
     else:
         return url, ''
 
+try:
+    unicode
+except NameError:
+    def _is_unicode(x):
+        return 0
+else:
+    def _is_unicode(x):
+        return isinstance(x, unicode)
+
 # unquote method for parse_qs and parse_qsl
 # Cannot use directly from urllib as it would create a circular reference
 # because urllib uses urlparse methods (urljoin).  If you update this function,
@@ -319,22 +330,35 @@
 _hexdig = '0123456789ABCDEFabcdef'
 _hextochr = dict((a+b, chr(int(a+b,16)))
                  for a in _hexdig for b in _hexdig)
+_asciire = re.compile('([\x00-\x7f]+)')
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
-    res = s.split('%')
+    if _is_unicode(s):
+        if '%' not in s:
+            return s
+        bits = _asciire.split(s)
+        res = [bits[0]]
+        append = res.append
+        for i in range(1, len(bits), 2):
+            append(unquote(str(bits[i])).decode('latin1'))
+            append(bits[i + 1])
+        return ''.join(res)
+
+    bits = s.split('%')
     # fastpath
-    if len(res) == 1:
+    if len(bits) == 1:
         return s
-    s = res[0]
-    for item in res[1:]:
+    res = [bits[0]]
+    append = res.append
+    for item in bits[1:]:
         try:
-            s += _hextochr[item[:2]] + item[2:]
+            append(_hextochr[item[:2]])
+            append(item[2:])
         except KeyError:
-            s += '%' + item
-        except UnicodeDecodeError:
-            s += unichr(int(item[:2], 16)) + item[2:]
-    return s
+            append('%')
+            append(item)
+    return ''.join(res)
 
 def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
     """Parse a query given as a string argument.