Add early returns to the quoting/unquoting helpers in urllib.parse.

* unquote_to_bytes(): return b'' for empty input and return the input
  (as bytes) when it contains no '%'.  Decoded pieces are collected in a
  list and joined once, so the result is always bytes and the cost stays
  linear in the input length.
* quote(): return an empty str unchanged.
* quote_from_bytes(): type-check bs first; return '' for empty input and
  bs.decode() when bs holds only safe bytes.  Non-ASCII bytes are removed
  from 'safe' here, and the cache stores the Quoter's bound __getitem__
  keyed on the normalized 'safe'.
* Quoter.__init__(): no longer filters 'safe' itself.
* New module constant _ALWAYS_SAFE_BYTES.

Note: line structure and indentation of this patch were reconstructed;
if a hunk is rejected because of whitespace inside a context line, retry
with `patch -l`.

Index: Lib/urllib/parse.py
===================================================================
--- Lib/urllib/parse.py (revision 81228)
+++ Lib/urllib/parse.py (working copy)
@@ -307,17 +307,20 @@
     """unquote_to_bytes('abc%20def') -> b'abc def'."""
     # Note: strings are encoded as UTF-8. This is only an issue if it contains
     # unescaped non-ASCII characters, which URIs should not.
+    if not string:
+        return b''
     if isinstance(string, str):
         string = string.encode('utf-8')
     res = string.split(b'%')
-    res[0] = res[0]
-    for i in range(1, len(res)):
-        item = res[i]
+    if len(res) == 1:
+        return bytes(string)
+    parts = [res[0]]
+    for item in res[1:]:
         try:
-            res[i] = bytes([int(item[:2], 16)]) + item[2:]
+            parts.append(bytes([int(item[:2], 16)]) + item[2:])
         except ValueError:
-            res[i] = b'%' + item
-    return b''.join(res)
+            parts.append(b'%' + item)
+    return b''.join(parts)
 
 def unquote(string, encoding='utf-8', errors='replace'):
     """Replace %xx escapes by their single-character equivalent. The optional
@@ -439,6 +442,7 @@
                          b'abcdefghijklmnopqrstuvwxyz'
                          b'0123456789'
                          b'_.-')
+_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
 _safe_quoters= {}
 
 class Quoter(collections.defaultdict):
@@ -451,7 +455,7 @@
     # of cached keys don't call Python code at all).
     def __init__(self, safe):
         """safe: bytes object."""
-        self.safe = _ALWAYS_SAFE.union(c for c in safe if c < 128)
+        self.safe = _ALWAYS_SAFE.union(safe)
 
     def __repr__(self):
         # Without this, will just display as a defaultdict
@@ -493,6 +497,8 @@
     errors='strict' (unsupported characters raise a UnicodeEncodeError).
     """
     if isinstance(string, str):
+        if not string:
+            return string
         if encoding is None:
             encoding = 'utf-8'
         if errors is None:
@@ -527,18 +533,23 @@
     not perform string-to-bytes encoding. It always returns an ASCII string.
     quote_from_bytes(b'abc def\xab') -> 'abc%20def%AB'
     """
+    if not isinstance(bs, (bytes, bytearray)):
+        raise TypeError("quote_from_bytes() expected bytes")
+    if not bs:
+        return ''
     if isinstance(safe, str):
         # Normalize 'safe' by converting to bytes and removing non-ASCII chars
         safe = safe.encode('ascii', 'ignore')
-    cachekey = bytes(safe)  # In case it was a bytearray
-    if not (isinstance(bs, bytes) or isinstance(bs, bytearray)):
-        raise TypeError("quote_from_bytes() expected a bytes")
+    else:
+        safe = bytes([c for c in safe if c < 128])
+    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
+        return bs.decode()
     try:
-        quoter = _safe_quoters[cachekey]
+        quoter = _safe_quoters[safe]
     except KeyError:
-        quoter = Quoter(safe)
-        _safe_quoters[cachekey] = quoter
-    return ''.join([quoter[char] for char in bs])
+        quoter = Quoter(safe).__getitem__
+        _safe_quoters[safe] = quoter
+    return ''.join([quoter(char) for char in bs])
 
 def urlencode(query, doseq=False):
     """Encode a sequence of two-element tuples or dictionary into a URL query string.