Decode percent-escapes as UTF-8 in urllib.parse.unquote().

* unquote() now encodes its argument to UTF-8 bytes, substitutes the %XX
  escapes on bytes (via a bytes-keyed _hextochr table), and decodes the
  joined result as UTF-8, replacing malformed sequences with U+FFFD.
* The quoter's cache-miss path uses the single-escape form only for code
  points below 128; other characters are kept as-is when listed in
  self.safe, otherwise escaped byte-by-byte from their UTF-8 encoding, and
  the result is now cached in both cases.
* test_unquote_with_unicode expects the decoded character U+00FC instead
  of the two Latin-1 characters '\xc3\xbc'.

Index: Lib/urllib/parse.py
===================================================================
--- Lib/urllib/parse.py	(revision 64756)
+++ Lib/urllib/parse.py	(working copy)
@@ -259,21 +259,24 @@
         return url, ''
 
 
-_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
-_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+_hextochr = dict((('%02x' % i).encode('ascii'), bytes([i]))
+                 for i in range(256))
+_hextochr.update((('%02X' % i).encode('ascii'), bytes([i]))
+                 for i in range(256))
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
-    res = s.split('%')
+    # Manipulate as bytes rather than str, so we can
+    # decode from UTF-8 at the end.
+    res = s.encode("utf-8").split(b'%')
     for i in range(1, len(res)):
         item = res[i]
         try:
             res[i] = _hextochr[item[:2]] + item[2:]
         except KeyError:
-            res[i] = '%' + item
-        except UnicodeDecodeError:
-            res[i] = chr(int(item[:2], 16)) + item[2:]
-    return "".join(res)
+            res[i] = b'%' + item
+    # Replace malformed UTF-8 sequences with '\ufffd'
+    return b"".join(res).decode("utf-8", "replace")
 
 def unquote_plus(s):
     """unquote('%7e/abc+def') -> '~/abc def'"""
@@ -294,12 +297,17 @@
         try:
             return self.cache[c]
         except KeyError:
-            if ord(c) < 256:
+            if ord(c) < 128:
                 res = (c in self.safe) and c or ('%%%02X' % ord(c))
                 self.cache[c] = res
                 return res
             else:
-                return "".join(['%%%02X' % i for i in c.encode("utf-8")])
+                if c in self.safe:
+                    res = c
+                else:
+                    res = "".join(['%%%02X' % i for i in c.encode("utf-8")])
+                self.cache[c] = res
+                return res
 
 def quote(s, safe = '/'):
     """quote('abc def') -> 'abc%20def'
Index: Lib/test/test_urllib.py
===================================================================
--- Lib/test/test_urllib.py	(revision 64756)
+++ Lib/test/test_urllib.py	(working copy)
@@ -465,7 +465,7 @@
 
     def test_unquote_with_unicode(self):
         r = urllib.parse.unquote('br%C3%BCckner_sapporo_20050930.doc')
-        self.assertEqual(r, 'br\xc3\xbcckner_sapporo_20050930.doc')
+        self.assertEqual(r, 'br\u00fcckner_sapporo_20050930.doc')
 
 class urlencode_Tests(unittest.TestCase):
     """Tests for urlencode()"""