Index: Doc/library/urllib.rst =================================================================== --- Doc/library/urllib.rst (revision 78945) +++ Doc/library/urllib.rst (working copy) @@ -225,8 +225,19 @@ Replace ``%xx`` escapes by their single-character equivalent. + *string* may be either a :class:`str` or a :class:`unicode`. + + If it is a :class:`unicode`, the result is also a :class:`unicode`. + Percent-escaped octets are decoded as Latin-1. (For example, the string + ``u'%FC'`` describes the Unicode character ``u'\u00fc'``, rather than the + byte ``'\xfc'``.) To interpret the escaped characters with another + encoding, *string* should first be encoded as a :class:`str`, unquoted, and + then decoded with the desired encoding. + + Example: ``unquote('/%7Econnolly/')`` yields ``'/~connolly/'``. + Example: ``unquote(u'/El%20Ni%F1o/')`` yields ``u'/El Niño/'``. .. function:: unquote_plus(string) Index: Lib/test/test_urllib.py =================================================================== --- Lib/test/test_urllib.py (revision 78945) +++ Lib/test/test_urllib.py (working copy) @@ -463,9 +463,35 @@ self.assertEqual(expect, result, "using unquote_plus(): %s != %s" % (expect, result)) + def test_unquote_nonascii_bytes(self): + # Test on escaped non-ASCII bytes + given = '%A2%D8ab%FF' + expect = b'\xa2\xd8ab\xff' + result = urllib.unquote(given) + self.assertEqual(expect, result, "using unquote(): %r != %r" + % (expect, result)) + # Test on unescaped non-ASCII bytes + # (Technically an invalid URI; expect those bytes to be preserved) + given = b'%A2\xd8ab%FF' + expect = b'\xa2\xd8ab\xff' + result = urllib.unquote(given) + self.assertEqual(expect, result, "using unquote(): %r != %r" + % (expect, result)) + def test_unquote_with_unicode(self): - r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc') - self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc') + # Percent-escaped characters in unicode strings are decoded with + # Latin-1. 
(This is not ideal, but necessary for historical reasons -- + # see issue #8136). + r = urllib.unquote(u'br%FCckner_sapporo_20050930.doc') + self.assertEqual(r, u'br\u00fcckner_sapporo_20050930.doc') + # Test on a string with unescaped non-ASCII characters + # (Technically an invalid URI; expect those characters to be preserved + # unchanged). + given = u'\u6f22%FC%EA' + result = urllib.unquote(given) + expect = u'\u6f22\xfc\xea' + self.assertEqual(expect, result, + "using unquote(): %r != %r" % (expect, result)) class urlencode_Tests(unittest.TestCase): """Tests for urlencode()"""