Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(150866)

Delta Between Two Patch Sets: Lib/test/test_codecs.py

Issue 23001: Accept mutable bytes-like objects
Left Patch Set: Created 5 years ago
Right Patch Set: Created 4 years, 8 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
LEFTRIGHT
1 import codecs 1 import codecs
2 import contextlib 2 import contextlib
3 import io 3 import io
4 import locale 4 import locale
5 import sys 5 import sys
6 import unittest 6 import unittest
7 import warnings 7 import warnings
8 import encodings 8 import encodings
9 9
10 from test import support 10 from test import support
(...skipping 360 matching lines...) Expand 10 before | Expand all | Expand 10 after
371 self.assertEqual(test_string.encode(self.encoding, 371 self.assertEqual(test_string.encode(self.encoding,
372 "surrogatepass"), 372 "surrogatepass"),
373 test_sequence) 373 test_sequence)
374 self.assertEqual(test_sequence.decode(self.encoding, 374 self.assertEqual(test_sequence.decode(self.encoding,
375 "surrogatepass"), 375 "surrogatepass"),
376 test_string) 376 test_string)
377 self.assertEqual(test_sequence.decode(self.encoding, "ignore"), 377 self.assertEqual(test_sequence.decode(self.encoding, "ignore"),
378 before + after) 378 before + after)
379 self.assertEqual(test_sequence.decode(self.encoding, "replace"), 379 self.assertEqual(test_sequence.decode(self.encoding, "replace"),
380 before + self.ill_formed_sequence_replace + after) 380 before + self.ill_formed_sequence_replace + after)
381 backslashreplace = ''.join('\\x%02x' % b
382 for b in self.ill_formed_sequence)
383 self.assertEqual(test_sequence.decode(self.encoding, "backslashreplace"),
384 before + backslashreplace + after)
381 385
382 class UTF32Test(ReadTest, unittest.TestCase): 386 class UTF32Test(ReadTest, unittest.TestCase):
383 encoding = "utf-32" 387 encoding = "utf-32"
384 if sys.byteorder == 'little': 388 if sys.byteorder == 'little':
385 ill_formed_sequence = b"\x80\xdc\x00\x00" 389 ill_formed_sequence = b"\x80\xdc\x00\x00"
386 else: 390 else:
387 ill_formed_sequence = b"\x00\x00\xdc\x80" 391 ill_formed_sequence = b"\x00\x00\xdc\x80"
388 392
389 spamle = (b'\xff\xfe\x00\x00' 393 spamle = (b'\xff\xfe\x00\x00'
390 b's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00' 394 b's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
(...skipping 743 matching lines...) Expand 10 before | Expand all | Expand 10 after
1134 1138
1135 class RecodingTest(unittest.TestCase): 1139 class RecodingTest(unittest.TestCase):
1136 def test_recoding(self): 1140 def test_recoding(self):
1137 f = io.BytesIO() 1141 f = io.BytesIO()
1138 f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8") 1142 f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8")
1139 f2.write("a") 1143 f2.write("a")
1140 f2.close() 1144 f2.close()
1141 # Python used to crash on this at exit because of a refcount 1145 # Python used to crash on this at exit because of a refcount
1142 # bug in _codecsmodule.c 1146 # bug in _codecsmodule.c
1143 1147
1148 self.assertTrue(f.closed)
1149
1144 # From RFC 3492 1150 # From RFC 3492
1145 punycode_testcases = [ 1151 punycode_testcases = [
1146 # A Arabic (Egyptian): 1152 # A Arabic (Egyptian):
1147 ("\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644" 1153 ("\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
1148 "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F", 1154 "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
1149 b"egbpdaj6bu4bxfgehfvwxn"), 1155 b"egbpdaj6bu4bxfgehfvwxn"),
1150 # B Chinese (simplified): 1156 # B Chinese (simplified):
1151 ("\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587", 1157 ("\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
1152 b"ihqwcrb4cv8a8dqg056pqjye"), 1158 b"ihqwcrb4cv8a8dqg056pqjye"),
1153 # C Chinese (traditional): 1159 # C Chinese (traditional):
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
1292 self.assertEqual(uni, internal.decode("unicode_internal")) 1298 self.assertEqual(uni, internal.decode("unicode_internal"))
1293 for internal in not_ok: 1299 for internal in not_ok:
1294 if sys.byteorder == "little": 1300 if sys.byteorder == "little":
1295 internal = bytes(reversed(internal)) 1301 internal = bytes(reversed(internal))
1296 with support.check_warnings(('unicode_internal codec has been ' 1302 with support.check_warnings(('unicode_internal codec has been '
1297 'deprecated', DeprecationWarning)): 1303 'deprecated', DeprecationWarning)):
1298 self.assertRaises(UnicodeDecodeError, internal.decode, 1304 self.assertRaises(UnicodeDecodeError, internal.decode,
1299 "unicode_internal") 1305 "unicode_internal")
1300 if sys.byteorder == "little": 1306 if sys.byteorder == "little":
1301 invalid = b"\x00\x00\x11\x00" 1307 invalid = b"\x00\x00\x11\x00"
1308 invalid_backslashreplace = r"\x00\x00\x11\x00"
1302 else: 1309 else:
1303 invalid = b"\x00\x11\x00\x00" 1310 invalid = b"\x00\x11\x00\x00"
1311 invalid_backslashreplace = r"\x00\x11\x00\x00"
1304 with support.check_warnings(): 1312 with support.check_warnings():
1305 self.assertRaises(UnicodeDecodeError, 1313 self.assertRaises(UnicodeDecodeError,
1306 invalid.decode, "unicode_internal") 1314 invalid.decode, "unicode_internal")
1307 with support.check_warnings(): 1315 with support.check_warnings():
1308 self.assertEqual(invalid.decode("unicode_internal", "replace"), 1316 self.assertEqual(invalid.decode("unicode_internal", "replace"),
1309 '\ufffd') 1317 '\ufffd')
1318 with support.check_warnings():
1319 self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"),
1320 invalid_backslashreplace)
1310 1321
1311 @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t') 1322 @unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
1312 def test_decode_error_attributes(self): 1323 def test_decode_error_attributes(self):
1313 try: 1324 try:
1314 with support.check_warnings(('unicode_internal codec has been ' 1325 with support.check_warnings(('unicode_internal codec has been '
1315 'deprecated', DeprecationWarning)): 1326 'deprecated', DeprecationWarning)):
1316 b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal") 1327 b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
1317 except UnicodeDecodeError as ex: 1328 except UnicodeDecodeError as ex:
1318 self.assertEqual("unicode_internal", ex.encoding) 1329 self.assertEqual("unicode_internal", ex.encoding)
1319 self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object) 1330 self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
(...skipping 266 matching lines...) Expand 10 before | Expand all | Expand 10 after
1586 encoder = codecs.getincrementalencoder("idna")() 1597 encoder = codecs.getincrementalencoder("idna")()
1587 self.assertEqual(encoder.encode("\xe4x"), b"") 1598 self.assertEqual(encoder.encode("\xe4x"), b"")
1588 self.assertEqual(encoder.encode("ample.org"), b"xn--xample-9ta.") 1599 self.assertEqual(encoder.encode("ample.org"), b"xn--xample-9ta.")
1589 self.assertEqual(encoder.encode("", True), b"org") 1600 self.assertEqual(encoder.encode("", True), b"org")
1590 1601
1591 encoder.reset() 1602 encoder.reset()
1592 self.assertEqual(encoder.encode("\xe4x"), b"") 1603 self.assertEqual(encoder.encode("\xe4x"), b"")
1593 self.assertEqual(encoder.encode("ample.org."), b"xn--xample-9ta.org.") 1604 self.assertEqual(encoder.encode("ample.org."), b"xn--xample-9ta.org.")
1594 self.assertEqual(encoder.encode("", True), b"") 1605 self.assertEqual(encoder.encode("", True), b"")
1595 1606
1607 def test_errors(self):
1608 """Only supports "strict" error handler"""
1609 "python.org".encode("idna", "strict")
1610 b"python.org".decode("idna", "strict")
1611 for errors in ("ignore", "replace", "backslashreplace",
1612 "surrogateescape"):
1613 self.assertRaises(Exception, "python.org".encode, "idna", errors)
1614 self.assertRaises(Exception,
1615 b"python.org".decode, "idna", errors)
1616
1596 class CodecsModuleTest(unittest.TestCase): 1617 class CodecsModuleTest(unittest.TestCase):
1597 1618
1598 def test_decode(self): 1619 def test_decode(self):
1599 self.assertEqual(codecs.decode(b'\xe4\xf6\xfc', 'latin-1'), 1620 self.assertEqual(codecs.decode(b'\xe4\xf6\xfc', 'latin-1'),
1600 '\xe4\xf6\xfc') 1621 '\xe4\xf6\xfc')
1601 self.assertRaises(TypeError, codecs.decode) 1622 self.assertRaises(TypeError, codecs.decode)
1602 self.assertEqual(codecs.decode(b'abc'), 'abc') 1623 self.assertEqual(codecs.decode(b'abc'), 'abc')
1603 self.assertRaises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii') 1624 self.assertRaises(UnicodeDecodeError, codecs.decode, b'\xff', 'ascii')
1604 1625
1605 # test keywords 1626 # test keywords
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
1652 # because 'I' is lowercased as "ı" (dotless i) 1673 # because 'I' is lowercased as "ı" (dotless i)
1653 oldlocale = locale.setlocale(locale.LC_CTYPE) 1674 oldlocale = locale.setlocale(locale.LC_CTYPE)
1654 self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) 1675 self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
1655 try: 1676 try:
1656 locale.setlocale(locale.LC_CTYPE, 'tr_TR') 1677 locale.setlocale(locale.LC_CTYPE, 'tr_TR')
1657 except locale.Error: 1678 except locale.Error:
1658 # Unsupported locale on this system 1679 # Unsupported locale on this system
1659 self.skipTest('test needs Turkish locale') 1680 self.skipTest('test needs Turkish locale')
1660 c = codecs.lookup('ASCII') 1681 c = codecs.lookup('ASCII')
1661 self.assertEqual(c.name, 'ascii') 1682 self.assertEqual(c.name, 'ascii')
1683
1684 def test_all(self):
1685 api = (
1686 "encode", "decode",
1687 "register", "CodecInfo", "Codec", "IncrementalEncoder",
1688 "IncrementalDecoder", "StreamReader", "StreamWriter", "lookup",
1689 "getencoder", "getdecoder", "getincrementalencoder",
1690 "getincrementaldecoder", "getreader", "getwriter",
1691 "register_error", "lookup_error",
1692 "strict_errors", "replace_errors", "ignore_errors",
1693 "xmlcharrefreplace_errors", "backslashreplace_errors",
1694 "namereplace_errors",
1695 "open", "EncodedFile",
1696 "iterencode", "iterdecode",
1697 "BOM", "BOM_BE", "BOM_LE",
1698 "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_BE", "BOM_UTF16_LE",
1699 "BOM_UTF32", "BOM_UTF32_BE", "BOM_UTF32_LE",
1700 "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", # Undocumented
1701 "StreamReaderWriter", "StreamRecoder",
1702 )
1703 self.assertCountEqual(api, codecs.__all__)
1704 for api in codecs.__all__:
1705 getattr(codecs, api)
1706
1707 def test_open(self):
1708 self.addCleanup(support.unlink, support.TESTFN)
1709 for mode in ('w', 'r', 'r+', 'w+', 'a', 'a+'):
1710 with self.subTest(mode), \
1711 codecs.open(support.TESTFN, mode, 'ascii') as file:
1712 self.assertIsInstance(file, codecs.StreamReaderWriter)
1713
1714 def test_undefined(self):
1715 self.assertRaises(UnicodeError, codecs.encode, 'abc', 'undefined')
1716 self.assertRaises(UnicodeError, codecs.decode, b'abc', 'undefined')
1717 self.assertRaises(UnicodeError, codecs.encode, '', 'undefined')
1718 self.assertRaises(UnicodeError, codecs.decode, b'', 'undefined')
1719 for errors in ('strict', 'ignore', 'replace', 'backslashreplace'):
1720 self.assertRaises(UnicodeError,
1721 codecs.encode, 'abc', 'undefined', errors)
1722 self.assertRaises(UnicodeError,
1723 codecs.decode, b'abc', 'undefined', errors)
1662 1724
1663 class StreamReaderTest(unittest.TestCase): 1725 class StreamReaderTest(unittest.TestCase):
1664 1726
1665 def setUp(self): 1727 def setUp(self):
1666 self.reader = codecs.getreader('utf-8') 1728 self.reader = codecs.getreader('utf-8')
1667 self.stream = io.BytesIO(b'\xed\x95\x9c\n\xea\xb8\x80') 1729 self.stream = io.BytesIO(b'\xed\x95\x9c\n\xea\xb8\x80')
1668 1730
1669 def test_readlines(self): 1731 def test_readlines(self):
1670 f = self.reader(self.stream) 1732 f = self.reader(self.stream)
1671 self.assertEqual(f.readlines(), ['\ud55c\n', '\uae00']) 1733 self.assertEqual(f.readlines(), ['\ud55c\n', '\uae00'])
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
1786 ] 1848 ]
1787 1849
1788 if hasattr(codecs, "mbcs_encode"): 1850 if hasattr(codecs, "mbcs_encode"):
1789 all_unicode_encodings.append("mbcs") 1851 all_unicode_encodings.append("mbcs")
1790 1852
1791 # The following encoding is not tested, because it's not supposed 1853 # The following encoding is not tested, because it's not supposed
1792 # to work: 1854 # to work:
1793 # "undefined" 1855 # "undefined"
1794 1856
1795 # The following encodings don't work in stateful mode 1857 # The following encodings don't work in stateful mode
1796 broken_unicode_with_streams = [ 1858 broken_unicode_with_stateful = [
1797 "punycode", 1859 "punycode",
1798 "unicode_internal" 1860 "unicode_internal"
1799 ]
1800 broken_incremental_coders = broken_unicode_with_streams + [
1801 "idna",
1802 ] 1861 ]
1803 1862
1804 class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling): 1863 class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
1805 def test_basics(self): 1864 def test_basics(self):
1806 s = "abc123" # all codecs should be able to encode these 1865 s = "abc123" # all codecs should be able to encode these
1807 for encoding in all_unicode_encodings: 1866 for encoding in all_unicode_encodings:
1808 name = codecs.lookup(encoding).name 1867 name = codecs.lookup(encoding).name
1809 if encoding.endswith("_codec"): 1868 if encoding.endswith("_codec"):
1810 name += "_codec" 1869 name += "_codec"
1811 elif encoding == "latin_1": 1870 elif encoding == "latin_1":
1812 name = "latin_1" 1871 name = "latin_1"
1813 self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-")) 1872 self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
1814 1873
1815 with support.check_warnings(): 1874 with support.check_warnings():
1816 # unicode-internal has been deprecated 1875 # unicode-internal has been deprecated
1817 (b, size) = codecs.getencoder(encoding)(s) 1876 (b, size) = codecs.getencoder(encoding)(s)
1818 self.assertEqual(size, len(s), "encoding=%r" % encoding) 1877 self.assertEqual(size, len(s), "encoding=%r" % encoding)
1819 (chars, size) = codecs.getdecoder(encoding)(b) 1878 (chars, size) = codecs.getdecoder(encoding)(b)
1820 self.assertEqual(chars, s, "encoding=%r" % encoding) 1879 self.assertEqual(chars, s, "encoding=%r" % encoding)
1821 1880
1822 if encoding not in broken_unicode_with_streams: 1881 if encoding not in broken_unicode_with_stateful:
1823 # check stream reader/writer 1882 # check stream reader/writer
1824 q = Queue(b"") 1883 q = Queue(b"")
1825 writer = codecs.getwriter(encoding)(q) 1884 writer = codecs.getwriter(encoding)(q)
1826 encodedresult = b"" 1885 encodedresult = b""
1827 for c in s: 1886 for c in s:
1828 writer.write(c) 1887 writer.write(c)
1829 chunk = q.read() 1888 chunk = q.read()
1830 self.assertTrue(type(chunk) is bytes, type(chunk)) 1889 self.assertTrue(type(chunk) is bytes, type(chunk))
1831 encodedresult += chunk 1890 encodedresult += chunk
1832 q = Queue(b"") 1891 q = Queue(b"")
1833 reader = codecs.getreader(encoding)(q) 1892 reader = codecs.getreader(encoding)(q)
1834 decodedresult = "" 1893 decodedresult = ""
1835 for c in encodedresult: 1894 for c in encodedresult:
1836 q.write(bytes([c])) 1895 q.write(bytes([c]))
1837 decodedresult += reader.read() 1896 decodedresult += reader.read()
1838 self.assertEqual(decodedresult, s, "encoding=%r" % encoding) 1897 self.assertEqual(decodedresult, s, "encoding=%r" % encoding)
1839 1898
1840 if encoding not in broken_incremental_coders: 1899 if encoding not in broken_unicode_with_stateful:
1841 # check incremental decoder/encoder and iterencode()/iterdecode() 1900 # check incremental decoder/encoder and iterencode()/iterdecode()
1842 try: 1901 try:
1843 encoder = codecs.getincrementalencoder(encoding)() 1902 encoder = codecs.getincrementalencoder(encoding)()
1844 except LookupError: # no IncrementalEncoder 1903 except LookupError: # no IncrementalEncoder
1845 pass 1904 pass
1846 else: 1905 else:
1847 # check incremental decoder/encoder 1906 # check incremental decoder/encoder
1848 encodedresult = b"" 1907 encodedresult = b""
1849 for c in s: 1908 for c in s:
1850 encodedresult += encoder.encode(c) 1909 encodedresult += encoder.encode(c)
(...skipping 28 matching lines...) Expand all
1879 decodedresult = "".join(decoder.decode(bytes([c])) 1938 decodedresult = "".join(decoder.decode(bytes([c]))
1880 for c in encodedresult) 1939 for c in encodedresult)
1881 self.assertEqual(decodedresult, s, 1940 self.assertEqual(decodedresult, s,
1882 "encoding=%r" % encoding) 1941 "encoding=%r" % encoding)
1883 1942
1884 @support.cpython_only 1943 @support.cpython_only
1885 def test_basics_capi(self): 1944 def test_basics_capi(self):
1886 from _testcapi import codec_incrementalencoder, codec_incrementaldecoder 1945 from _testcapi import codec_incrementalencoder, codec_incrementaldecoder
1887 s = "abc123" # all codecs should be able to encode these 1946 s = "abc123" # all codecs should be able to encode these
1888 for encoding in all_unicode_encodings: 1947 for encoding in all_unicode_encodings:
1889 if encoding not in broken_incremental_coders: 1948 if encoding not in broken_unicode_with_stateful:
1890 # check incremental decoder/encoder (fetched via the C API) 1949 # check incremental decoder/encoder (fetched via the C API)
1891 try: 1950 try:
1892 cencoder = codec_incrementalencoder(encoding) 1951 cencoder = codec_incrementalencoder(encoding)
1893 except LookupError: # no IncrementalEncoder 1952 except LookupError: # no IncrementalEncoder
1894 pass 1953 pass
1895 else: 1954 else:
1896 # check C API 1955 # check C API
1897 encodedresult = b"" 1956 encodedresult = b""
1898 for c in s: 1957 for c in s:
1899 encodedresult += cencoder.encode(c) 1958 encodedresult += cencoder.encode(c)
(...skipping 19 matching lines...) Expand all
1919 for c in encodedresult) 1978 for c in encodedresult)
1920 self.assertEqual(decodedresult, s, 1979 self.assertEqual(decodedresult, s,
1921 "encoding=%r" % encoding) 1980 "encoding=%r" % encoding)
1922 1981
1923 def test_seek(self): 1982 def test_seek(self):
1924 # all codecs should be able to encode these 1983 # all codecs should be able to encode these
1925 s = "%s\n%s\n" % (100*"abc123", 100*"def456") 1984 s = "%s\n%s\n" % (100*"abc123", 100*"def456")
1926 for encoding in all_unicode_encodings: 1985 for encoding in all_unicode_encodings:
1927 if encoding == "idna": # FIXME: See SF bug #1163178 1986 if encoding == "idna": # FIXME: See SF bug #1163178
1928 continue 1987 continue
1929 if encoding in broken_unicode_with_streams: 1988 if encoding in broken_unicode_with_stateful:
1930 continue 1989 continue
1931 reader = codecs.getreader(encoding)(io.BytesIO(s.encode(encoding))) 1990 reader = codecs.getreader(encoding)(io.BytesIO(s.encode(encoding)))
1932 for t in range(5): 1991 for t in range(5):
1933 # Test that calling seek resets the internal codec state and buffers 1992 # Test that calling seek resets the internal codec state and buffers
1934 reader.seek(0, 0) 1993 reader.seek(0, 0)
1935 data = reader.read() 1994 data = reader.read()
1936 self.assertEqual(s, data) 1995 self.assertEqual(s, data)
1937 1996
1938 def test_bad_decode_args(self): 1997 def test_bad_decode_args(self):
1939 for encoding in all_unicode_encodings: 1998 for encoding in all_unicode_encodings:
(...skipping 12 matching lines...) Expand all
1952 def test_encoding_map_type_initialized(self): 2011 def test_encoding_map_type_initialized(self):
1953 from encodings import cp1140 2012 from encodings import cp1140
1954 # This used to crash, we are only verifying there's no crash. 2013 # This used to crash, we are only verifying there's no crash.
1955 table_type = type(cp1140.encoding_table) 2014 table_type = type(cp1140.encoding_table)
1956 self.assertEqual(table_type, table_type) 2015 self.assertEqual(table_type, table_type)
1957 2016
1958 def test_decoder_state(self): 2017 def test_decoder_state(self):
1959 # Check that getstate() and setstate() handle the state properly 2018 # Check that getstate() and setstate() handle the state properly
1960 u = "abc123" 2019 u = "abc123"
1961 for encoding in all_unicode_encodings: 2020 for encoding in all_unicode_encodings:
1962 if encoding not in broken_incremental_coders: 2021 if encoding not in broken_unicode_with_stateful:
1963 self.check_state_handling_decode(encoding, u, u.encode(encoding)) 2022 self.check_state_handling_decode(encoding, u, u.encode(encoding))
1964 self.check_state_handling_encode(encoding, u, u.encode(encoding)) 2023 self.check_state_handling_encode(encoding, u, u.encode(encoding))
1965 2024
1966 class CharmapTest(unittest.TestCase): 2025 class CharmapTest(unittest.TestCase):
1967 def test_decode_with_string_map(self): 2026 def test_decode_with_string_map(self):
1968 self.assertEqual( 2027 self.assertEqual(
1969 codecs.charmap_decode(b"\x00\x01\x02", "strict", "abc"), 2028 codecs.charmap_decode(b"\x00\x01\x02", "strict", "abc"),
1970 ("abc", 3) 2029 ("abc", 3)
1971 ) 2030 )
1972 2031
(...skipping 11 matching lines...) Expand all
1984 ) 2043 )
1985 2044
1986 self.assertEqual( 2045 self.assertEqual(
1987 codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"), 2046 codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"),
1988 ("ab\ufffd", 3) 2047 ("ab\ufffd", 3)
1989 ) 2048 )
1990 2049
1991 self.assertEqual( 2050 self.assertEqual(
1992 codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab\ufffe"), 2051 codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab\ufffe"),
1993 ("ab\ufffd", 3) 2052 ("ab\ufffd", 3)
2053 )
2054
2055 self.assertEqual(
2056 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", "ab"),
2057 ("ab\\x02", 3)
2058 )
2059
2060 self.assertEqual(
2061 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace", "ab\ufffe"),
2062 ("ab\\x02", 3)
1994 ) 2063 )
1995 2064
1996 self.assertEqual( 2065 self.assertEqual(
1997 codecs.charmap_decode(b"\x00\x01\x02", "ignore", "ab"), 2066 codecs.charmap_decode(b"\x00\x01\x02", "ignore", "ab"),
1998 ("ab", 3) 2067 ("ab", 3)
1999 ) 2068 )
2000 2069
2001 self.assertEqual( 2070 self.assertEqual(
2002 codecs.charmap_decode(b"\x00\x01\x02", "ignore", "ab\ufffe"), 2071 codecs.charmap_decode(b"\x00\x01\x02", "ignore", "ab\ufffe"),
2003 ("ab", 3) 2072 ("ab", 3)
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
2063 ) 2132 )
2064 2133
2065 # Issue #14850 2134 # Issue #14850
2066 self.assertEqual( 2135 self.assertEqual(
2067 codecs.charmap_decode(b"\x00\x01\x02", "replace", 2136 codecs.charmap_decode(b"\x00\x01\x02", "replace",
2068 {0: 'a', 1: 'b', 2: '\ufffe'}), 2137 {0: 'a', 1: 'b', 2: '\ufffe'}),
2069 ("ab\ufffd", 3) 2138 ("ab\ufffd", 3)
2070 ) 2139 )
2071 2140
2072 self.assertEqual( 2141 self.assertEqual(
2142 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace",
2143 {0: 'a', 1: 'b'}),
2144 ("ab\\x02", 3)
2145 )
2146
2147 self.assertEqual(
2148 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace",
2149 {0: 'a', 1: 'b', 2: None}),
2150 ("ab\\x02", 3)
2151 )
2152
2153 # Issue #14850
2154 self.assertEqual(
2155 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace",
2156 {0: 'a', 1: 'b', 2: '\ufffe'}),
2157 ("ab\\x02", 3)
2158 )
2159
2160 self.assertEqual(
2073 codecs.charmap_decode(b"\x00\x01\x02", "ignore", 2161 codecs.charmap_decode(b"\x00\x01\x02", "ignore",
2074 {0: 'a', 1: 'b'}), 2162 {0: 'a', 1: 'b'}),
2075 ("ab", 3) 2163 ("ab", 3)
2076 ) 2164 )
2077 2165
2078 self.assertEqual( 2166 self.assertEqual(
2079 codecs.charmap_decode(b"\x00\x01\x02", "ignore", 2167 codecs.charmap_decode(b"\x00\x01\x02", "ignore",
2080 {0: 'a', 1: 'b', 2: None}), 2168 {0: 'a', 1: 'b', 2: None}),
2081 ("ab", 3) 2169 ("ab", 3)
2082 ) 2170 )
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
2136 self.assertEqual( 2224 self.assertEqual(
2137 codecs.charmap_decode(b"\x00\x01\x02", "replace", 2225 codecs.charmap_decode(b"\x00\x01\x02", "replace",
2138 {0: a, 1: b}), 2226 {0: a, 1: b}),
2139 ("ab\ufffd", 3) 2227 ("ab\ufffd", 3)
2140 ) 2228 )
2141 2229
2142 self.assertEqual( 2230 self.assertEqual(
2143 codecs.charmap_decode(b"\x00\x01\x02", "replace", 2231 codecs.charmap_decode(b"\x00\x01\x02", "replace",
2144 {0: a, 1: b, 2: 0xFFFE}), 2232 {0: a, 1: b, 2: 0xFFFE}),
2145 ("ab\ufffd", 3) 2233 ("ab\ufffd", 3)
2234 )
2235
2236 self.assertEqual(
2237 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace",
2238 {0: a, 1: b}),
2239 ("ab\\x02", 3)
2240 )
2241
2242 self.assertEqual(
2243 codecs.charmap_decode(b"\x00\x01\x02", "backslashreplace",
2244 {0: a, 1: b, 2: 0xFFFE}),
2245 ("ab\\x02", 3)
2146 ) 2246 )
2147 2247
2148 self.assertEqual( 2248 self.assertEqual(
2149 codecs.charmap_decode(b"\x00\x01\x02", "ignore", 2249 codecs.charmap_decode(b"\x00\x01\x02", "ignore",
2150 {0: a, 1: b}), 2250 {0: a, 1: b}),
2151 ("ab", 3) 2251 ("ab", 3)
2152 ) 2252 )
2153 2253
2154 self.assertEqual( 2254 self.assertEqual(
2155 codecs.charmap_decode(b"\x00\x01\x02", "ignore", 2255 codecs.charmap_decode(b"\x00\x01\x02", "ignore",
2156 {0: a, 1: b, 2: 0xFFFE}), 2256 {0: a, 1: b, 2: 0xFFFE}),
2157 ("ab", 3) 2257 ("ab", 3)
2158 ) 2258 )
2159 2259
2160 2260
2161 class WithStmtTest(unittest.TestCase): 2261 class WithStmtTest(unittest.TestCase):
2162 def test_encodedfile(self): 2262 def test_encodedfile(self):
2163 f = io.BytesIO(b"\xc3\xbc") 2263 f = io.BytesIO(b"\xc3\xbc")
2164 with codecs.EncodedFile(f, "latin-1", "utf-8") as ef: 2264 with codecs.EncodedFile(f, "latin-1", "utf-8") as ef:
2165 self.assertEqual(ef.read(), b"\xfc") 2265 self.assertEqual(ef.read(), b"\xfc")
2266 self.assertTrue(f.closed)
2166 2267
2167 def test_streamreaderwriter(self): 2268 def test_streamreaderwriter(self):
2168 f = io.BytesIO(b"\xc3\xbc") 2269 f = io.BytesIO(b"\xc3\xbc")
2169 info = codecs.lookup("utf-8") 2270 info = codecs.lookup("utf-8")
2170 with codecs.StreamReaderWriter(f, info.streamreader, 2271 with codecs.StreamReaderWriter(f, info.streamreader,
2171 info.streamwriter, 'strict') as srw: 2272 info.streamwriter, 'strict') as srw:
2172 self.assertEqual(srw.read(), "\xfc") 2273 self.assertEqual(srw.read(), "\xfc")
2173 2274
2174 class TypesTest(unittest.TestCase): 2275 class TypesTest(unittest.TestCase):
2175 def test_decode_unicode(self): 2276 def test_decode_unicode(self):
(...skipping 20 matching lines...) Expand all
2196 def test_unicode_escape(self): 2297 def test_unicode_escape(self):
2197 # Escape-decoding an unicode string is supported and gives the same 2298 # Escape-decoding an unicode string is supported and gives the same
2198 # result as decoding the equivalent ASCII bytes string. 2299 # result as decoding the equivalent ASCII bytes string.
2199 self.assertEqual(codecs.unicode_escape_decode(r"\u1234"), ("\u1234", 6)) 2300 self.assertEqual(codecs.unicode_escape_decode(r"\u1234"), ("\u1234", 6))
2200 self.assertEqual(codecs.unicode_escape_decode(br"\u1234"), ("\u1234", 6)) 2301 self.assertEqual(codecs.unicode_escape_decode(br"\u1234"), ("\u1234", 6))
2201 self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6)) 2302 self.assertEqual(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
2202 self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6)) 2303 self.assertEqual(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
2203 2304
2204 self.assertRaises(UnicodeDecodeError, codecs.unicode_escape_decode, br"\U00110000") 2305 self.assertRaises(UnicodeDecodeError, codecs.unicode_escape_decode, br"\U00110000")
2205 self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10)) 2306 self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
2307 self.assertEqual(codecs.unicode_escape_decode(r"\U00110000", "backslashreplace"),
2308 (r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30", 10))
2206 2309
2207 self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000") 2310 self.assertRaises(UnicodeDecodeError, codecs.raw_unicode_escape_decode, br"\U00110000")
2208 self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10)) 2311 self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "replace"), ("\ufffd", 10))
2312 self.assertEqual(codecs.raw_unicode_escape_decode(r"\U00110000", "backslashreplace"),
2313 (r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30", 10))
2209 2314
2210 2315
2211 class UnicodeEscapeTest(unittest.TestCase): 2316 class UnicodeEscapeTest(unittest.TestCase):
2212 def test_empty(self): 2317 def test_empty(self):
2213 self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0)) 2318 self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0))
2214 self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0)) 2319 self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0))
2215 2320
2216 def test_raw_encode(self): 2321 def test_raw_encode(self):
2217 encode = codecs.unicode_escape_encode 2322 encode = codecs.unicode_escape_encode
2218 for b in range(32, 127): 2323 for b in range(32, 127):
(...skipping 618 matching lines...) Expand 10 before | Expand all | Expand 10 after
2837 ('[\udcff]', 'surrogateescape', b'[\xff]'), 2942 ('[\udcff]', 'surrogateescape', b'[\xff]'),
2838 ('[\udcff]', 'surrogatepass', None), 2943 ('[\udcff]', 'surrogatepass', None),
2839 )) 2944 ))
2840 self.check_decode(932, ( 2945 self.check_decode(932, (
2841 (b'abc', 'strict', 'abc'), 2946 (b'abc', 'strict', 'abc'),
2842 (b'\x82\x84\xe9\x80', 'strict', '\uff44\u9a3e'), 2947 (b'\x82\x84\xe9\x80', 'strict', '\uff44\u9a3e'),
2843 # invalid bytes 2948 # invalid bytes
2844 (b'[\xff]', 'strict', None), 2949 (b'[\xff]', 'strict', None),
2845 (b'[\xff]', 'ignore', '[]'), 2950 (b'[\xff]', 'ignore', '[]'),
2846 (b'[\xff]', 'replace', '[\ufffd]'), 2951 (b'[\xff]', 'replace', '[\ufffd]'),
2952 (b'[\xff]', 'backslashreplace', '[\\xff]'),
2847 (b'[\xff]', 'surrogateescape', '[\udcff]'), 2953 (b'[\xff]', 'surrogateescape', '[\udcff]'),
2848 (b'[\xff]', 'surrogatepass', None), 2954 (b'[\xff]', 'surrogatepass', None),
2849 (b'\x81\x00abc', 'strict', None), 2955 (b'\x81\x00abc', 'strict', None),
2850 (b'\x81\x00abc', 'ignore', '\x00abc'), 2956 (b'\x81\x00abc', 'ignore', '\x00abc'),
2851 (b'\x81\x00abc', 'replace', '\ufffd\x00abc'), 2957 (b'\x81\x00abc', 'replace', '\ufffd\x00abc'),
2958 (b'\x81\x00abc', 'backslashreplace', '\\x81\x00abc'),
2852 )) 2959 ))
2853 2960
2854 def test_cp1252(self): 2961 def test_cp1252(self):
2855 self.check_encode(1252, ( 2962 self.check_encode(1252, (
2856 ('abc', 'strict', b'abc'), 2963 ('abc', 'strict', b'abc'),
2857 ('\xe9\u20ac', 'strict', b'\xe9\x80'), 2964 ('\xe9\u20ac', 'strict', b'\xe9\x80'),
2858 ('\xff', 'strict', b'\xff'), 2965 ('\xff', 'strict', b'\xff'),
2859 # test error handlers 2966 # test error handlers
2860 ('\u0141', 'strict', None), 2967 ('\u0141', 'strict', None),
2861 ('\u0141', 'ignore', b''), 2968 ('\u0141', 'ignore', b''),
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
2919 self.assertEqual(decoded, ('\u9a3e\u9a3e', 4)) 3026 self.assertEqual(decoded, ('\u9a3e\u9a3e', 4))
2920 3027
2921 decoded = codecs.code_page_decode(932, 3028 decoded = codecs.code_page_decode(932,
2922 b'abc', 'strict', 3029 b'abc', 'strict',
2923 False) 3030 False)
2924 self.assertEqual(decoded, ('abc', 3)) 3031 self.assertEqual(decoded, ('abc', 3))
2925 3032
2926 3033
2927 if __name__ == "__main__": 3034 if __name__ == "__main__":
2928 unittest.main() 3035 unittest.main()
LEFTRIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+