Index: Misc/NEWS =================================================================== --- Misc/NEWS (revision 79088) +++ Misc/NEWS (working copy) @@ -285,6 +285,8 @@ Library ------- +- Issue #8024: Update the Unicode database to 5.2. + - Issue #8168: py_compile now handles files with utf-8 BOMS. - ``tokenize.detect_encoding`` now returns ``'utf-8-sig'`` when a UTF-8 BOM is Index: Tools/unicode/makeunicodedata.py =================================================================== --- Tools/unicode/makeunicodedata.py (revision 79088) +++ Tools/unicode/makeunicodedata.py (working copy) @@ -31,7 +31,7 @@ VERSION = "2.6" # The Unicode Database -UNIDATA_VERSION = "5.1.0" +UNIDATA_VERSION = "5.2.0" UNICODE_DATA = "UnicodeData%s.txt" COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" EASTASIAN_WIDTH = "EastAsianWidth%s.txt" Index: Lib/test/test_bigmem.py =================================================================== --- Lib/test/test_bigmem.py (revision 79087) +++ Lib/test/test_bigmem.py (working copy) @@ -618,7 +618,7 @@ @precisionbigmemtest(size=_4G // 5, memuse=character_size * (6 + 1)) def test_unicode_repr_overflow(self, size): try: - s = "\uAAAA"*size + s = "\uDCBA"*size r = repr(s) except MemoryError: pass # acceptable on 32-bit @@ -679,22 +679,24 @@ @bigmemtest(minsize=2**32 / 5, memuse=character_size * 7) def test_unicode_repr(self, size): - s = "\uAAAA" * size + # Use an assigned, but not printable code point. + # It is in the range of the low surrogates \uDC00-\uDFFF. + s = "\uDCBA" * size for f in (repr, ascii): r = f(s) self.assertTrue(len(r) > size) - self.assertTrue(r.endswith(r"\uaaaa'"), r[-10:]) + self.assertTrue(r.endswith(r"\udcba'"), r[-10:]) del r # The character takes 4 bytes even in UCS-2 builds because it will # be decomposed into surrogates. @bigmemtest(minsize=2**32 / 5, memuse=4 + character_size * 9) def test_unicode_repr_wide(self, size): - s = "\U0001AAAA" * size + s = "\U0001DCBA" * size for f in (repr, ascii): r = f(s) self.assertTrue(len(r) > size) - self.assertTrue(r.endswith(r"\U0001aaaa'"), r[-12:]) + self.assertTrue(r.endswith(r"\U0001dcba'"), r[-12:]) del r Index: Lib/test/test_unicodedata.py =================================================================== --- Lib/test/test_unicodedata.py (revision 79088) +++ Lib/test/test_unicodedata.py (working copy) @@ -21,7 +21,7 @@ class UnicodeMethodsTest(unittest.TestCase): # update this, if the database changes - expectedchecksum = '0b915116051f3ed029a98542c2b7df63c9646272' + expectedchecksum = '4504dffd035baea02c5b9de82bebc3d65e0e0baf' def test_method_checksum(self): h = hashlib.sha1() @@ -80,7 +80,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): # update this, if the database changes - expectedchecksum = 'd4169ccff998ebbd1ec007a0b3fbd66e5ccf0229' + expectedchecksum = '6ccf1b1a36460d2694f9b0b0f0324942fe70ede6' def test_function_checksum(self): data = []