Index: Objects/unicodetype_db.h
===================================================================
--- Objects/unicodetype_db.h (revision 71889)
+++ Objects/unicodetype_db.h (working copy)
@@ -127,7 +127,7 @@
     {0, 0, 0, 0, 8, 1540},
     {0, 0, 0, 0, 9, 1540},
     {0, 0, 0, 0, 0, 1792},
-    {42877, 0, 42877, 0, 0, 3849},
+    {42877, 7545, 42877, 0, 0, 3849},
     {3814, 0, 3814, 0, 0, 1801},
     {65477, 0, 65477, 0, 0, 1801},
     {0, 57921, 0, 0, 0, 1921},
@@ -174,7 +174,7 @@
     {0, 54787, 0, 0, 0, 1921},
     {0, 54753, 0, 0, 0, 1921},
     {58272, 0, 58272, 0, 0, 1801},
-    {0, 7545, 0, 0, 0, 3969},
+    {42877, 7545, 42877, 0, 0, 3969},
     {0, 40, 0, 0, 0, 1921},
     {65496, 0, 65496, 0, 0, 1801},
 };
Index: Tools/unicode/makeunicodedata.py
===================================================================
--- Tools/unicode/makeunicodedata.py (revision 71889)
+++ Tools/unicode/makeunicodedata.py (working copy)
@@ -383,33 +383,32 @@
                 flags |= XID_CONTINUE_MASK
             # use delta predictor for upper/lower/title if it fits
             if record[12]:
-                upper = int(record[12], 16) - char
-                if -32768 <= upper <= 32767 and delta:
-                    upper = upper & 0xffff
-                else:
-                    upper += char
-                    delta = False
+                upper = int(record[12], 16)
             else:
-                upper = 0
+                upper = char
             if record[13]:
-                lower = int(record[13], 16) - char
-                if -32768 <= lower <= 32767 and delta:
-                    lower = lower & 0xffff
-                else:
-                    lower += char
-                    delta = False
+                lower = int(record[13], 16)
             else:
-                lower = 0
+                lower = char
             if record[14]:
-                title = int(record[14], 16) - char
-                if -32768 <= lower <= 32767 and delta:
-                    title = title & 0xffff
-                else:
-                    title += char
-                    delta = False
+                title = int(record[14], 16)
             else:
-                title = 0
-            if not delta:
+                # UCD.html says that a missing title char means that
+                # it defaults to the uppercase character, not to the
+                # character itself. Apparently, in the current UCD (5.x)
+                # this feature is never used
+                title = upper
+            upper_d = upper - char
+            lower_d = lower - char
+            title_d = title - char
+            if -32768 <= upper_d <= 32767 and \
+               -32768 <= lower_d <= 32767 and \
+               -32768 <= title_d <= 32767:
+                # use deltas
+                upper = upper_d & 0xffff
+                lower = lower_d & 0xffff
+                title = title_d & 0xffff
+            else:
                 flags |= NODELTA_MASK
             # decimal digit, integer digit
             decimal = 0
Index: Lib/test/test_unicodedata.py
===================================================================
--- Lib/test/test_unicodedata.py (revision 71889)
+++ Lib/test/test_unicodedata.py (working copy)
@@ -20,7 +20,7 @@
 class UnicodeMethodsTest(unittest.TestCase):
 
     # update this, if the database changes
-    expectedchecksum = 'aef99984a58c8e1e5363a3175f2ff9608599a93e'
+    expectedchecksum = 'b7db9b5f1d804976fa921d2009cbef6f025620c1'
 
     def test_method_checksum(self):
         h = hashlib.sha1()
@@ -258,7 +258,19 @@
         # the upper-case mapping: as delta, or as absolute value
         self.assert_("a".upper()=='A')
         self.assert_("\u1d79".upper()=='\ua77d')
+        self.assert_(".".upper()=='.')
 
+    def test_bug_5828(self):
+        self.assertEqual("\u1d79".lower(), "\u1d79")
+        # Only U+0000 should have U+0000 as its upper/lower/titlecase variant
+        self.assertEqual(
+            [
+                c for c in range(sys.maxunicode+1)
+                if chr(c).lower() == "\x00" or chr(c).upper() == "\x00" or chr(c).title() == "\x00"
+            ],
+            [0]
+        )
+
 def test_main():
     test.support.run_unittest(
         UnicodeMiscTest,