diff -r 8e538ed41766 Doc/library/stdtypes.rst --- a/Doc/library/stdtypes.rst Fri Jan 13 22:12:37 2012 +0100 +++ b/Doc/library/stdtypes.rst Fri Jan 13 19:46:01 2012 -0500 @@ -997,16 +997,24 @@ In addition, Python's strings support th functions based on regular expressions. .. method:: str.capitalize() Return a copy of the string with its first character capitalized and the rest lowercased. +.. method:: str.casefold() + + Return a casefolded copy of the string. Casefolded strings may be used for + caseless matching. For example, ``"MASSE".casefold() == "maße".casefold()``. + + .. versionadded:: 3.3 + + .. method:: str.center(width[, fillchar]) Return centered in a string of length *width*. Padding is done using the specified *fillchar* (default is a space). .. method:: str.count(sub[, start[, end]]) diff -r 8e538ed41766 Include/unicodeobject.h --- a/Include/unicodeobject.h Fri Jan 13 22:12:37 2012 +0100 +++ b/Include/unicodeobject.h Fri Jan 13 19:46:01 2012 -0500 @@ -2018,16 +2018,21 @@ PyAPI_FUNC(int) _PyUnicode_ToTitleFull( Py_UCS4 *res ); PyAPI_FUNC(int) _PyUnicode_ToUpperFull( Py_UCS4 ch, /* Unicode character */ Py_UCS4 *res ); +PyAPI_FUNC(int) _PyUnicode_ToFoldedFull( + Py_UCS4 ch, /* Unicode character */ + Py_UCS4 *res + ); + PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable( Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsCased( Py_UCS4 ch /* Unicode character */ ); diff -r 8e538ed41766 Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Fri Jan 13 22:12:37 2012 +0100 +++ b/Lib/test/test_unicode.py Fri Jan 13 19:46:01 2012 -0500 @@ -560,16 +560,24 @@ class UnicodeTest(string_tests.CommonTes self.assertEqual('A\u0345\u03a3'.lower(), 'a\u0345\u03c2') self.assertEqual('A\u0345\u03a3a'.lower(), 'a\u0345\u03c3a') self.assertEqual('A\u0345\u03a3'.lower(), 'a\u0345\u03c2') self.assertEqual('A\u03a3\u0345'.lower(), 'a\u03c2\u0345') self.assertEqual('\u03a3\u0345 '.lower(), '\u03c3\u0345 ') self.assertEqual('\U0008fffe'.lower(), '\U0008fffe') self.assertEqual('\u2177'.lower(), '\u2177') + def test_casefold(self): + self.assertEqual('hello'.casefold(), 'hello') + self.assertEqual('hELlo'.casefold(), 'hello') + self.assertEqual('ß'.casefold(), 'ss') + self.assertEqual('fi'.casefold(), 'fi') + self.assertEqual('\u03a3'.casefold(), '\u03c3') + self.assertEqual('A\u0345\u03a3'.casefold(), 'a\u03b9\u03c3') + def test_upper(self): string_tests.CommonTest.test_upper(self) self.assertEqual('\U0001044F'.upper(), '\U00010427') self.assertEqual('\U0001044F\U0001044F'.upper(), '\U00010427\U00010427') self.assertEqual('\U00010427\U0001044F'.upper(), '\U00010427\U00010427') self.assertEqual('X\U00010427x\U0001044F'.upper(), diff -r 8e538ed41766 Objects/unicodectype.c --- a/Objects/unicodectype.c Fri Jan 13 22:12:37 2012 +0100 +++ b/Objects/unicodectype.c Fri Jan 13 19:46:01 2012 -0500 @@ -180,80 +180,95 @@ int _PyUnicode_IsUppercase(Py_UCS4 ch) /* Returns the uppercase Unicode characters corresponding to ch or just ch if no uppercase mapping is known. */ Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); if (ctype->flags & EXTENDED_CASE_MASK) - return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFFFF]; + return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFF]; return ctype->upper ? ctype->upper : ch; } /* Returns the lowercase Unicode characters corresponding to ch or just ch if no lowercase mapping is known. */ Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); if (ctype->flags & EXTENDED_CASE_MASK) - return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFFFF]; + return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFF]; return ctype->lower ? ctype->lower : ch; } int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); if (ctype->flags & EXTENDED_CASE_MASK) { - int index = ctype->lower & 0xFFFFFF; + int index = ctype->lower & 0xFFFF; int n = ctype->lower >> 24; int i; for (i = 0; i < n; i++) res[i] = _PyUnicode_ExtendedCase[index + i]; return n; } res[0] = ctype->lower ? ctype->lower : ch; return 1; } int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); if (ctype->flags & EXTENDED_CASE_MASK) { - int index = ctype->title & 0xFFFFFF; + int index = ctype->title & 0xFFFF; int n = ctype->title >> 24; int i; for (i = 0; i < n; i++) res[i] = _PyUnicode_ExtendedCase[index + i]; return n; } res[0] = ctype->title ? ctype->title : ch; return 1; } int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); if (ctype->flags & EXTENDED_CASE_MASK) { - int index = ctype->upper & 0xFFFFFF; + int index = ctype->upper & 0xFFFF; int n = ctype->upper >> 24; int i; for (i = 0; i < n; i++) res[i] = _PyUnicode_ExtendedCase[index + i]; return n; } res[0] = ctype->upper ? ctype->upper : ch; return 1; } +int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res) +{ + const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); + + if (ctype->flags & EXTENDED_CASE_MASK && (ctype->lower >> 20) & 7) { + int index = (ctype->lower & 0xFFFF) + (ctype->lower >> 24); + int n = (ctype->lower >> 20) & 7; + int i; + for (i = 0; i < n; i++) + res[i] = _PyUnicode_ExtendedCase[index + i]; + return n; + } + return _PyUnicode_ToLowerFull(ch, res); +} + int _PyUnicode_IsCased(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); return (ctype->flags & CASED_MASK) != 0; } int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch) diff -r 8e538ed41766 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Fri Jan 13 22:12:37 2012 +0100 +++ b/Objects/unicodeobject.c Fri Jan 13 19:46:01 2012 -0500 @@ -9572,16 +9572,34 @@ do_upper(int kind, void *data, Py_ssize_ static Py_ssize_t do_lower(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { return do_upper_or_lower(kind, data, length, res, maxchar, 1); } static Py_ssize_t +do_casefold(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) +{ + Py_ssize_t i, k = 0; + + for (i = 0; i < length; i++) { + Py_UCS4 c = PyUnicode_READ(kind, data, i); + Py_UCS4 mapped[3]; + int j, n_res = _PyUnicode_ToFoldedFull(c, mapped); + for (j = 0; j < n_res; j++) { + if (mapped[j] > *maxchar) + *maxchar = mapped[j]; + res[k++] = mapped[j]; + } + } + return k; +} + +static Py_ssize_t do_title(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { Py_ssize_t i, k = 0; int previous_is_cased; previous_is_cased = 0; for (i = 0; i < length; i++) { const Py_UCS4 c = PyUnicode_READ(kind, data, i); @@ -10496,16 +10514,32 @@ unicode_capitalize(PyObject *self) { if (PyUnicode_READY(self) == -1) return NULL; if (PyUnicode_GET_LENGTH(self) == 0) return unicode_result_unchanged(self); return case_operation(self, do_capitalize); } +PyDoc_STRVAR(casefold__doc__, + "S.casefold() -> str\n\ +\n\ +Return a version of S suitable for caseless comparisons."); + +static PyObject * +unicode_casefold(PyObject *self) +{ + if (PyUnicode_READY(self) == -1) + return NULL; + if (PyUnicode_IS_ASCII(self)) + return ascii_upper_or_lower(self, 1); + return case_operation(self, do_casefold); +} + + /* Argument converter. Coerces to a single unicode character */ static int convert_uc(PyObject *obj, void *addr) { Py_UCS4 *fillcharloc = (Py_UCS4 *)addr; PyObject *uniobj; @@ -12993,16 +13027,17 @@ static PyMethodDef unicode_methods[] = { appear first, since lookup is done sequentially. */ {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, {"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__}, {"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__}, {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__}, {"join", (PyCFunction) unicode_join, METH_O, join__doc__}, {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS, capitalize__doc__}, + {"casefold", (PyCFunction) unicode_casefold, METH_NOARGS, casefold__doc__}, {"title", (PyCFunction) unicode_title, METH_NOARGS, title__doc__}, {"center", (PyCFunction) unicode_center, METH_VARARGS, center__doc__}, {"count", (PyCFunction) unicode_count, METH_VARARGS, count__doc__}, {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS, expandtabs__doc__}, {"find", (PyCFunction) unicode_find, METH_VARARGS, find__doc__}, {"partition", (PyCFunction) unicode_partition, METH_O, partition__doc__}, {"index", (PyCFunction) unicode_index, METH_VARARGS, index__doc__}, {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, diff -r 8e538ed41766 Objects/unicodetype_db.h --- a/Objects/unicodetype_db.h Fri Jan 13 22:12:37 2012 +0100 +++ b/Objects/unicodetype_db.h Fri Jan 13 19:46:01 2012 -0500 @@ -71,17 +71,17 @@ const _PyUnicode_TypeRecord _PyUnicode_T {87, 119, 87, 0, 0, 9993}, {88, 120, 88, 0, 0, 9993}, {89, 121, 89, 0, 0, 9993}, {90, 122, 90, 0, 0, 9993}, {0, 0, 0, 0, 0, 9993}, {0, 0, 0, 0, 0, 4096}, {0, 0, 0, 0, 2, 3076}, {0, 0, 0, 0, 3, 3076}, - {924, 181, 924, 0, 0, 9993}, + {16777218, 17825792, 16777218, 0, 0, 26377}, {0, 0, 0, 0, 0, 5632}, {0, 0, 0, 0, 1, 3076}, {0, 0, 0, 0, 0, 3072}, {192, 224, 192, 0, 0, 10113}, {193, 225, 193, 0, 0, 10113}, {194, 226, 194, 0, 0, 10113}, {195, 227, 195, 0, 0, 10113}, {196, 228, 196, 0, 0, 10113}, @@ -105,17 +105,17 @@ const _PyUnicode_TypeRecord _PyUnicode_T {214, 246, 214, 0, 0, 10113}, {216, 248, 216, 0, 0, 10113}, {217, 249, 217, 0, 0, 10113}, {218, 250, 218, 0, 0, 10113}, {219, 251, 219, 0, 0, 10113}, {220, 252, 220, 0, 0, 10113}, {221, 253, 221, 0, 0, 10113}, {222, 254, 222, 0, 0, 10113}, - {33554433, 16777216, 33554435, 0, 0, 26377}, + {33554438, 18874371, 33554440, 0, 0, 26377}, {192, 224, 192, 0, 0, 9993}, {193, 225, 193, 0, 0, 9993}, {194, 226, 194, 0, 0, 9993}, {195, 227, 195, 0, 0, 9993}, {196, 228, 196, 0, 0, 9993}, {197, 229, 197, 0, 0, 9993}, {198, 230, 198, 0, 0, 9993}, {199, 231, 199, 0, 0, 9993}, @@ -185,17 +185,17 @@ const _PyUnicode_TypeRecord _PyUnicode_T {296, 297, 296, 0, 0, 10113}, {296, 297, 296, 0, 0, 9993}, {298, 299, 298, 0, 0, 10113}, {298, 299, 298, 0, 0, 9993}, {300, 301, 300, 0, 0, 10113}, {300, 301, 300, 0, 0, 9993}, {302, 303, 302, 0, 0, 10113}, {302, 303, 302, 0, 0, 9993}, - {16777223, 33554437, 16777223, 0, 0, 26497}, + {16777228, 33554442, 16777228, 0, 0, 26497}, {73, 305, 73, 0, 0, 9993}, {306, 307, 306, 0, 0, 10113}, {306, 307, 306, 0, 0, 9993}, {308, 309, 308, 0, 0, 10113}, {308, 309, 308, 0, 0, 9993}, {310, 311, 310, 0, 0, 10113}, {310, 311, 310, 0, 0, 9993}, {313, 314, 313, 0, 0, 10113}, @@ -209,17 +209,17 @@ const _PyUnicode_TypeRecord _PyUnicode_T {321, 322, 321, 0, 0, 10113}, {321, 322, 321, 0, 0, 9993}, {323, 324, 323, 0, 0, 10113}, {323, 324, 323, 0, 0, 9993}, {325, 326, 325, 0, 0, 10113}, {325, 326, 325, 0, 0, 9993}, {327, 328, 327, 0, 0, 10113}, {327, 328, 327, 0, 0, 9993}, - {33554441, 16777224, 33554441, 0, 0, 26377}, + {33554448, 18874381, 33554448, 0, 0, 26377}, {330, 331, 330, 0, 0, 10113}, {330, 331, 330, 0, 0, 9993}, {332, 333, 332, 0, 0, 10113}, {332, 333, 332, 0, 0, 9993}, {334, 335, 334, 0, 0, 10113}, {334, 335, 334, 0, 0, 9993}, {336, 337, 336, 0, 0, 10113}, {336, 337, 336, 0, 0, 9993}, @@ -263,17 +263,17 @@ const _PyUnicode_TypeRecord _PyUnicode_T {374, 375, 374, 0, 0, 9993}, {376, 255, 376, 0, 0, 10113}, {377, 378, 377, 0, 0, 10113}, {377, 378, 377, 0, 0, 9993}, {379, 380, 379, 0, 0, 10113}, {379, 380, 379, 0, 0, 9993}, {381, 382, 381, 0, 0, 10113}, {381, 382, 381, 0, 0, 9993}, - {83, 383, 83, 0, 0, 9993}, + {16777236, 17825810, 16777236, 0, 0, 26377}, {579, 384, 579, 0, 0, 9993}, {385, 595, 385, 0, 0, 10113}, {386, 387, 386, 0, 0, 10113}, {386, 387, 386, 0, 0, 9993}, {388, 389, 388, 0, 0, 10113}, {388, 389, 388, 0, 0, 9993}, {390, 596, 390, 0, 0, 10113}, {391, 392, 391, 0, 0, 10113}, @@ -366,17 +366,17 @@ const _PyUnicode_TypeRecord _PyUnicode_T {488, 489, 488, 0, 0, 10113}, {488, 489, 488, 0, 0, 9993}, {490, 491, 490, 0, 0, 10113}, {490, 491, 490, 0, 0, 9993}, {492, 493, 492, 0, 0, 10113}, {492, 493, 492, 0, 0, 9993}, {494, 495, 494, 0, 0, 10113}, {494, 495, 494, 0, 0, 9993}, - {33554444, 16777227, 33554444, 0, 0, 26377}, + {33554456, 18874389, 33554456, 0, 0, 26377}, {497, 499, 498, 0, 0, 10113}, {497, 499, 498, 0, 0, 10049}, {497, 499, 498, 0, 0, 9993}, {500, 501, 500, 0, 0, 10113}, {500, 501, 500, 0, 0, 9993}, {502, 405, 502, 0, 0, 10113}, {503, 447, 503, 0, 0, 10113}, {504, 505, 504, 0, 0, 10113}, @@ -485,17 +485,17 @@ const _PyUnicode_TypeRecord _PyUnicode_T {430, 648, 430, 0, 0, 9993}, {580, 649, 580, 0, 0, 9993}, {433, 650, 433, 0, 0, 9993}, {434, 651, 434, 0, 0, 9993}, {581, 652, 581, 0, 0, 9993}, {439, 658, 439, 0, 0, 9993}, {0, 0, 0, 0, 0, 14089}, {0, 0, 0, 0, 0, 5889}, - {921, 837, 921, 0, 0, 13832}, + {16777244, 17825818, 16777244, 0, 0, 30216}, {880, 881, 880, 0, 0, 10113}, {880, 881, 880, 0, 0, 9993}, {882, 883, 882, 0, 0, 10113}, {882, 883, 882, 0, 0, 9993}, {886, 887, 886, 0, 0, 10113}, {886, 887, 886, 0, 0, 9993}, {0, 0, 0, 0, 0, 13321}, {1021, 891, 1021, 0, 0, 9993}, @@ -503,17 +503,17 @@ const _PyUnicode_TypeRecord _PyUnicode_T {1023, 893, 1023, 0, 0, 9993}, {902, 940, 902, 0, 0, 10113}, {904, 941, 904, 0, 0, 10113}, {905, 942, 905, 0, 0, 10113}, {906, 943, 906, 0, 0, 10113}, {908, 972, 908, 0, 0, 10113}, {910, 973, 910, 0, 0, 10113}, {911, 974, 911, 0, 0, 10113}, - {50331663, 16777230, 50331663, 0, 0, 26377}, + {50331681, 19922973, 50331681, 0, 0, 26377}, {913, 945, 913, 0, 0, 10113}, {914, 946, 914, 0, 0, 10113}, {915, 947, 915, 0, 0, 10113}, {916, 948, 916, 0, 0, 10113}, {917, 949, 917, 0, 0, 10113}, {918, 950, 918, 0, 0, 10113}, {919, 951, 919, 0, 0, 10113}, {920, 952, 920, 0, 0, 10113}, @@ -534,17 +534,17 @@ const _PyUnicode_TypeRecord _PyUnicode_T {936, 968, 936, 0, 0, 10113}, {937, 969, 937, 0, 0, 10113}, {938, 970, 938, 0, 0, 10113}, {939, 971, 939, 0, 0, 10113}, {902, 940, 902, 0, 0, 9993}, {904, 941, 904, 0, 0, 9993}, {905, 942, 905, 0, 0, 9993}, {906, 943, 906, 0, 0, 9993}, - {50331667, 16777234, 50331667, 0, 0, 26377}, + {50331688, 19922980, 50331688, 0, 0, 26377}, {913, 945, 913, 0, 0, 9993}, {914, 946, 914, 0, 0, 9993}, {915, 947, 915, 0, 0, 9993}, {916, 948, 916, 0, 0, 9993}, {917, 949, 917, 0, 0, 9993}, {918, 950, 918, 0, 0, 9993}, {919, 951, 919, 0, 0, 9993}, {920, 952, 920, 0, 0, 9993}, @@ -552,35 +552,35 @@ const _PyUnicode_TypeRecord _PyUnicode_T {922, 954, 922, 0, 0, 9993}, {923, 955, 923, 0, 0, 9993}, {924, 956, 924, 0, 0, 9993}, {925, 957, 925, 0, 0, 9993}, {926, 958, 926, 0, 0, 9993}, {927, 959, 927, 0, 0, 9993}, {928, 960, 928, 0, 0, 9993}, {929, 961, 929, 0, 0, 9993}, - {931, 962, 931, 0, 0, 9993}, + {16777261, 17825835, 16777261, 0, 0, 26377}, {931, 963, 931, 0, 0, 9993}, {932, 964, 932, 0, 0, 9993}, {933, 965, 933, 0, 0, 9993}, {934, 966, 934, 0, 0, 9993}, {935, 967, 935, 0, 0, 9993}, {936, 968, 936, 0, 0, 9993}, {937, 969, 937, 0, 0, 9993}, {938, 970, 938, 0, 0, 9993}, {939, 971, 939, 0, 0, 9993}, {908, 972, 908, 0, 0, 9993}, {910, 973, 910, 0, 0, 9993}, {911, 974, 911, 0, 0, 9993}, {975, 983, 975, 0, 0, 10113}, - {914, 976, 914, 0, 0, 9993}, - {920, 977, 920, 0, 0, 9993}, + {16777264, 17825838, 16777264, 0, 0, 26377}, + {16777267, 17825841, 16777267, 0, 0, 26377}, {0, 0, 0, 0, 0, 10113}, - {934, 981, 934, 0, 0, 9993}, - {928, 982, 928, 0, 0, 9993}, + {16777270, 17825844, 16777270, 0, 0, 26377}, + {16777273, 17825847, 16777273, 0, 0, 26377}, {975, 983, 975, 0, 0, 9993}, {984, 985, 984, 0, 0, 10113}, {984, 985, 984, 0, 0, 9993}, {986, 987, 986, 0, 0, 10113}, {986, 987, 986, 0, 0, 9993}, {988, 989, 988, 0, 0, 10113}, {988, 989, 988, 0, 0, 9993}, {990, 991, 990, 0, 0, 10113}, @@ -596,21 +596,21 @@ const _PyUnicode_TypeRecord _PyUnicode_T {1000, 1001, 1000, 0, 0, 10113}, {1000, 1001, 1000, 0, 0, 9993}, {1002, 1003, 1002, 0, 0, 10113}, {1002, 1003, 1002, 0, 0, 9993}, {1004, 1005, 1004, 0, 0, 10113}, {1004, 1005, 1004, 0, 0, 9993}, {1006, 1007, 1006, 0, 0, 10113}, {1006, 1007, 1006, 0, 0, 9993}, - {922, 1008, 922, 0, 0, 9993}, - {929, 1009, 929, 0, 0, 9993}, + {16777276, 17825850, 16777276, 0, 0, 26377}, + {16777279, 17825853, 16777279, 0, 0, 26377}, {1017, 1010, 1017, 0, 0, 9993}, {1012, 952, 1012, 0, 0, 10113}, - {917, 1013, 917, 0, 0, 9993}, + {16777282, 17825856, 16777282, 0, 0, 26377}, {1015, 1016, 1015, 0, 0, 10113}, {1015, 1016, 1015, 0, 0, 9993}, {1017, 1010, 1017, 0, 0, 10113}, {1018, 1019, 1018, 0, 0, 10113}, {1018, 1019, 1018, 0, 0, 9993}, {1021, 891, 1021, 0, 0, 10113}, {1022, 892, 1022, 0, 0, 10113}, {1023, 893, 1023, 0, 0, 10113}, @@ -973,17 +973,17 @@ const _PyUnicode_TypeRecord _PyUnicode_T {1359, 1407, 1359, 0, 0, 9993}, {1360, 1408, 1360, 0, 0, 9993}, {1361, 1409, 1361, 0, 0, 9993}, {1362, 1410, 1362, 0, 0, 9993}, {1363, 1411, 1363, 0, 0, 9993}, {1364, 1412, 1364, 0, 0, 9993}, {1365, 1413, 1365, 0, 0, 9993}, {1366, 1414, 1366, 0, 0, 9993}, - {33554455, 16777238, 33554457, 0, 0, 26377}, + {33554502, 18874435, 33554504, 0, 0, 26377}, {0, 0, 0, 0, 0, 1537}, {4256, 11520, 4256, 0, 0, 10113}, {4257, 11521, 4257, 0, 0, 10113}, {4258, 11522, 4258, 0, 0, 10113}, {4259, 11523, 4259, 0, 0, 10113}, {4260, 11524, 4260, 0, 0, 10113}, {4261, 11525, 4261, 0, 0, 10113}, {4262, 11526, 4262, 0, 0, 10113}, @@ -1175,23 +1175,23 @@ const _PyUnicode_TypeRecord _PyUnicode_T {7822, 7823, 7822, 0, 0, 10113}, {7822, 7823, 7822, 0, 0, 9993}, {7824, 7825, 7824, 0, 0, 10113}, {7824, 7825, 7824, 0, 0, 9993}, {7826, 7827, 7826, 0, 0, 10113}, {7826, 7827, 7826, 0, 0, 9993}, {7828, 7829, 7828, 0, 0, 10113}, {7828, 7829, 7828, 0, 0, 9993}, - {33554460, 16777243, 33554460, 0, 0, 26377}, - {33554463, 16777246, 33554463, 0, 0, 26377}, - {33554466, 16777249, 33554466, 0, 0, 26377}, - {33554469, 16777252, 33554469, 0, 0, 26377}, - {33554472, 16777255, 33554472, 0, 0, 26377}, - {7776, 7835, 7776, 0, 0, 9993}, - {7838, 223, 7838, 0, 0, 10113}, + {33554509, 18874442, 33554509, 0, 0, 26377}, + {33554514, 18874447, 33554514, 0, 0, 26377}, + {33554519, 18874452, 33554519, 0, 0, 26377}, + {33554524, 18874457, 33554524, 0, 0, 26377}, + {33554529, 18874462, 33554529, 0, 0, 26377}, + {16777317, 17825891, 16777317, 0, 0, 26377}, + {16777321, 18874470, 16777321, 0, 0, 26497}, {7840, 7841, 7840, 0, 0, 10113}, {7840, 7841, 7840, 0, 0, 9993}, {7842, 7843, 7842, 0, 0, 10113}, {7842, 7843, 7842, 0, 0, 9993}, {7844, 7845, 7844, 0, 0, 10113}, {7844, 7845, 7844, 0, 0, 9993}, {7846, 7847, 7846, 0, 0, 10113}, {7846, 7847, 7846, 0, 0, 9993}, @@ -1350,23 +1350,23 @@ const _PyUnicode_TypeRecord _PyUnicode_T {8012, 8004, 8012, 0, 0, 9993}, {8013, 8005, 8013, 0, 0, 9993}, {8008, 8000, 8008, 0, 0, 10113}, {8009, 8001, 8009, 0, 0, 10113}, {8010, 8002, 8010, 0, 0, 10113}, {8011, 8003, 8011, 0, 0, 10113}, {8012, 8004, 8012, 0, 0, 10113}, {8013, 8005, 8013, 0, 0, 10113}, - {33554475, 16777258, 33554475, 0, 0, 26377}, + {33554541, 18874474, 33554541, 0, 0, 26377}, {8025, 8017, 8025, 0, 0, 9993}, - {50331694, 16777261, 50331694, 0, 0, 26377}, + {50331763, 19923055, 50331763, 0, 0, 26377}, {8027, 8019, 8027, 0, 0, 9993}, - {50331698, 16777265, 50331698, 0, 0, 26377}, + {50331770, 19923062, 50331770, 0, 0, 26377}, {8029, 8021, 8029, 0, 0, 9993}, - {50331702, 16777269, 50331702, 0, 0, 26377}, + {50331777, 19923069, 50331777, 0, 0, 26377}, {8031, 8023, 8031, 0, 0, 9993}, {8025, 8017, 8025, 0, 0, 10113}, {8027, 8019, 8027, 0, 0, 10113}, {8029, 8021, 8029, 0, 0, 10113}, {8031, 8023, 8031, 0, 0, 10113}, {8040, 8032, 8040, 0, 0, 9993}, {8041, 8033, 8041, 0, 0, 9993}, {8042, 8034, 8042, 0, 0, 9993}, @@ -1392,120 +1392,120 @@ const _PyUnicode_TypeRecord _PyUnicode_T {8154, 8054, 8154, 0, 0, 9993}, {8155, 8055, 8155, 0, 0, 9993}, {8184, 8056, 8184, 0, 0, 9993}, {8185, 8057, 8185, 0, 0, 9993}, {8170, 8058, 8170, 0, 0, 9993}, {8171, 8059, 8171, 0, 0, 9993}, {8186, 8060, 8186, 0, 0, 9993}, {8187, 8061, 8187, 0, 0, 9993}, - {33554490, 16777273, 16777276, 0, 0, 26377}, - {33554494, 16777277, 16777280, 0, 0, 26377}, - {33554498, 16777281, 16777284, 0, 0, 26377}, - {33554502, 16777285, 16777288, 0, 0, 26377}, - {33554506, 16777289, 16777292, 0, 0, 26377}, - {33554510, 16777293, 16777296, 0, 0, 26377}, - {33554514, 16777297, 16777300, 0, 0, 26377}, - {33554518, 16777301, 16777304, 0, 0, 26377}, - {33554522, 16777305, 16777308, 0, 0, 26433}, - {33554526, 16777309, 16777312, 0, 0, 26433}, - {33554530, 16777313, 16777316, 0, 0, 26433}, - {33554534, 16777317, 16777320, 0, 0, 26433}, - {33554538, 16777321, 16777324, 0, 0, 26433}, - {33554542, 16777325, 16777328, 0, 0, 26433}, - {33554546, 16777329, 16777332, 0, 0, 26433}, - {33554550, 16777333, 16777336, 0, 0, 26433}, - {33554554, 16777337, 16777340, 0, 0, 26377}, - {33554558, 16777341, 16777344, 0, 0, 26377}, - {33554562, 16777345, 16777348, 0, 0, 26377}, - {33554566, 16777349, 16777352, 0, 0, 26377}, - {33554570, 16777353, 16777356, 0, 0, 26377}, - {33554574, 16777357, 16777360, 0, 0, 26377}, - {33554578, 16777361, 16777364, 0, 0, 26377}, - {33554582, 16777365, 16777368, 0, 0, 26377}, - {33554586, 16777369, 16777372, 0, 0, 26433}, - {33554590, 16777373, 16777376, 0, 0, 26433}, - {33554594, 16777377, 16777380, 0, 0, 26433}, - {33554598, 16777381, 16777384, 0, 0, 26433}, - {33554602, 16777385, 16777388, 0, 0, 26433}, - {33554606, 16777389, 16777392, 0, 0, 26433}, - {33554610, 16777393, 16777396, 0, 0, 26433}, - {33554614, 16777397, 16777400, 0, 0, 26433}, - {33554618, 16777401, 16777404, 0, 0, 26377}, - {33554622, 16777405, 16777408, 0, 0, 26377}, - {33554626, 16777409, 16777412, 0, 0, 26377}, - {33554630, 16777413, 16777416, 0, 0, 26377}, - {33554634, 16777417, 16777420, 0, 0, 26377}, - {33554638, 16777421, 16777424, 0, 0, 26377}, - {33554642, 16777425, 16777428, 0, 0, 26377}, - {33554646, 16777429, 16777432, 0, 0, 26377}, - {33554650, 16777433, 16777436, 0, 0, 26433}, - {33554654, 16777437, 16777440, 0, 0, 26433}, - {33554658, 16777441, 16777444, 0, 0, 26433}, - {33554662, 16777445, 16777448, 0, 0, 26433}, - {33554666, 16777449, 16777452, 0, 0, 26433}, - {33554670, 16777453, 16777456, 0, 0, 26433}, - {33554674, 16777457, 16777460, 0, 0, 26433}, - {33554678, 16777461, 16777464, 0, 0, 26433}, + {33554567, 18874500, 16777353, 0, 0, 26377}, + {33554573, 18874506, 16777359, 0, 0, 26377}, + {33554579, 18874512, 16777365, 0, 0, 26377}, + {33554585, 18874518, 16777371, 0, 0, 26377}, + {33554591, 18874524, 16777377, 0, 0, 26377}, + {33554597, 18874530, 16777383, 0, 0, 26377}, + {33554603, 18874536, 16777389, 0, 0, 26377}, + {33554609, 18874542, 16777395, 0, 0, 26377}, + {33554615, 18874548, 16777401, 0, 0, 26433}, + {33554621, 18874554, 16777407, 0, 0, 26433}, + {33554627, 18874560, 16777413, 0, 0, 26433}, + {33554633, 18874566, 16777419, 0, 0, 26433}, + {33554639, 18874572, 16777425, 0, 0, 26433}, + {33554645, 18874578, 16777431, 0, 0, 26433}, + {33554651, 18874584, 16777437, 0, 0, 26433}, + {33554657, 18874590, 16777443, 0, 0, 26433}, + {33554663, 18874596, 16777449, 0, 0, 26377}, + {33554669, 18874602, 16777455, 0, 0, 26377}, + {33554675, 18874608, 16777461, 0, 0, 26377}, + {33554681, 18874614, 16777467, 0, 0, 26377}, + {33554687, 18874620, 16777473, 0, 0, 26377}, + {33554693, 18874626, 16777479, 0, 0, 26377}, + {33554699, 18874632, 16777485, 0, 0, 26377}, + {33554705, 18874638, 16777491, 0, 0, 26377}, + {33554711, 18874644, 16777497, 0, 0, 26433}, + {33554717, 18874650, 16777503, 0, 0, 26433}, + {33554723, 18874656, 16777509, 0, 0, 26433}, + {33554729, 18874662, 16777515, 0, 0, 26433}, + {33554735, 18874668, 16777521, 0, 0, 26433}, + {33554741, 18874674, 16777527, 0, 0, 26433}, + {33554747, 18874680, 16777533, 0, 0, 26433}, + {33554753, 18874686, 16777539, 0, 0, 26433}, + {33554759, 18874692, 16777545, 0, 0, 26377}, + {33554765, 18874698, 16777551, 0, 0, 26377}, + {33554771, 18874704, 16777557, 0, 0, 26377}, + {33554777, 18874710, 16777563, 0, 0, 26377}, + {33554783, 18874716, 16777569, 0, 0, 26377}, + {33554789, 18874722, 16777575, 0, 0, 26377}, + {33554795, 18874728, 16777581, 0, 0, 26377}, + {33554801, 18874734, 16777587, 0, 0, 26377}, + {33554807, 18874740, 16777593, 0, 0, 26433}, + {33554813, 18874746, 16777599, 0, 0, 26433}, + {33554819, 18874752, 16777605, 0, 0, 26433}, + {33554825, 18874758, 16777611, 0, 0, 26433}, + {33554831, 18874764, 16777617, 0, 0, 26433}, + {33554837, 18874770, 16777623, 0, 0, 26433}, + {33554843, 18874776, 16777629, 0, 0, 26433}, + {33554849, 18874782, 16777635, 0, 0, 26433}, {8120, 8112, 8120, 0, 0, 9993}, {8121, 8113, 8121, 0, 0, 9993}, - {33554682, 16777465, 33554684, 0, 0, 26377}, - {33554687, 16777470, 16777473, 0, 0, 26377}, - {33554691, 16777474, 33554693, 0, 0, 26377}, - {33554696, 16777479, 33554696, 0, 0, 26377}, - {50331915, 16777482, 50331918, 0, 0, 26377}, + {33554855, 18874788, 33554857, 0, 0, 26377}, + {33554862, 18874795, 16777648, 0, 0, 26377}, + {33554868, 18874801, 33554870, 0, 0, 26377}, + {33554875, 18874808, 33554875, 0, 0, 26377}, + {50332097, 19923389, 50332100, 0, 0, 26377}, {8120, 8112, 8120, 0, 0, 10113}, {8121, 8113, 8121, 0, 0, 10113}, {8122, 8048, 8122, 0, 0, 10113}, {8123, 8049, 8123, 0, 0, 10113}, - {33554706, 16777489, 16777492, 0, 0, 26433}, - {921, 8126, 921, 0, 0, 9993}, - {33554710, 16777493, 33554712, 0, 0, 26377}, - {33554715, 16777498, 16777501, 0, 0, 26377}, - {33554719, 16777502, 33554721, 0, 0, 26377}, - {33554724, 16777507, 33554724, 0, 0, 26377}, - {50331943, 16777510, 50331946, 0, 0, 26377}, + {33554890, 18874823, 16777676, 0, 0, 26433}, + {16777679, 17826253, 16777679, 0, 0, 26377}, + {33554899, 18874832, 33554901, 0, 0, 26377}, + {33554906, 18874839, 16777692, 0, 0, 26377}, + {33554912, 18874845, 33554914, 0, 0, 26377}, + {33554919, 18874852, 33554919, 0, 0, 26377}, + {50332141, 19923433, 50332144, 0, 0, 26377}, {8136, 8050, 8136, 0, 0, 10113}, {8137, 8051, 8137, 0, 0, 10113}, {8138, 8052, 8138, 0, 0, 10113}, {8139, 8053, 8139, 0, 0, 10113}, - {33554734, 16777517, 16777520, 0, 0, 26433}, + {33554934, 18874867, 16777720, 0, 0, 26433}, {8152, 8144, 8152, 0, 0, 9993}, {8153, 8145, 8153, 0, 0, 9993}, - {50331954, 16777521, 50331954, 0, 0, 26377}, - {50331958, 16777525, 50331958, 0, 0, 26377}, - {33554746, 16777529, 33554746, 0, 0, 26377}, - {50331965, 16777532, 50331965, 0, 0, 26377}, + {50332157, 19923449, 50332157, 0, 0, 26377}, + {50332164, 19923456, 50332164, 0, 0, 26377}, + {33554954, 18874887, 33554954, 0, 0, 26377}, + {50332176, 19923468, 50332176, 0, 0, 26377}, {8152, 8144, 8152, 0, 0, 10113}, {8153, 8145, 8153, 0, 0, 10113}, {8154, 8054, 8154, 0, 0, 10113}, {8155, 8055, 8155, 0, 0, 10113}, {8168, 8160, 8168, 0, 0, 9993}, {8169, 8161, 8169, 0, 0, 9993}, - {50331969, 16777536, 50331969, 0, 0, 26377}, - {50331973, 16777540, 50331973, 0, 0, 26377}, - {33554761, 16777544, 33554761, 0, 0, 26377}, + {50332183, 19923475, 50332183, 0, 0, 26377}, + {50332190, 19923482, 50332190, 0, 0, 26377}, + {33554980, 18874913, 33554980, 0, 0, 26377}, {8172, 8165, 8172, 0, 0, 9993}, - {33554764, 16777547, 33554764, 0, 0, 26377}, - {50331983, 16777550, 50331983, 0, 0, 26377}, + {33554985, 18874918, 33554985, 0, 0, 26377}, + {50332207, 19923499, 50332207, 0, 0, 26377}, {8168, 8160, 8168, 0, 0, 10113}, {8169, 8161, 8169, 0, 0, 10113}, {8170, 8058, 8170, 0, 0, 10113}, {8171, 8059, 8171, 0, 0, 10113}, {8172, 8165, 8172, 0, 0, 10113}, - {33554771, 16777554, 33554773, 0, 0, 26377}, - {33554776, 16777559, 16777562, 0, 0, 26377}, - {33554780, 16777563, 33554782, 0, 0, 26377}, - {33554785, 16777568, 33554785, 0, 0, 26377}, - {50332004, 16777571, 50332007, 0, 0, 26377}, + {33554997, 18874930, 33554999, 0, 0, 26377}, + {33555004, 18874937, 16777790, 0, 0, 26377}, + {33555010, 18874943, 33555012, 0, 0, 26377}, + {33555017, 18874950, 33555017, 0, 0, 26377}, + {50332239, 19923531, 50332242, 0, 0, 26377}, {8184, 8056, 8184, 0, 0, 10113}, {8185, 8057, 8185, 0, 0, 10113}, {8186, 8060, 8186, 0, 0, 10113}, {8187, 8061, 8187, 0, 0, 10113}, - {33554795, 16777578, 16777581, 0, 0, 26433}, + {33555032, 18874965, 16777818, 0, 0, 26433}, {0, 0, 0, 0, 0, 3076}, {0, 0, 0, 0, 4, 3076}, {0, 0, 0, 0, 5, 3076}, {0, 0, 0, 0, 6, 3076}, {0, 0, 0, 0, 7, 3076}, {0, 0, 0, 0, 8, 3076}, {0, 0, 0, 0, 9, 3076}, {0, 0, 0, 0, 0, 1792}, @@ -2032,28 +2032,28 @@ const _PyUnicode_TypeRecord _PyUnicode_T {42914, 42915, 42914, 0, 0, 10113}, {42914, 42915, 42914, 0, 0, 9993}, {42916, 42917, 42916, 0, 0, 10113}, {42916, 42917, 42916, 0, 0, 9993}, {42918, 42919, 42918, 0, 0, 10113}, {42918, 42919, 42918, 0, 0, 9993}, {42920, 42921, 42920, 0, 0, 10113}, {42920, 42921, 42920, 0, 0, 9993}, - {33554799, 16777582, 33554801, 0, 0, 26377}, - {33554804, 16777587, 33554806, 0, 0, 26377}, - {33554809, 16777592, 33554811, 0, 0, 26377}, - {50332030, 16777597, 50332033, 0, 0, 26377}, - {50332037, 16777604, 50332040, 0, 0, 26377}, - {33554828, 16777611, 33554830, 0, 0, 26377}, - {33554833, 16777616, 33554835, 0, 0, 26377}, - {33554838, 16777621, 33554840, 0, 0, 26377}, - {33554843, 16777626, 33554845, 0, 0, 26377}, - {33554848, 16777631, 33554850, 0, 0, 26377}, - {33554853, 16777636, 33554855, 0, 0, 26377}, - {33554858, 16777641, 33554860, 0, 0, 26377}, + {33555038, 18874971, 33555040, 0, 0, 26377}, + {33555045, 18874978, 33555047, 0, 0, 26377}, + {33555052, 18874985, 33555054, 0, 0, 26377}, + {50332276, 19923568, 50332279, 0, 0, 26377}, + {50332286, 19923578, 50332289, 0, 0, 26377}, + {33555079, 18875012, 33555081, 0, 0, 26377}, + {33555086, 18875019, 33555088, 0, 0, 26377}, + {33555093, 18875026, 33555095, 0, 0, 26377}, + {33555100, 18875033, 33555102, 0, 0, 26377}, + {33555107, 18875040, 33555109, 0, 0, 26377}, + {33555114, 18875047, 33555116, 0, 0, 26377}, + {33555121, 18875054, 33555123, 0, 0, 26377}, {0, 0, 0, 0, 0, 1025}, {65313, 65345, 65313, 0, 0, 10113}, {65314, 65346, 65314, 0, 0, 10113}, {65315, 65347, 65315, 0, 0, 10113}, {65316, 65348, 65316, 0, 0, 10113}, {65317, 65349, 65317, 0, 0, 10113}, {65318, 65350, 65318, 0, 0, 10113}, {65319, 65351, 65319, 0, 0, 10113}, @@ -2183,442 +2183,705 @@ const _PyUnicode_TypeRecord _PyUnicode_T {66597, 66637, 66597, 0, 0, 9993}, {66598, 66638, 66598, 0, 0, 9993}, {66599, 66639, 66599, 0, 0, 9993}, }; /* extended case mappings */ const Py_UCS4 _PyUnicode_ExtendedCase[] = { + 181, + 956, + 924, 223, + 115, + 115, 83, 83, 83, 115, 105, 775, 304, 329, 700, + 110, + 700, 78, + 383, + 115, + 83, 496, + 106, + 780, 74, 780, + 837, + 953, + 921, 912, + 953, + 776, + 769, 921, 776, 769, 944, + 965, + 776, + 769, 933, 776, 769, + 962, + 963, + 931, + 976, + 946, + 914, + 977, + 952, + 920, + 981, + 966, + 934, + 982, + 960, + 928, + 1008, + 954, + 922, + 1009, + 961, + 929, + 1013, + 949, + 917, 1415, + 1381, + 1410, 1333, 1362, 1333, 1410, 7830, + 104, + 817, 72, 817, 7831, + 116, + 776, 84, 776, 7832, + 119, + 778, 87, 778, 7833, + 121, + 778, 89, 778, 7834, + 97, + 702, 65, 702, + 7835, + 7777, + 7776, + 223, + 115, + 115, + 7838, 8016, + 965, + 787, 933, 787, 8018, + 965, + 787, + 768, 933, 787, 768, 8020, + 965, + 787, + 769, 933, 787, 769, 8022, + 965, + 787, + 834, 933, 787, 834, 8064, + 7936, + 953, 7944, 921, 8072, 8065, + 7937, + 953, 7945, 921, 8073, 8066, + 7938, + 953, 7946, 921, 8074, 8067, + 7939, + 953, 7947, 921, 8075, 8068, + 7940, + 953, 7948, 921, 8076, 8069, + 7941, + 953, 7949, 921, 8077, 8070, + 7942, + 953, 7950, 921, 8078, 8071, + 7943, + 953, 7951, 921, 8079, 8064, + 7936, + 953, 7944, 921, 8072, 8065, + 7937, + 953, 7945, 921, 8073, 8066, + 7938, + 953, 7946, 921, 8074, 8067, + 7939, + 953, 7947, 921, 8075, 8068, + 7940, + 953, 7948, 921, 8076, 8069, + 7941, + 953, 7949, 921, 8077, 8070, + 7942, + 953, 7950, 921, 8078, 8071, + 7943, + 953, 7951, 921, 8079, 8080, + 7968, + 953, 7976, 921, 8088, 8081, + 7969, + 953, 7977, 921, 8089, 8082, + 7970, + 953, 7978, 921, 8090, 8083, + 7971, + 953, 7979, 921, 8091, 8084, + 7972, + 953, 7980, 921, 8092, 8085, + 7973, + 953, 7981, 921, 8093, 8086, + 7974, + 953, 7982, 921, 8094, 8087, + 7975, + 953, 7983, 921, 8095, 8080, + 7968, + 953, 7976, 921, 8088, 8081, + 7969, + 953, 7977, 921, 8089, 8082, + 7970, + 953, 7978, 921, 8090, 8083, + 7971, + 953, 7979, 921, 8091, 8084, + 7972, + 953, 7980, 921, 8092, 8085, + 7973, + 953, 7981, 921, 8093, 8086, + 7974, + 953, 7982, 921, 8094, 8087, + 7975, + 953, 7983, 921, 8095, 8096, + 8032, + 953, 8040, 921, 8104, 8097, + 8033, + 953, 8041, 921, 8105, 8098, + 8034, + 953, 8042, 921, 8106, 8099, + 8035, + 953, 8043, 921, 8107, 8100, + 8036, + 953, 8044, 921, 8108, 8101, + 8037, + 953, 8045, 921, 8109, 8102, + 8038, + 953, 8046, 921, 8110, 8103, + 8039, + 953, 8047, 921, 8111, 8096, + 8032, + 953, 8040, 921, 8104, 8097, + 8033, + 953, 8041, 921, 8105, 8098, + 8034, + 953, 8042, 921, 8106, 8099, + 8035, + 953, 8043, 921, 8107, 8100, + 8036, + 953, 8044, 921, 8108, 8101, + 8037, + 953, 8045, 921, 8109, 8102, + 8038, + 953, 8046, 921, 8110, 8103, + 8039, + 953, 8047, 921, 8111, 8114, + 8048, + 953, 8122, 921, 8122, 837, 8115, + 945, + 953, 913, 921, 8124, 8116, + 940, + 953, 902, 921, 902, 837, 8118, + 945, + 834, 913, 834, 8119, + 945, + 834, + 953, 913, 834, 921, 913, 834, 837, 8115, + 945, + 953, 913, 921, 8124, + 8126, + 953, + 921, 8130, + 8052, + 953, 8138, 921, 8138, 837, 8131, + 951, + 953, 919, 921, 8140, 8132, + 942, + 953, 905, 921, 905, 837, 8134, + 951, + 834, 919, 834, 8135, + 951, + 834, + 953, 919, 834, 921, 919, 834, 837, 8131, + 951, + 953, 919, 921, 8140, 8146, + 953, + 776, + 768, 921, 776, 768, 8147, + 953, + 776, + 769, 921, 776, 769, 8150, + 953, + 834, 921, 834, 8151, + 953, + 776, + 834, 921, 776, 834, 8162, + 965, + 776, + 768, 933, 776, 768, 8163, + 965, + 776, + 769, 933, 776, 769, 8164, + 961, + 787, 929, 787, 8166, + 965, + 834, 933, 834, 8167, + 965, + 776, + 834, 933, 776, 834, 8178, + 8060, + 953, 8186, 921, 8186, 837, 8179, + 969, + 953, 937, 921, 8188, 8180, + 974, + 953, 911, 921, 911, 837, 8182, + 969, + 834, 937, 834, 8183, + 969, + 834, + 953, 937, 834, 921, 937, 834, 837, 8179, + 969, + 953, 937, 921, 8188, 64256, + 102, + 102, 70, 70, 70, 102, 64257, + 102, + 105, 70, 73, 70, 105, 64258, + 102, + 108, 70, 76, 70, 108, 64259, + 102, + 102, + 105, 70, 70, 73, 70, 102, 105, 64260, + 102, + 102, + 108, 70, 70, 76, 70, 102, 108, 64261, + 115, + 116, 83, 84, 83, 116, 64262, + 115, + 116, 83, 84, 83, 116, 64275, + 1396, + 1398, 1348, 1350, 1348, 1398, 64276, + 1396, + 1381, 1348, 1333, 1348, 1381, 64277, + 1396, + 1387, 1348, 1339, 1348, 1387, 64278, + 1406, + 1398, 1358, 1350, 1358, 1398, 64279, + 1396, + 1389, 1348, 1341, 1348, 1389, }; /* type indexes */ #define SHIFT 7 diff -r 8e538ed41766 Tools/unicode/makeunicodedata.py --- a/Tools/unicode/makeunicodedata.py Fri Jan 13 22:12:37 2012 +0100 +++ b/Tools/unicode/makeunicodedata.py Fri Jan 13 19:46:01 2012 -0500 @@ -44,16 +44,17 @@ COMPOSITION_EXCLUSIONS = "CompositionExc EASTASIAN_WIDTH = "EastAsianWidth%s.txt" UNIHAN = "Unihan%s.zip" DERIVED_CORE_PROPERTIES = "DerivedCoreProperties%s.txt" DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt" LINE_BREAK = "LineBreak%s.txt" NAME_ALIASES = "NameAliases%s.txt" NAMED_SEQUENCES = "NamedSequences%s.txt" SPECIAL_CASING = "SpecialCasing%s.txt" +CASE_FOLDING = "CaseFolding%s.txt" # Private Use Areas -- in planes 1, 15, 16 PUA_1 = range(0xE000, 0xF900) PUA_15 = range(0xF0000, 0xFFFFE) PUA_16 = range(0x100000, 0x10FFFE) # we use this ranges of PUA_15 to store name aliases and named sequences NAME_ALIASES_START = 0xF0000 @@ -419,38 +420,46 @@ def makeunicodetype(unicode, trace): flags |= XID_START_MASK if "XID_Continue" in properties: flags |= XID_CONTINUE_MASK if "Cased" in properties: flags |= CASED_MASK if "Case_Ignorable" in properties: flags |= CASE_IGNORABLE_MASK sc = unicode.special_casing.get(char) + cf = unicode.case_folding.get(char, [char]) + if record[12]: + upper = int(record[12], 16) + else: + upper = char + if record[13]: + lower = int(record[13], 16) + else: + lower = char + if record[14]: + title = int(record[14], 16) + else: + title = upper + if sc is None and cf != [lower]: + sc = ([lower], [title], [upper]) if sc is None: - if record[12]: - upper = int(record[12], 16) - else: - upper = char - if record[13]: - lower = int(record[13], 16) - else: - lower = char - if record[14]: - title = int(record[14], 16) - else: - title = upper if upper == lower == title: upper = lower = title = 0 else: - # This happens when some character maps to more than one - # character in uppercase, lowercase, or titlecase. The extra - # characters are stored in a different array. + # This happens either when some character maps to more than one + # character in uppercase, lowercase, or titlecase or the + # casefolded version of the character is different from the + # lowercase. The extra characters are stored in a different + # array. flags |= EXTENDED_CASE_MASK lower = len(extra_casing) | (len(sc[0]) << 24) extra_casing.extend(sc[0]) + if cf != sc[0]: + lower |= len(cf) << 20 + extra_casing.extend(cf) upper = len(extra_casing) | (len(sc[2]) << 24) extra_casing.extend(sc[2]) # Title is probably equal to upper. if sc[1] == sc[2]: title = upper else: title = len(extra_casing) | (len(sc[1]) << 24) extra_casing.extend(sc[1]) @@ -1102,16 +1111,27 @@ class UnicodeData: # languages) except for one, which is hardcoded. See # handle_capital_sigma in unicodeobject.c. continue c = int(data[0], 16) lower = [int(char, 16) for char in data[1].split()] title = [int(char, 16) for char in data[2].split()] upper = [int(char, 16) for char in data[3].split()] sc[c] = (lower, title, upper) + cf = self.case_folding = {} + if version != '3.2.0': + with open_data(CASE_FOLDING, version) as file: + for s in file: + s = s[:-1].split('#', 1)[0] + if not s: + continue + data = s.split("; ") + if data[1] in "CF": + c = int(data[0], 16) + cf[c] = [int(char, 16) for char in data[2].split()] def uselatin1(self): # restrict character range to ISO Latin 1 self.chars = list(range(256)) # hash table tools # this is a straight-forward reimplementation of Python's built-in