Index: Objects/unicodectype.c =================================================================== --- Objects/unicodectype.c (revision 73674) +++ Objects/unicodectype.c (working copy) @@ -141,13 +141,17 @@ switch (ch) { case 0x0F33: return (double) -1 / 2; + case 0x0C78: case 0x17F0: case 0x3007: + case 0xF9B2: #ifdef Py_UNICODE_WIDE case 0x1018A: #endif return (double) 0; case 0x09F4: + case 0x0C79: + case 0x0C7C: case 0x17F1: case 0x215F: case 0x2160: @@ -164,9 +168,18 @@ case 0x1015A: case 0x10320: case 0x103D1: + case 0x10916: + case 0x12415: + case 0x1241E: + case 0x1242C: + case 0x12434: + case 0x1244F: + case 0x12458: + case 0x1D360: #endif return (double) 1; case 0x00BD: + case 0x0D74: case 0x0F2A: case 0x2CFD: #ifdef Py_UNICODE_WIDE @@ -176,19 +189,33 @@ #endif return (double) 1 / 2; case 0x2153: +#ifdef Py_UNICODE_WIDE + case 0x1245A: + case 0x1245D: +#endif return (double) 1 / 3; case 0x00BC: + case 0x0D73: #ifdef Py_UNICODE_WIDE case 0x10140: + case 0x12460: + case 0x12462: #endif return (double) 1 / 4; case 0x2155: return (double) 1 / 5; case 0x2159: +#ifdef Py_UNICODE_WIDE + case 0x12461: +#endif return (double) 1 / 6; case 0x215B: +#ifdef Py_UNICODE_WIDE + case 0x1245F: +#endif return (double) 1 / 8; case 0x0BF0: + case 0x0D70: case 0x1372: case 0x2169: case 0x2179: @@ -202,6 +229,8 @@ case 0x3038: case 0x3229: case 0x3289: + case 0xF973: + case 0xF9FD: #ifdef Py_UNICODE_WIDE case 0x10110: case 0x10149: @@ -214,10 +243,13 @@ case 0x10164: case 0x10322: case 0x103D3: + case 0x10917: case 0x10A44: + case 0x1D369: #endif return (double) 10; case 0x0BF1: + case 0x0D71: case 0x137B: case 0x216D: case 0x217D: @@ -227,10 +259,12 @@ case 0x10152: case 0x1016A: case 0x103D5: + case 0x10919: case 0x10A46: #endif return (double) 100; case 0x0BF2: + case 0x0D72: case 0x216F: case 0x217F: case 0x2180: @@ -249,6 +283,8 @@ case 0x10155: #endif return (double) 10000; + case 0x2188: + return (double) 100000; case 0x216A: case 0x217A: case 0x246A: @@ -311,6 +347,8 @@ 
case 0x24F3: return (double) 19; case 0x09F5: + case 0x0C7A: + case 0x0C7D: case 0x17F2: case 0x2161: case 0x2171: @@ -318,6 +356,7 @@ case 0x3193: case 0x3221: case 0x3281: + case 0xF978: #ifdef Py_UNICODE_WIDE case 0x10108: case 0x1015B: @@ -325,11 +364,23 @@ case 0x1015D: case 0x1015E: case 0x103D2: + case 0x12400: + case 0x12416: + case 0x1241F: + case 0x12423: + case 0x1242D: + case 0x12435: + case 0x1244A: + case 0x12450: + case 0x12459: + case 0x1D361: #endif return (double) 2; case 0x2154: #ifdef Py_UNICODE_WIDE case 0x10177: + case 0x1245B: + case 0x1245E: #endif return (double) 2 / 3; case 0x2156: @@ -343,7 +394,9 @@ #ifdef Py_UNICODE_WIDE case 0x10111: case 0x103D4: + case 0x10918: case 0x10A45: + case 0x1D36A: #endif return (double) 20; #ifdef Py_UNICODE_WIDE @@ -373,6 +426,8 @@ case 0x3259: return (double) 29; case 0x09F6: + case 0x0C7B: + case 0x0C7E: case 0x17F3: case 0x2162: case 0x2172: @@ -380,13 +435,30 @@ case 0x3194: case 0x3222: case 0x3282: + case 0xF96B: #ifdef Py_UNICODE_WIDE case 0x10109: + case 0x12401: + case 0x12408: + case 0x12417: + case 0x12420: + case 0x12424: + case 0x12425: + case 0x1242E: + case 0x1242F: + case 0x12436: + case 0x12437: + case 0x1243A: + case 0x1243B: + case 0x1244B: + case 0x12451: + case 0x1D362: #endif return (double) 3; case 0x0F2B: return (double) 3 / 2; case 0x00BE: + case 0x0D75: #ifdef Py_UNICODE_WIDE case 0x10178: #endif @@ -401,6 +473,7 @@ #ifdef Py_UNICODE_WIDE case 0x10112: case 0x10165: + case 0x1D36B: #endif return (double) 30; #ifdef Py_UNICODE_WIDE @@ -440,6 +513,22 @@ case 0x3283: #ifdef Py_UNICODE_WIDE case 0x1010A: + case 0x12402: + case 0x12409: + case 0x1240F: + case 0x12418: + case 0x12421: + case 0x12426: + case 0x12430: + case 0x12438: + case 0x1243C: + case 0x1243D: + case 0x1243E: + case 0x1243F: + case 0x1244C: + case 0x12452: + case 0x12453: + case 0x1D363: #endif return (double) 4; case 0x2158: @@ -448,6 +537,7 @@ case 0x32B5: #ifdef Py_UNICODE_WIDE case 0x10113: + case 0x1D36C: #endif 
return (double) 40; #ifdef Py_UNICODE_WIDE @@ -490,17 +580,33 @@ case 0x1015F: case 0x10173: case 0x10321: + case 0x12403: + case 0x1240A: + case 0x12410: + case 0x12419: + case 0x12422: + case 0x12427: + case 0x12431: + case 0x12439: + case 0x1244D: + case 0x12454: + case 0x12455: + case 0x1D364: #endif return (double) 5; case 0x0F2C: return (double) 5 / 2; case 0x215A: +#ifdef Py_UNICODE_WIDE + case 0x1245C: +#endif return (double) 5 / 6; case 0x215D: return (double) 5 / 8; case 0x1376: case 0x216C: case 0x217C: + case 0x2186: case 0x32BF: #ifdef Py_UNICODE_WIDE case 0x10114: @@ -513,6 +619,7 @@ case 0x10169: case 0x10174: case 0x10323: + case 0x1D36D: #endif return (double) 50; case 0x216E: @@ -537,25 +644,38 @@ case 0x10172: #endif return (double) 5000; + case 0x2187: #ifdef Py_UNICODE_WIDE case 0x1012F: case 0x10147: case 0x10156: +#endif return (double) 50000; -#endif case 0x17F6: case 0x2165: case 0x2175: + case 0x2185: case 0x3026: case 0x3225: case 0x3285: + case 0xF9D1: + case 0xF9D3: #ifdef Py_UNICODE_WIDE case 0x1010C: + case 0x12404: + case 0x1240B: + case 0x12411: + case 0x1241A: + case 0x12428: + case 0x12440: + case 0x1244E: + case 0x1D365: #endif return (double) 6; case 0x1377: #ifdef Py_UNICODE_WIDE case 0x10115: + case 0x1D36E: #endif return (double) 60; #ifdef Py_UNICODE_WIDE @@ -574,6 +694,15 @@ case 0x3286: #ifdef Py_UNICODE_WIDE case 0x1010D: + case 0x12405: + case 0x1240C: + case 0x12412: + case 0x1241B: + case 0x12429: + case 0x12441: + case 0x12442: + case 0x12443: + case 0x1D366: #endif return (double) 7; case 0x0F2D: @@ -583,6 +712,7 @@ case 0x1378: #ifdef Py_UNICODE_WIDE case 0x10116: + case 0x1D36F: #endif return (double) 70; #ifdef Py_UNICODE_WIDE @@ -601,11 +731,20 @@ case 0x3287: #ifdef Py_UNICODE_WIDE case 0x1010E: + case 0x12406: + case 0x1240D: + case 0x12413: + case 0x1241C: + case 0x1242A: + case 0x12444: + case 0x12445: + case 0x1D367: #endif return (double) 8; case 0x1379: #ifdef Py_UNICODE_WIDE case 0x10117: + case 0x1D370: 
"""Generate C code for the _PyUnicode_ToNumeric function.

The generated function is printed to stdout.  Yes, you must copy/paste the
output into Objects/unicodectype.c.
"""

# Numeric values whose ``return`` line is emitted with a leading tab rather
# than spaces.
# XXX keep tabs to reduce diffs
_TAB_VALUES = frozenset("""0 1 1/2 1/3 1/4 1/5 1/6 1/8 10 100 1000 10000
                           11 12 13 14 15 16 17 18 19 2
                           2/3 3 3/4 3/5 3/8 30 4 4/5 5 5/6 5/8 50 500 5000
                           6 60 7 7/8 70 8 80 9 90""".split())

# NOTE(review): the indentation widths inside the emitted C text (4 spaces
# before "case"/"switch"/"default", 8 spaces before the non-tab "return")
# were reconstructed from a whitespace-collapsed patch -- confirm against
# the layout of Objects/unicodectype.c before regenerating.
_HEADER = """\
double _PyUnicode_ToNumeric(Py_UNICODE ch)
{
    switch (ch) {\
"""

_FOOTER = """\
    default:
\treturn (double) _PyUnicode_ToDigit(ch);
    }
}
"""


def read_database(filename=None):
    """Collect the code points that need an explicit numeric ``case``.

    Each line of the data file is split on ';'.  Field 0 is the code point
    in hexadecimal, field 7 is read as the digit value and field 8 as the
    numeric value.  Only characters that have a numeric value but no digit
    value are kept; the generated ``default:`` branch hands everything else
    to _PyUnicode_ToDigit.

    filename -- path of the data file to read.  When omitted, the path is
                taken from makeunicodedata.UNICODE_DATA, as before.

    Returns a dict mapping the numeric value string (e.g. "1/2") to the
    list of code points (ints) carrying that value, in file order.
    """
    if filename is None:
        # Imported lazily so that this module can be imported (and
        # write_function reused) without makeunicodedata on the path.
        from makeunicodedata import UNICODE_DATA
        filename = UNICODE_DATA % ""

    numbers = {}
    with open(filename) as database:
        for line in database:
            # Strip only the line terminator: slicing off the last character
            # unconditionally would corrupt a final line without a newline.
            record = line.rstrip("\n").split(";")
            char = int(record[0], 16)
            digit = record[7]
            number = record[8]
            if number and not digit:
                numbers.setdefault(number, []).append(char)
    return numbers


def write_function(numbers):
    """Print the C source of _PyUnicode_ToNumeric to stdout.

    numbers -- mapping of numeric value string to a list of code points, as
               returned by read_database().

    Groups are emitted in the sort order of their value strings.  Code
    points above 0xFFFF are wrapped in ``#ifdef Py_UNICODE_WIDE``; an open
    #ifdef is carried across consecutive groups that consist only of such
    code points, so that their ``return`` statements are guarded as well.
    """
    print(_HEADER)

    # Sort the code points of every group so the BMP ones always precede the
    # wide ones; the #ifdef placement below relies on that.  Groups without
    # any code point would produce a ``return`` with no ``case`` label.
    groups = sorted((value, sorted(chars))
                    for value, chars in numbers.items() if chars)

    in_wide = False
    for index, (value, chars) in enumerate(groups):
        for char in chars:
            if char > 0xFFFF and not in_wide:
                print("#ifdef Py_UNICODE_WIDE")
                in_wide = True
            print("    case 0x%04X:" % (char,))

        # close before the return if we have a char in the BMP
        if in_wide and chars[0] <= 0xFFFF:
            print("#endif")
            in_wide = False

        # the return statement
        indent = "\t" if value in _TAB_VALUES else "        "
        print("%sreturn (double) %s;" % (indent, value.replace("/", " / ")))

        # close the #ifdef after the return, if the next item has a char in
        # the BMP
        if in_wide and index + 1 < len(groups):
            if groups[index + 1][1][0] <= 0xFFFF:
                print("#endif")
                in_wide = False

    # The last group consisted only of wide characters.
    if in_wide:
        print("#endif")

    print(_FOOTER)


def main():
    """Read the Unicode database and print the generated function."""
    write_function(read_database())


if __name__ == "__main__":
    main()