Index: Include/unicodeobject.h =================================================================== --- Include/unicodeobject.h (revision 69230) +++ Include/unicodeobject.h (working copy) @@ -218,24 +218,6 @@ # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString # define _PyUnicode_Fini _PyUnicodeUCS2_Fini # define _PyUnicode_Init _PyUnicodeUCS2_Init -# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha -# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit -# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit -# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak -# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase -# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric -# define _PyUnicode_IsPrintable _PyUnicodeUCS2_IsPrintable -# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase -# define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart -# define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue -# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase -# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace -# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit -# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit -# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase -# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric -# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase -# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase #else @@ -316,24 +298,6 @@ # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString # define _PyUnicode_Fini _PyUnicodeUCS4_Fini # define _PyUnicode_Init _PyUnicodeUCS4_Init -# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha -# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit -# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit -# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak -# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase -# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric -# define 
_PyUnicode_IsPrintable _PyUnicodeUCS4_IsPrintable -# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase -# define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart -# define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue -# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase -# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace -# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit -# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit -# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase -# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric -# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase -# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase #endif @@ -345,7 +309,7 @@ configure Python using --with-wctype-functions. This reduces the interpreter's code size. */ -#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS) +#if defined(Py_UNICODE_WIDE) && defined(WANT_WCTYPE_FUNCTIONS) #include <wctype.h> @@ -1503,75 +1467,75 @@ */ PyAPI_FUNC(int) _PyUnicode_IsLowercase( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsUppercase( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsTitlecase( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsXidStart( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsXidContinue( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsWhitespace( - const Py_UNICODE ch /* Unicode character */ + const Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsLinebreak( - const Py_UNICODE ch /* Unicode character */ + const Py_UCS4 ch /* Unicode character */ ); -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase( - Py_UNICODE ch /* Unicode character */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( + 
Py_UCS4 ch /* Unicode character */ ); -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase( - Py_UNICODE ch /* Unicode character */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( + Py_UCS4 ch /* Unicode character */ ); -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase( - Py_UNICODE ch /* Unicode character */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_ToDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(double) _PyUnicode_ToNumeric( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsNumeric( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsPrintable( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsAlpha( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(size_t) Py_UNICODE_strlen(const Py_UNICODE *u); Index: Objects/unicodectype.c =================================================================== --- Objects/unicodectype.c (revision 69230) +++ Objects/unicodectype.c (working copy) @@ -36,16 +36,13 @@ #include "unicodetype_db.h" static const _PyUnicode_TypeRecord * -gettyperecord(Py_UNICODE code) +gettyperecord(Py_UCS4 code) { int index; -#ifdef Py_UNICODE_WIDE if (code >= 0x110000) index = 0; - else -#endif - { + else { index = index1[(code>>SHIFT)]; index = index2[(index<flags & DECIMAL_MASK) ? 
ctype->decimal : -1; } -int _PyUnicode_IsDecimalDigit(Py_UNICODE ch) +int _PyUnicode_IsDecimalDigit(Py_UCS4 ch) { if (_PyUnicode_ToDecimalDigit(ch) < 0) return 0; @@ -145,14 +142,14 @@ /* Returns the integer digit (0-9) for Unicode characters having this property, -1 otherwise. */ -int _PyUnicode_ToDigit(Py_UNICODE ch) +int _PyUnicode_ToDigit(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1; } -int _PyUnicode_IsDigit(Py_UNICODE ch) +int _PyUnicode_IsDigit(Py_UCS4 ch) { if (_PyUnicode_ToDigit(ch) < 0) return 0; @@ -164,16 +161,14 @@ /* TODO: replace with unicodetype_db.h table */ -double _PyUnicode_ToNumeric(Py_UNICODE ch) +double _PyUnicode_ToNumeric(Py_UCS4 ch) { switch (ch) { case 0x0F33: return (double) -1 / 2; case 0x17F0: case 0x3007: -#ifdef Py_UNICODE_WIDE case 0x1018A: -#endif return (double) 0; case 0x09F4: case 0x17F1: @@ -184,7 +179,6 @@ case 0x3192: case 0x3220: case 0x3280: -#ifdef Py_UNICODE_WIDE case 0x10107: case 0x10142: case 0x10158: @@ -192,23 +186,18 @@ case 0x1015A: case 0x10320: case 0x103D1: -#endif return (double) 1; case 0x00BD: case 0x0F2A: case 0x2CFD: -#ifdef Py_UNICODE_WIDE case 0x10141: case 0x10175: case 0x10176: -#endif return (double) 1 / 2; case 0x2153: return (double) 1 / 3; case 0x00BC: -#ifdef Py_UNICODE_WIDE case 0x10140: -#endif return (double) 1 / 4; case 0x2155: return (double) 1 / 5; @@ -230,7 +219,6 @@ case 0x3038: case 0x3229: case 0x3289: -#ifdef Py_UNICODE_WIDE case 0x10110: case 0x10149: case 0x10150: @@ -243,39 +231,32 @@ case 0x10322: case 0x103D3: case 0x10A44: -#endif return (double) 10; case 0x0BF1: case 0x137B: case 0x216D: case 0x217D: -#ifdef Py_UNICODE_WIDE case 0x10119: case 0x1014B: case 0x10152: case 0x1016A: case 0x103D5: case 0x10A46: -#endif return (double) 100; case 0x0BF2: case 0x216F: case 0x217F: case 0x2180: -#ifdef Py_UNICODE_WIDE case 0x10122: case 0x1014D: case 0x10154: case 0x10171: case 0x10A47: -#endif return (double) 
1000; case 0x137C: case 0x2182: -#ifdef Py_UNICODE_WIDE case 0x1012B: case 0x10155: -#endif return (double) 10000; case 0x216A: case 0x217A: @@ -346,19 +327,15 @@ case 0x3193: case 0x3221: case 0x3281: -#ifdef Py_UNICODE_WIDE case 0x10108: case 0x1015B: case 0x1015C: case 0x1015D: case 0x1015E: case 0x103D2: -#endif return (double) 2; case 0x2154: -#ifdef Py_UNICODE_WIDE case 0x10177: -#endif return (double) 2 / 3; case 0x2156: return (double) 2 / 5; @@ -368,20 +345,16 @@ case 0x249B: case 0x24F4: case 0x3039: -#ifdef Py_UNICODE_WIDE case 0x10111: case 0x103D4: case 0x10A45: -#endif return (double) 20; -#ifdef Py_UNICODE_WIDE case 0x1011A: return (double) 200; case 0x10123: return (double) 2000; case 0x1012C: return (double) 20000; -#endif case 0x3251: return (double) 21; case 0x3252: @@ -408,16 +381,12 @@ case 0x3194: case 0x3222: case 0x3282: -#ifdef Py_UNICODE_WIDE case 0x10109: -#endif return (double) 3; case 0x0F2B: return (double) 3 / 2; case 0x00BE: -#ifdef Py_UNICODE_WIDE case 0x10178: -#endif return (double) 3 / 4; case 0x2157: return (double) 3 / 5; @@ -426,12 +395,9 @@ case 0x1374: case 0x303A: case 0x325A: -#ifdef Py_UNICODE_WIDE case 0x10112: case 0x10165: -#endif return (double) 30; -#ifdef Py_UNICODE_WIDE case 0x1011B: case 0x1016B: return (double) 300; @@ -439,7 +405,6 @@ return (double) 3000; case 0x1012D: return (double) 30000; -#endif case 0x325B: return (double) 31; case 0x325C: @@ -466,26 +431,20 @@ case 0x3195: case 0x3223: case 0x3283: -#ifdef Py_UNICODE_WIDE case 0x1010A: -#endif return (double) 4; case 0x2158: return (double) 4 / 5; case 0x1375: case 0x32B5: -#ifdef Py_UNICODE_WIDE case 0x10113: -#endif return (double) 40; -#ifdef Py_UNICODE_WIDE case 0x1011C: return (double) 400; case 0x10125: return (double) 4000; case 0x1012E: return (double) 40000; -#endif case 0x32B6: return (double) 41; case 0x32B7: @@ -510,7 +469,6 @@ case 0x3025: case 0x3224: case 0x3284: -#ifdef Py_UNICODE_WIDE case 0x1010B: case 0x10143: case 0x10148: @@ -518,7 
+476,6 @@ case 0x1015F: case 0x10173: case 0x10321: -#endif return (double) 5; case 0x0F2C: return (double) 5 / 2; @@ -530,7 +487,6 @@ case 0x216C: case 0x217C: case 0x32BF: -#ifdef Py_UNICODE_WIDE case 0x10114: case 0x10144: case 0x1014A: @@ -541,11 +497,9 @@ case 0x10169: case 0x10174: case 0x10323: -#endif return (double) 50; case 0x216E: case 0x217E: -#ifdef Py_UNICODE_WIDE case 0x1011D: case 0x10145: case 0x1014C: @@ -555,113 +509,85 @@ case 0x1016E: case 0x1016F: case 0x10170: -#endif return (double) 500; case 0x2181: -#ifdef Py_UNICODE_WIDE case 0x10126: case 0x10146: case 0x1014E: case 0x10172: -#endif return (double) 5000; -#ifdef Py_UNICODE_WIDE case 0x1012F: case 0x10147: case 0x10156: return (double) 50000; -#endif case 0x17F6: case 0x2165: case 0x2175: case 0x3026: case 0x3225: case 0x3285: -#ifdef Py_UNICODE_WIDE case 0x1010C: -#endif return (double) 6; case 0x1377: -#ifdef Py_UNICODE_WIDE case 0x10115: -#endif return (double) 60; -#ifdef Py_UNICODE_WIDE case 0x1011E: return (double) 600; case 0x10127: return (double) 6000; case 0x10130: return (double) 60000; -#endif case 0x17F7: case 0x2166: case 0x2176: case 0x3027: case 0x3226: case 0x3286: -#ifdef Py_UNICODE_WIDE case 0x1010D: -#endif return (double) 7; case 0x0F2D: return (double) 7 / 2; case 0x215E: return (double) 7 / 8; case 0x1378: -#ifdef Py_UNICODE_WIDE case 0x10116: -#endif return (double) 70; -#ifdef Py_UNICODE_WIDE case 0x1011F: return (double) 700; case 0x10128: return (double) 7000; case 0x10131: return (double) 70000; -#endif case 0x17F8: case 0x2167: case 0x2177: case 0x3028: case 0x3227: case 0x3287: -#ifdef Py_UNICODE_WIDE case 0x1010E: -#endif return (double) 8; case 0x1379: -#ifdef Py_UNICODE_WIDE case 0x10117: -#endif return (double) 80; -#ifdef Py_UNICODE_WIDE case 0x10120: return (double) 800; case 0x10129: return (double) 8000; case 0x10132: return (double) 80000; -#endif case 0x17F9: case 0x2168: case 0x2178: case 0x3029: case 0x3228: case 0x3288: -#ifdef Py_UNICODE_WIDE 
case 0x1010F: -#endif return (double) 9; case 0x0F2E: return (double) 9 / 2; case 0x137A: -#ifdef Py_UNICODE_WIDE case 0x10118: -#endif return (double) 90; -#ifdef Py_UNICODE_WIDE case 0x10121: case 0x1034A: return (double) 900; @@ -669,13 +595,12 @@ return (double) 9000; case 0x10133: return (double) 90000; -#endif default: return (double) _PyUnicode_ToDigit(ch); } } -int _PyUnicode_IsNumeric(Py_UNICODE ch) +int _PyUnicode_IsNumeric(Py_UCS4 ch) { return _PyUnicode_ToNumeric(ch) != -1.0; } @@ -693,7 +618,7 @@ * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR) * Zs (Separator, Space) other than ASCII space('\x20'). */ -int _PyUnicode_IsPrintable(Py_UNICODE ch) +int _PyUnicode_IsPrintable(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -705,7 +630,7 @@ /* Returns 1 for Unicode characters having the bidirectional type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */ -int _PyUnicode_IsWhitespace(register const Py_UNICODE ch) +int _PyUnicode_IsWhitespace(register const Py_UCS4 ch) { switch (ch) { case 0x0009: /* HORIZONTAL TABULATION */ @@ -747,7 +672,7 @@ /* Returns 1 for Unicode characters having the category 'Ll', 0 otherwise. */ -int _PyUnicode_IsLowercase(Py_UNICODE ch) +int _PyUnicode_IsLowercase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -757,7 +682,7 @@ /* Returns 1 for Unicode characters having the category 'Lu', 0 otherwise. */ -int _PyUnicode_IsUppercase(Py_UNICODE ch) +int _PyUnicode_IsUppercase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -767,7 +692,7 @@ /* Returns the uppercase Unicode characters corresponding to ch or just ch if no uppercase mapping is known. 
*/ -Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch) +Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); int delta = ctype->upper; @@ -781,7 +706,7 @@ /* Returns the lowercase Unicode characters corresponding to ch or just ch if no lowercase mapping is known. */ -Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch) +Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); int delta = ctype->lower; @@ -795,7 +720,7 @@ /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt', 'Lo' or 'Lm', 0 otherwise. */ -int _PyUnicode_IsAlpha(Py_UNICODE ch) +int _PyUnicode_IsAlpha(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -807,32 +732,32 @@ /* Export the interfaces using the wchar_t type for portability reasons: */ -int _PyUnicode_IsWhitespace(Py_UNICODE ch) +int _PyUnicode_IsWhitespace(Py_UCS4 ch) { return iswspace(ch); } -int _PyUnicode_IsLowercase(Py_UNICODE ch) +int _PyUnicode_IsLowercase(Py_UCS4 ch) { return iswlower(ch); } -int _PyUnicode_IsUppercase(Py_UNICODE ch) +int _PyUnicode_IsUppercase(Py_UCS4 ch) { return iswupper(ch); } -Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch) +Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch) { return towlower(ch); } -Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch) +Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch) { return towupper(ch); } -int _PyUnicode_IsAlpha(Py_UNICODE ch) +int _PyUnicode_IsAlpha(Py_UCS4 ch) { return iswalpha(ch); }