Index: Include/unicodeobject.h =================================================================== --- Include/unicodeobject.h (revision 75273) +++ Include/unicodeobject.h (working copy) @@ -219,24 +219,6 @@ # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString # define _PyUnicode_Fini _PyUnicodeUCS2_Fini # define _PyUnicode_Init _PyUnicodeUCS2_Init -# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha -# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit -# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit -# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak -# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase -# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric -# define _PyUnicode_IsPrintable _PyUnicodeUCS2_IsPrintable -# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase -# define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart -# define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue -# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase -# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace -# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit -# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit -# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase -# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric -# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase -# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase #else @@ -318,24 +300,6 @@ # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString # define _PyUnicode_Fini _PyUnicodeUCS4_Fini # define _PyUnicode_Init _PyUnicodeUCS4_Init -# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha -# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit -# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit -# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak -# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase -# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric -# define 
_PyUnicode_IsPrintable _PyUnicodeUCS4_IsPrintable -# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase -# define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart -# define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue -# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase -# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace -# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit -# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit -# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase -# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric -# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase -# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase #endif @@ -347,7 +311,7 @@ configure Python using --with-wctype-functions. This reduces the interpreter's code size. */ -#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS) +#if defined(Py_UNICODE_WIDE) && defined(WANT_WCTYPE_FUNCTIONS) #include <wctype.h> @@ -1520,75 +1484,75 @@ */ PyAPI_FUNC(int) _PyUnicode_IsLowercase( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsUppercase( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsTitlecase( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsXidStart( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsXidContinue( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsWhitespace( - const Py_UNICODE ch /* Unicode character */ + const Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsLinebreak( - const Py_UNICODE ch /* Unicode character */ + const Py_UCS4 ch /* Unicode character */ ); -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase( - Py_UNICODE ch /* Unicode character */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( + 
Py_UCS4 ch /* Unicode character */ ); -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase( - Py_UNICODE ch /* Unicode character */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( + Py_UCS4 ch /* Unicode character */ ); -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase( - Py_UNICODE ch /* Unicode character */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_ToDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(double) _PyUnicode_ToNumeric( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsDigit( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsNumeric( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsPrintable( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(int) _PyUnicode_IsAlpha( - Py_UNICODE ch /* Unicode character */ + Py_UCS4 ch /* Unicode character */ ); PyAPI_FUNC(size_t) Py_UNICODE_strlen(const Py_UNICODE *u); Index: Objects/unicodectype.c =================================================================== --- Objects/unicodectype.c (revision 75274) +++ Objects/unicodectype.c (working copy) @@ -26,9 +26,9 @@ #define NUMERIC_MASK 0x1000 typedef struct { - const Py_UNICODE upper; - const Py_UNICODE lower; - const Py_UNICODE title; + const Py_UCS4 upper; + const Py_UCS4 lower; + const Py_UCS4 title; const unsigned char decimal; const unsigned char digit; const unsigned short flags; @@ -37,15 +37,10 @@ #include "unicodetype_db.h" static const _PyUnicode_TypeRecord * -gettyperecord(Py_UNICODE code) 
+gettyperecord(Py_UCS4 code) { int index; -#ifdef Py_UNICODE_WIDE if (code >= 0x110000) - index = 0; - else -#endif { index = index1[(code>>SHIFT)]; index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))]; @@ -57,7 +52,7 @@ /* Returns the titlecase Unicode characters corresponding to ch or just ch if no titlecase mapping is known. */ -Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch) +Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); int delta = ctype->title; @@ -74,7 +69,7 @@ /* Returns 1 for Unicode characters having the category 'Lt', 0 otherwise. */ -int _PyUnicode_IsTitlecase(Py_UNICODE ch) +int _PyUnicode_IsTitlecase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -84,7 +79,7 @@ /* Returns 1 for Unicode characters having the XID_Start property, 0 otherwise. */ -int _PyUnicode_IsXidStart(Py_UNICODE ch) +int _PyUnicode_IsXidStart(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -94,7 +89,7 @@ /* Returns 1 for Unicode characters having the XID_Continue property, 0 otherwise. */ -int _PyUnicode_IsXidContinue(Py_UNICODE ch) +int _PyUnicode_IsXidContinue(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -104,14 +99,14 @@ /* Returns the integer decimal (0-9) for Unicode characters having this property, -1 otherwise. */ -int _PyUnicode_ToDecimalDigit(Py_UNICODE ch) +int _PyUnicode_ToDecimalDigit(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1; } -int _PyUnicode_IsDecimalDigit(Py_UNICODE ch) +int _PyUnicode_IsDecimalDigit(Py_UCS4 ch) { if (_PyUnicode_ToDecimalDigit(ch) < 0) return 0; @@ -121,14 +116,14 @@ /* Returns the integer digit (0-9) for Unicode characters having this property, -1 otherwise. */ -int _PyUnicode_ToDigit(Py_UNICODE ch) +int _PyUnicode_ToDigit(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1; } -int _PyUnicode_IsDigit(Py_UNICODE ch) +int _PyUnicode_IsDigit(Py_UCS4 ch) { if (_PyUnicode_ToDigit(ch) < 0) return 0; @@ -138,7 +133,7 @@ /* Returns the numeric value as double for Unicode characters having this property, -1.0 otherwise. 
*/ -int _PyUnicode_IsNumeric(Py_UNICODE ch) +int _PyUnicode_IsNumeric(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -158,7 +153,7 @@ * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR) * Zs (Separator, Space) other than ASCII space('\x20'). */ -int _PyUnicode_IsPrintable(Py_UNICODE ch) +int _PyUnicode_IsPrintable(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -170,7 +165,7 @@ /* Returns 1 for Unicode characters having the category 'Ll', 0 otherwise. */ -int _PyUnicode_IsLowercase(Py_UNICODE ch) +int _PyUnicode_IsLowercase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -180,7 +175,7 @@ /* Returns 1 for Unicode characters having the category 'Lu', 0 otherwise. */ -int _PyUnicode_IsUppercase(Py_UNICODE ch) +int _PyUnicode_IsUppercase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -190,7 +185,7 @@ /* Returns the uppercase Unicode characters corresponding to ch or just ch if no uppercase mapping is known. */ -Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch) +Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); int delta = ctype->upper; @@ -204,7 +199,7 @@ /* Returns the lowercase Unicode characters corresponding to ch or just ch if no lowercase mapping is known. */ -Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch) +Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); int delta = ctype->lower; @@ -218,7 +213,7 @@ /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt', 'Lo' or 'Lm', 0 otherwise. 
*/ -int _PyUnicode_IsAlpha(Py_UNICODE ch) +int _PyUnicode_IsAlpha(Py_UCS4 ch) { const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); @@ -230,27 +225,27 @@ /* Export the interfaces using the wchar_t type for portability reasons: */ -int _PyUnicode_IsLowercase(Py_UNICODE ch) +int _PyUnicode_IsLowercase(Py_UCS4 ch) { return iswlower(ch); } -int _PyUnicode_IsUppercase(Py_UNICODE ch) +int _PyUnicode_IsUppercase(Py_UCS4 ch) { return iswupper(ch); } -Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch) +Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch) { return towlower(ch); } -Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch) +Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch) { return towupper(ch); } -int _PyUnicode_IsAlpha(Py_UNICODE ch) +int _PyUnicode_IsAlpha(Py_UCS4 ch) { return iswalpha(ch); } Index: Tools/unicode/makeunicodedata.py =================================================================== --- Tools/unicode/makeunicodedata.py (revision 75274) +++ Tools/unicode/makeunicodedata.py (working copy) @@ -475,25 +475,14 @@ print('/* Returns the numeric value as double for Unicode characters', file=fp) print(' * having this property, -1.0 otherwise.', file=fp) print(' */', file=fp) - print('double _PyUnicode_ToNumeric(Py_UNICODE ch)', file=fp) + print('double _PyUnicode_ToNumeric(Py_UCS4 ch)', file=fp) print('{', file=fp) print(' switch (ch) {', file=fp) for value, codepoints in numeric_items: - haswide = False - hasnonewide = False codepoints.sort() for codepoint in codepoints: - if codepoint < 0x10000: - hasnonewide = True - if codepoint >= 0x10000 and not haswide: - print('#ifdef Py_UNICODE_WIDE', file=fp) - haswide = True print(' case 0x%04X:' % (codepoint,), file=fp) - if haswide and hasnonewide: - print('#endif', file=fp) print(' return (double) %s;' % (value,), file=fp) - if haswide and not hasnonewide: - print('#endif', file=fp) print(' }', file=fp) print(' return -1.0;', file=fp) print('}', file=fp) @@ -503,28 +492,17 @@ print("/* Returns 1 for Unicode characters having 
the bidirectional", file=fp) print(" * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise.", file=fp) print(" */", file=fp) - print('int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)', file=fp) + print('int _PyUnicode_IsWhitespace(register const Py_UCS4 ch)', file=fp) print('{', file=fp) print('#ifdef WANT_WCTYPE_FUNCTIONS', file=fp) print(' return iswspace(ch);', file=fp) print('#else', file=fp) print(' switch (ch) {', file=fp) - haswide = False - hasnonewide = False spaces.sort() for codepoint in spaces: - if codepoint < 0x10000: - hasnonewide = True - if codepoint >= 0x10000 and not haswide: - print('#ifdef Py_UNICODE_WIDE', file=fp) - haswide = True print(' case 0x%04X:' % (codepoint,), file=fp) - if haswide and hasnonewide: - print('#endif', file=fp) print(' return 1;', file=fp) - if haswide and not hasnonewide: - print('#endif', file=fp) print(' }', file=fp) print(' return 0;', file=fp) @@ -536,24 +514,13 @@ print("/* Returns 1 for Unicode characters having the category 'Zl',", file=fp) print(" * 'Zp' or type 'B', 0 otherwise.", file=fp) print(" */", file=fp) - print('int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)', file=fp) + print('int _PyUnicode_IsLinebreak(register const Py_UCS4 ch)', file=fp) print('{', file=fp) print(' switch (ch) {', file=fp) - haswide = False - hasnonewide = False linebreaks.sort() for codepoint in linebreaks: - if codepoint < 0x10000: - hasnonewide = True - if codepoint >= 0x10000 and not haswide: - print('#ifdef Py_UNICODE_WIDE', file=fp) - haswide = True print(' case 0x%04X:' % (codepoint,), file=fp) - if haswide and hasnonewide: - print('#endif', file=fp) print(' return 1;', file=fp) - if haswide and not hasnonewide: - print('#endif', file=fp) print(' }', file=fp) print(' return 0;', file=fp)