diff -r d49cf0800c14 Doc/c-api/unicode.rst
--- a/Doc/c-api/unicode.rst	Mon Aug 22 09:46:56 2011 +0200
+++ b/Doc/c-api/unicode.rst	Mon Aug 22 19:15:54 2011 +0300
@@ -195,6 +195,27 @@
    possible.  This macro does not raise exceptions.
 
 
+These APIs can be used to work with surrogates:
+
+.. c:macro:: Py_UNICODE_IS_SURROGATE(ch)
+
+   Check if *ch* is a surrogate (``0xD800 <= ch <= 0xDFFF``).
+
+.. c:macro:: Py_UNICODE_IS_HIGH_SURROGATE(ch)
+
+   Check if *ch* is a high surrogate (``0xD800 <= ch <= 0xDBFF``).
+
+.. c:macro:: Py_UNICODE_IS_LOW_SURROGATE(ch)
+
+   Check if *ch* is a low surrogate (``0xDC00 <= ch <= 0xDFFF``).
+
+.. c:macro:: Py_UNICODE_JOIN_SURROGATES(high, low)
+
+   Join two surrogate characters and return a single Py_UCS4 value.
+   *high* and *low* are respectively the leading and trailing surrogates in a
+   surrogate pair.
+
+
 Plain Py_UNICODE
 """"""""""""""""
 
diff -r d49cf0800c14 Include/unicodeobject.h
--- a/Include/unicodeobject.h	Mon Aug 22 09:46:56 2011 +0200
+++ b/Include/unicodeobject.h	Mon Aug 22 19:15:54 2011 +0300
@@ -362,6 +362,15 @@
         for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
     } while (0)
 
+/* Macros to work with surrogates.  The argument ch is evaluated twice. */
+#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
+#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
+#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
+/* Join two surrogate characters and return a single Py_UCS4 value. */
+#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
+    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
+      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
+
 /* Check if substring matches at given offset.  The offset must be
    valid, and the substring must not be empty. */
 