Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h	(revision 86824)
+++ Include/unicodeobject.h	(working copy)
@@ -355,6 +355,22 @@
     for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
     } while (0)
 
+/* UTF-16 surrogate helpers.  Macro arguments may be evaluated repeatedly. */
+#define Py_UNICODE_ISSURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
+#define Py_UNICODE_ISHIGHSURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
+#define Py_UNICODE_ISLOWSURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
+#define Py_UNICODE_JOIN_SURROGATES(high, low)                               \
+    (((((Py_UCS4)(high) - 0xD800) << 10) | ((Py_UCS4)(low) - 0xDC00)) + 0x10000)
+/* Yield the code point at ptr and advance ptr past it; needs ptr < end. */
+#ifdef Py_UNICODE_WIDE
+#define Py_UNICODE_NEXT(ptr, end) (*(ptr)++)
+#else
+#define Py_UNICODE_NEXT(ptr, end)                                           \
+    ((Py_UNICODE_ISHIGHSURROGATE(*(ptr)) && (ptr) + 1 < (end) &&            \
+      Py_UNICODE_ISLOWSURROGATE((ptr)[1])) ?  /* pair fully inside range */ \
+     ((ptr) += 2, Py_UNICODE_JOIN_SURROGATES((ptr)[-2], (ptr)[-1])) :       \
+     (Py_UCS4)*(ptr)++)
+#endif
 /* Check if substring matches at given offset.  The offset must be
    valid, and the substring must not be empty. */
 
@@ -737,7 +753,7 @@
     const char *errors          /* error handling */
     );
 
-/* Encodes a Unicode object and returns the result as Python string
+/* Encodes a Unicode object and returns the result as Python bytes
    object. */
 
 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 86824)
+++ Objects/unicodeobject.c	(working copy)
@@ -1192,19 +1192,8 @@
     if (w != NULL) {
         worig = w;
         wend = w + size;
-        while (u != uend && w != wend) {
-            if (0xD800 <= u[0] && u[0] <= 0xDBFF
-                && 0xDC00 <= u[1] && u[1] <= 0xDFFF)
-            {
-                *w = (((u[0] & 0x3FF) << 10) | (u[1] & 0x3FF)) + 0x10000;
-                u += 2;
-            }
-            else {
-                *w = *u;
-                u++;
-            }
-            w++;
-        }
+        while (u != uend && w != wend)
+            *w++ = Py_UNICODE_NEXT(u, uend);
         if (w != wend)
             *w = L'\0';
         return w - worig;
@@ -3213,6 +3202,7 @@
                       const char *errors,
                       int byteorder)
 {
+    const Py_UNICODE *send = s + size;
     PyObject *v;
     unsigned char *p;
     Py_ssize_t nsize, bytesize;
@@ -3257,7 +3247,7 @@
     if (byteorder == 0)
         STORECHAR(0xFEFF);
     if (size == 0)
-        goto done;
+        return v;
 
     if (byteorder == -1) {
         /* force LE */
@@ -3274,22 +3264,11 @@
         iorder[3] = 0;
     }
 
-    while (size-- > 0) {
-        Py_UCS4 ch = *s++;
-#ifndef Py_UNICODE_WIDE
-        if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
-            Py_UCS4 ch2 = *s;
-            if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
-                ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
-                s++;
-                size--;
-            }
-        }
-#endif
+    while (s < send) {
+        Py_UCS4 ch;
+        ch = Py_UNICODE_NEXT(s, send);
         STORECHAR(ch);
     }
-
-  done:
     return v;
 #undef STORECHAR
 }
@@ -7654,8 +7633,8 @@
 
     e = p + PyUnicode_GET_SIZE(self);
     cased = 0;
-    for (; p < e; p++) {
-        register const Py_UNICODE ch = *p;
+    while (p < e) {
+        register const Py_UCS4 ch = Py_UNICODE_NEXT(p, e);
 
         if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch))
             return PyBool_FromLong(0);
@@ -7688,8 +7667,8 @@
 
     e = p + PyUnicode_GET_SIZE(self);
     cased = 0;
-    for (; p < e; p++) {
-        register const Py_UNICODE ch = *p;
+    while (p < e) {
+        register const Py_UCS4 ch = Py_UNICODE_NEXT(p, e);
 
         if (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISTITLE(ch))
             return PyBool_FromLong(0);
@@ -7726,8 +7705,8 @@
     e = p + PyUnicode_GET_SIZE(self);
     cased = 0;
     previous_is_cased = 0;
-    for (; p < e; p++) {
-        register const Py_UNICODE ch = *p;
+    while (p < e) {
+        register const Py_UCS4 ch = Py_UNICODE_NEXT(p, e);
 
         if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) {
             if (previous_is_cased)
@@ -7798,8 +7777,8 @@
         return PyBool_FromLong(0);
 
     e = p + PyUnicode_GET_SIZE(self);
-    for (; p < e; p++) {
-        if (!Py_UNICODE_ISALPHA(*p))
+    while (p < e) {
+        if (!Py_UNICODE_ISALPHA(Py_UNICODE_NEXT(p, e)))
             return PyBool_FromLong(0);
     }
     return PyBool_FromLong(1);
@@ -7827,8 +7806,9 @@
         return PyBool_FromLong(0);
 
     e = p + PyUnicode_GET_SIZE(self);
-    for (; p < e; p++) {
-        if (!Py_UNICODE_ISALNUM(*p))
+    while (p < e) {
+        Py_UCS4 ch = Py_UNICODE_NEXT(p, e);
+        if (!Py_UNICODE_ISALNUM(ch))
             return PyBool_FromLong(0);
     }
     return PyBool_FromLong(1);
@@ -7856,8 +7836,8 @@
         return PyBool_FromLong(0);
 
     e = p + PyUnicode_GET_SIZE(self);
-    for (; p < e; p++) {
-        if (!Py_UNICODE_ISDECIMAL(*p))
+    while (p < e) {
+        if (!Py_UNICODE_ISDECIMAL(Py_UNICODE_NEXT(p, e)))
             return PyBool_FromLong(0);
     }
     return PyBool_FromLong(1);
@@ -7885,8 +7865,8 @@
         return PyBool_FromLong(0);
 
     e = p + PyUnicode_GET_SIZE(self);
-    for (; p < e; p++) {
-        if (!Py_UNICODE_ISDIGIT(*p))
+    while (p < e) {
+        if (!Py_UNICODE_ISDIGIT(Py_UNICODE_NEXT(p, e)))
             return PyBool_FromLong(0);
     }
     return PyBool_FromLong(1);
@@ -7914,8 +7894,8 @@
         return PyBool_FromLong(0);
 
     e = p + PyUnicode_GET_SIZE(self);
-    for (; p < e; p++) {
-        if (!Py_UNICODE_ISNUMERIC(*p))
+    while (p < e) {
+        if (!Py_UNICODE_ISNUMERIC(Py_UNICODE_NEXT(p, e)))
             return PyBool_FromLong(0);
     }
     return PyBool_FromLong(1);
@@ -7926,11 +7906,13 @@
 {
     register const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
     register const Py_UNICODE *e;
+    Py_UCS4 ch;
 
     /* Special case for empty strings */
     if (PyUnicode_GET_SIZE(self) == 0)
         return 0;
 
+    e = p + PyUnicode_GET_SIZE(self);
     /* PEP 3131 says that the first character must be in
        XID_Start and subsequent characters in XID_Continue,
        and for the ASCII range, the 2.x rules apply (i.e
@@ -7939,14 +7921,14 @@
        definition of XID_Start and XID_Continue, it is sufficient
        to check just for these, except that _ must be allowed
        as starting an identifier.  */
-    if (!_PyUnicode_IsXidStart(*p) && *p != 0x5F /* LOW LINE */)
+    ch = Py_UNICODE_NEXT(p, e);
+    if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */)
         return 0;
-
-    e = p + PyUnicode_GET_SIZE(self);
-    for (p++; p < e; p++) {
-        if (!_PyUnicode_IsXidContinue(*p))
+    while (p < e) {  /* every remaining code point must be XID_Continue */
+        ch = Py_UNICODE_NEXT(p, e);
+        if (!_PyUnicode_IsXidContinue(ch))
             return 0;
     }
     return 1;
 }
 
@@ -7980,8 +7962,8 @@
     }
 
     e = p + PyUnicode_GET_SIZE(self);
-    for (; p < e; p++) {
-        if (!Py_UNICODE_ISPRINTABLE(*p)) {
+    while (p < e) {
+        if (!Py_UNICODE_ISPRINTABLE(Py_UNICODE_NEXT(p, e))) {
             Py_RETURN_FALSE;
         }
     }
Index: Lib/test/test_unicode.py
===================================================================
--- Lib/test/test_unicode.py	(revision 86824)
+++ Lib/test/test_unicode.py	(working copy)
@@ -344,11 +344,17 @@
     def test_islower(self):
         string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
         self.checkequalnofix(False, '\u1FFc', 'islower')
+        nonbmp = ('\N{MATHEMATICAL BOLD SMALL A}bc'
+                  '\N{MATHEMATICAL BOLD SMALL D}ef')
+        self.checkequalnofix(True, nonbmp, 'islower')
 
     def test_isupper(self):
         string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
         if not sys.platform.startswith('java'):
             self.checkequalnofix(False, '\u1FFc', 'isupper')
+        nonbmp = ('\N{MATHEMATICAL BOLD CAPITAL A}BC'
+                  '\N{MATHEMATICAL BOLD CAPITAL D}EF')
+        self.checkequalnofix(True, nonbmp, 'isupper')
 
     def test_istitle(self):
         string_tests.MixinStrUnicodeUserStringTest.test_title(self)
@@ -364,6 +370,9 @@
     def test_isalpha(self):
         string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
         self.checkequalnofix(True, '\u1FFc', 'isalpha')
+        nonbmp = ('\N{OLD ITALIC LETTER A}'
+                  '\N{MATHEMATICAL BOLD CAPITAL A}')
+        self.checkequalnofix(True, nonbmp, 'isalpha')
 
     def test_isdecimal(self):
         self.checkequalnofix(False, '', 'isdecimal')
@@ -382,6 +391,8 @@
         self.checkequalnofix(True, '\u2460', 'isdigit')
         self.checkequalnofix(False, '\xbc', 'isdigit')
         self.checkequalnofix(True, '\u0660', 'isdigit')
+        test = '\N{FULLWIDTH DIGIT ONE}23\N{FULLWIDTH DIGIT FOUR}'
+        self.checkequalnofix(True, test, 'isdigit')
 
     def test_isnumeric(self):
         self.checkequalnofix(False, '', 'isnumeric')
@@ -392,6 +403,9 @@
         self.checkequalnofix(True, '\u0660', 'isnumeric')
         self.checkequalnofix(True, '0123456789', 'isnumeric')
         self.checkequalnofix(False, '0123456789a', 'isnumeric')
+        nonbmp = ('\N{COUNTING ROD UNIT DIGIT ONE}23'
+                  '\N{COUNTING ROD UNIT DIGIT FOUR}')
+        self.checkequalnofix(True, nonbmp, 'isnumeric')
 
         self.assertRaises(TypeError, "abc".isnumeric, 42)
 
@@ -403,6 +417,9 @@
         self.assertTrue("bc".isidentifier())
         self.assertTrue("b_".isidentifier())
         self.assertTrue("µ".isidentifier())
+        nonbmp = ('\N{OLD ITALIC LETTER A}'
+                  '\N{MATHEMATICAL BOLD CAPITAL A}')
+        self.assertTrue(nonbmp.isidentifier())
 
         self.assertFalse(" ".isidentifier())
         self.assertFalse("[".isidentifier())
@@ -420,6 +437,9 @@
         self.assertFalse("\u0378".isprintable())
         # single surrogate character
         self.assertFalse("\ud800".isprintable())
+        nonbmp = ('\N{OLD ITALIC LETTER A}'
+                  '\N{MATHEMATICAL BOLD CAPITAL A}')
+        self.assertTrue(nonbmp.isprintable())
 
     def test_contains(self):
         # Testing Unicode contains method