Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 77297)
+++ Objects/unicodeobject.c	(working copy)
@@ -2959,13 +2959,6 @@
     return NULL;
 }
 
-/* Return a Unicode-Escape string version of the Unicode object.
-
-   If quotes is true, the string is enclosed in u"" or u'' quotes as
-   appropriate.
-
-*/
-
 Py_LOCAL_INLINE(const Py_UNICODE *) findchar(const Py_UNICODE *s,
                                              Py_ssize_t size,
                                              Py_UNICODE ch)
@@ -2981,28 +2974,43 @@
     return NULL;
 }
 
-static
-PyObject *unicodeescape_string(const Py_UNICODE *s,
-                               Py_ssize_t size,
-                               int quotes)
+/* Return a Unicode-Escape or Raw-Unicode-Escape string version of the
+ * Unicode object.
+ *
+ * If enclose_in_quotes is true, the returned string is enclosed in
+ * u'' / ur'' (or u"" / ur"" when it holds ' but no ").  Single quotes
+ * are escaped unless the string is enclosed in double quotes.
+ *
+ * If raw is true, the returned string is suitable for use as a raw
+ * unicode literal.  Otherwise, the returned string is suitable for
+ * use as a normal unicode literal.
+ */
+Py_LOCAL(PyObject *)
+unicodeescape_string(const Py_UNICODE *s,
+                     Py_ssize_t size,
+                     int enclose_in_quotes,
+                     int raw)
 {
+
+    /* string object to return */
     PyObject *repr;
+
+    /* pointer to repr's internal buffer, will be incremented as the
+     * contents are written */
     char *p;
 
-    static const char *hexdigit = "0123456789abcdef";
-#ifdef Py_UNICODE_WIDE
-    const Py_ssize_t expandsize = 10;
-#else
-    const Py_ssize_t expandsize = 6;
-#endif
+    static const char* const hexdigit = "0123456789abcdef";
 
-    /* XXX(nnorwitz): rather than over-allocating, it would be
-       better to choose a different scheme.  Perhaps scan the
-       first N-chars of the string and allocate based on that size.
-    */
+    /* non-zero if single quotes should be escaped */
+    int escape_single_quotes = 1;
+
     /* Initial allocation is based on the longest-possible unichr
        escape.
 
+       XXX(nnorwitz): rather than over-allocating, it would be
+       better to choose a different scheme.  Perhaps scan the
+       first N-chars of the string and allocate based on that size.
+
        In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
        unichr, so in this case it's the longest unichr escape. In
        narrow (UTF-16) builds this is five chars per source unichr
@@ -3013,116 +3021,151 @@
        so in the narrow (UTF-16) build case it's the longest unichr
        escape.
     */
+#ifdef Py_UNICODE_WIDE
+    static const Py_ssize_t expandsize = 10;
+#else
+    static const Py_ssize_t expandsize = 6;
+#endif
 
-    if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
+    /* enclosing quotes add 4 characters if raw (ur''), else 3 (u'') */
+    const Py_ssize_t enclosingQuotesSize = enclose_in_quotes ?
+        (raw ? 4 : 3) : 0;
+
+    /* worst-case length of the new string object, or -1 if it would
+     * overflow (tested by division: signed overflow is undefined) */
+    const Py_ssize_t maxSize =
+        (size > (PY_SSIZE_T_MAX - enclosingQuotesSize) / expandsize) ?
+        -1 : (size * expandsize) + enclosingQuotesSize;
+
+    if (maxSize < 0)
         return PyErr_NoMemory();
 
-    repr = PyString_FromStringAndSize(NULL,
-                                      2
-                                      + expandsize*size
-                                      + 1);
-    if (repr == NULL)
-        return NULL;
+    repr = PyString_FromStringAndSize(NULL, maxSize);
+    /* a zero-length result may be the shared empty string object;
+     * hand it back untouched instead of writing to / resizing it */
+    if (repr == NULL || maxSize == 0)
+        return repr;
 
     p = PyString_AS_STRING(repr);
 
-    if (quotes) {
+    if (enclose_in_quotes) {
         *p++ = 'u';
-        *p++ = (findchar(s, size, '\'') &&
-                !findchar(s, size, '"')) ? '"' : '\'';
+        if (raw) *p++ = 'r';
+        if (findchar(s, size, '\'') && !findchar(s, size, '"')) {
+            *p++ = '"';
+            escape_single_quotes = 0;
+        } else {
+            *p++ = '\'';
+        }
     }
     while (size-- > 0) {
-        Py_UNICODE ch = *s++;
+        Py_UCS4 ch = *s++;
 
-        /* Escape quotes and backslashes */
-        if ((quotes &&
-             ch == (Py_UNICODE) PyString_AS_STRING(repr)[1]) || ch == '\\') {
-            *p++ = '\\';
-            *p++ = (char) ch;
-            continue;
-        }
-
-#ifdef Py_UNICODE_WIDE
-        /* Map 21-bit characters to '\U00xxxxxx' */
-        else if (ch >= 0x10000) {
-            *p++ = '\\';
-            *p++ = 'U';
-            *p++ = hexdigit[(ch >> 28) & 0x0000000F];
-            *p++ = hexdigit[(ch >> 24) & 0x0000000F];
-            *p++ = hexdigit[(ch >> 20) & 0x0000000F];
-            *p++ = hexdigit[(ch >> 16) & 0x0000000F];
-            *p++ = hexdigit[(ch >> 12) & 0x0000000F];
-            *p++ = hexdigit[(ch >> 8) & 0x0000000F];
-            *p++ = hexdigit[(ch >> 4) & 0x0000000F];
-            *p++ = hexdigit[ch & 0x0000000F];
-            continue;
-        }
-#else
-        /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
-        else if (ch >= 0xD800 && ch < 0xDC00) {
+#ifndef Py_UNICODE_WIDE
+        /* decode UTF-16 surrogate pairs to UCS-4 */
+        if (ch >= 0xD800 && ch < 0xDC00 && size) {
             Py_UNICODE ch2;
-            Py_UCS4 ucs;
 
             ch2 = *s++;
             size--;
             if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
-                ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
-                *p++ = '\\';
-                *p++ = 'U';
-                *p++ = hexdigit[(ucs >> 28) & 0x0000000F];
-                *p++ = hexdigit[(ucs >> 24) & 0x0000000F];
-                *p++ = hexdigit[(ucs >> 20) & 0x0000000F];
-                *p++ = hexdigit[(ucs >> 16) & 0x0000000F];
-                *p++ = hexdigit[(ucs >> 12) & 0x0000000F];
-                *p++ = hexdigit[(ucs >> 8) & 0x0000000F];
-                *p++ = hexdigit[(ucs >> 4) & 0x0000000F];
-                *p++ = hexdigit[ucs & 0x0000000F];
-                continue;
+                ch = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
+            } else {
+                /* back up; this is an isolated surrogate, not a
+                 * surrogate pair.  isolated surrogates are copied
+                 * as-is
+                 */
+                s--;
+                size++;
             }
-            /* Fall through: isolated surrogates are copied as-is */
-            s--;
-            size++;
         }
 #endif
 
+        /* Map 21-bit characters to '\U00xxxxxx' */
+        if (ch >= 0x10000) {
+            *p++ = '\\';
+            *p++ = 'U';
+            *p++ = hexdigit[(ch >> 28) & 0xf];
+            *p++ = hexdigit[(ch >> 24) & 0xf];
+            *p++ = hexdigit[(ch >> 20) & 0xf];
+            *p++ = hexdigit[(ch >> 16) & 0xf];
+            *p++ = hexdigit[(ch >> 12) & 0xf];
+            *p++ = hexdigit[(ch >> 8) & 0xf];
+            *p++ = hexdigit[(ch >> 4) & 0xf];
+            *p++ = hexdigit[ch & 0xf];
+            continue;
+        }
+
         /* Map 16-bit characters to '\uxxxx' */
         if (ch >= 256) {
             *p++ = '\\';
             *p++ = 'u';
-            *p++ = hexdigit[(ch >> 12) & 0x000F];
-            *p++ = hexdigit[(ch >> 8) & 0x000F];
-            *p++ = hexdigit[(ch >> 4) & 0x000F];
-            *p++ = hexdigit[ch & 0x000F];
+            *p++ = hexdigit[(ch >> 12) & 0xf];
+            *p++ = hexdigit[(ch >> 8) & 0xf];
+            *p++ = hexdigit[(ch >> 4) & 0xf];
+            *p++ = hexdigit[ch & 0xf];
+            continue;
         }
 
-        /* Map special whitespace to '\t', \n', '\r' */
-        else if (ch == '\t') {
-            *p++ = '\\';
-            *p++ = 't';
-        }
-        else if (ch == '\n') {
-            *p++ = '\\';
-            *p++ = 'n';
-        }
-        else if (ch == '\r') {
-            *p++ = '\\';
-            *p++ = 'r';
-        }
+        if (raw) {
 
-        /* Map non-printable US ASCII to '\xhh' */
-        else if (ch < ' ' || ch >= 0x7F) {
-            *p++ = '\\';
-            *p++ = 'x';
-            *p++ = hexdigit[(ch >> 4) & 0x000F];
-            *p++ = hexdigit[ch & 0x000F];
+            /* escape quotes and backslashes.  unicode escape
+             * sequences are used because just adding a backslash
+             * changes the value of the raw unicode literal (the
+             * backslash cancels the special behavior of the next
+             * character, but the backslash itself is not removed).
+             */
+            if ((escape_single_quotes && ch == '\'') || ch == '\\') {
+                *p++ = '\\';
+                *p++ = 'u';
+                *p++ = hexdigit[(ch >> 12) & 0xf];
+                *p++ = hexdigit[(ch >> 8) & 0xf];
+                *p++ = hexdigit[(ch >> 4) & 0xf];
+                *p++ = hexdigit[ch & 0xf];
+                continue;
+            }
+
+        } else {
+
+            /* Escape quotes and backslashes */
+            if ((escape_single_quotes && ch == '\'') || ch == '\\') {
+                *p++ = '\\';
+                *p++ = (char) ch;
+                continue;
+            }
+
+            /* Map special whitespace to '\t', '\n', '\r' */
+            if (ch == '\t') {
+                *p++ = '\\';
+                *p++ = 't';
+                continue;
+            }
+            if (ch == '\n') {
+                *p++ = '\\';
+                *p++ = 'n';
+                continue;
+            }
+            if (ch == '\r') {
+                *p++ = '\\';
+                *p++ = 'r';
+                continue;
+            }
+
+            /* Map non-printable US ASCII to '\xhh' */
+            if (ch < ' ' || ch >= 0x7F) {
+                *p++ = '\\';
+                *p++ = 'x';
+                *p++ = hexdigit[(ch >> 4) & 0xf];
+                *p++ = hexdigit[ch & 0xf];
+                continue;
+            }
         }
 
         /* Copy everything else as-is */
-        else
-            *p++ = (char) ch;
+        *p++ = (char) ch;
     }
-    if (quotes)
-        *p++ = PyString_AS_STRING(repr)[1];
+    if (enclose_in_quotes)
+        *p++ = PyString_AS_STRING(repr)[raw ? 2 : 1];
 
     *p = '\0';
     _PyString_Resize(&repr, p - PyString_AS_STRING(repr));
@@ -3132,7 +3175,7 @@
 PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
                                         Py_ssize_t size)
 {
-    return unicodeescape_string(s, size, 0);
+    return unicodeescape_string(s, size, 0, 0);
 }
 
 PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
@@ -3267,87 +3310,7 @@
 PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
                                            Py_ssize_t size)
 {
-    PyObject *repr;
-    char *p;
-    char *q;
-
-    static const char *hexdigit = "0123456789abcdef";
-#ifdef Py_UNICODE_WIDE
-    const Py_ssize_t expandsize = 10;
-#else
-    const Py_ssize_t expandsize = 6;
-#endif
-
-    if (size > PY_SSIZE_T_MAX / expandsize)
-        return PyErr_NoMemory();
-
-    repr = PyString_FromStringAndSize(NULL, expandsize * size);
-    if (repr == NULL)
-        return NULL;
-    if (size == 0)
-        return repr;
-
-    p = q = PyString_AS_STRING(repr);
-    while (size-- > 0) {
-        Py_UNICODE ch = *s++;
-#ifdef Py_UNICODE_WIDE
-        /* Map 32-bit characters to '\Uxxxxxxxx' */
-        if (ch >= 0x10000) {
-            *p++ = '\\';
-            *p++ = 'U';
-            *p++ = hexdigit[(ch >> 28) & 0xf];
-            *p++ = hexdigit[(ch >> 24) & 0xf];
-            *p++ = hexdigit[(ch >> 20) & 0xf];
-            *p++ = hexdigit[(ch >> 16) & 0xf];
-            *p++ = hexdigit[(ch >> 12) & 0xf];
-            *p++ = hexdigit[(ch >> 8) & 0xf];
-            *p++ = hexdigit[(ch >> 4) & 0xf];
-            *p++ = hexdigit[ch & 15];
-        }
-        else
-#else
-            /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
-            if (ch >= 0xD800 && ch < 0xDC00) {
-                Py_UNICODE ch2;
-                Py_UCS4 ucs;
-
-                ch2 = *s++;
-                size--;
-                if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
-                    ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
-                    *p++ = '\\';
-                    *p++ = 'U';
-                    *p++ = hexdigit[(ucs >> 28) & 0xf];
-                    *p++ = hexdigit[(ucs >> 24) & 0xf];
-                    *p++ = hexdigit[(ucs >> 20) & 0xf];
-                    *p++ = hexdigit[(ucs >> 16) & 0xf];
-                    *p++ = hexdigit[(ucs >> 12) & 0xf];
-                    *p++ = hexdigit[(ucs >> 8) & 0xf];
-                    *p++ = hexdigit[(ucs >> 4) & 0xf];
-                    *p++ = hexdigit[ucs & 0xf];
-                    continue;
-                }
-                /* Fall through: isolated surrogates are copied as-is */
-                s--;
-                size++;
-            }
-#endif
-        /* Map 16-bit characters to '\uxxxx' */
-        if (ch >= 256) {
-            *p++ = '\\';
-            *p++ = 'u';
-            *p++ = hexdigit[(ch >> 12) & 0xf];
-            *p++ = hexdigit[(ch >> 8) & 0xf];
-            *p++ = hexdigit[(ch >> 4) & 0xf];
-            *p++ = hexdigit[ch & 15];
-        }
-        /* Copy everything else as-is */
-        else
-            *p++ = (char) ch;
-    }
-    *p = '\0';
-    _PyString_Resize(&repr, p - q);
-    return repr;
+    return unicodeescape_string(s, size, 0, 1);
 }
 
 PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
@@ -7490,7 +7453,7 @@
 {
     return unicodeescape_string(PyUnicode_AS_UNICODE(unicode),
                                 PyUnicode_GET_SIZE(unicode),
-                                1);
+                                1, 0);
 }
 
 PyDoc_STRVAR(rfind__doc__,