diff -r 068365acbe73 -r 3e25ce2f60d9 Doc/library/unicodedata.rst
--- a/Doc/library/unicodedata.rst  Wed Mar 25 21:03:47 2015 +0200
+++ b/Doc/library/unicodedata.rst  Sat Mar 28 19:07:16 2015 +0000
@@ -131,6 +131,42 @@
    a human reader, if one has combining characters and the other doesn't, they
    may not compare equal.
 
+
+.. function:: quick_check(form, chr)
+
+   Return the 'Quick_Check' property value for the normal form *form* for the
+   Unicode character *chr*. Valid values for *form* are 'NFC', 'NFKC', 'NFD', and
+   'NFKD'. The return value is a string: 'Yes', 'No', or 'Maybe'. Return 'Yes' if
+   the 'Quick_Check' property is not defined for the character.
+
+   The 'Quick_Check' property is useful when joining (or appending to) strings
+   that have already been normalized to a particular normalization form. If *s1*
+   and *s2* are two strings that are both normalized to a given normalization form
+   *form*, then the concatenation *s1* + *s2* is **not** guaranteed to be
+   normalized to that normalization form; there might be a non-normalized
+   subsequence of characters straddling the point where the strings were joined.
+   We might pass the concatenation *s1* + *s2* to ``normalize()``, but if *s1* or
+   *s2* is long then this implies some wasted effort since a large amount of text
+   will be processed a second time for no gain (having already been processed when
+   *s1* or *s2* was normalized).
+
+   However, it is possible to avoid most of this additional work by re-processing
+   only a small part of each string: characters at the end of *s1* and characters
+   at the start of *s2*. Specifically, let *x* be the index of the last character
+   in *s1* that satisfies both ``quick_check(form, chr) == 'Yes'`` and
+   ``combining(chr) == 0`` (or zero if no character in *s1* satisfies these
+   conditions), and let *y* be the index of the first character in *s2* that
+   satisfies the same conditions (or the length of *s2* if no character in *s2*
+   satisfies the conditions). In this case, the string
+   ``s1[:x] + normalize(form, s1[x:] + s2[:y]) + s2[y:]`` is guaranteed to be
+   normalized to *form*.
+
+   For more information, please refer to `section 9 of Unicode Standard Annex #15
+   ("Unicode Normalization Forms")
+   <http://unicode.org/reports/tr15/#Detecting_Normalization_Forms>`_.
+
+   .. versionadded:: 3.5
+
 
 In addition, the module exposes the following constant:
 
diff -r 068365acbe73 -r 3e25ce2f60d9 Doc/whatsnew/3.5.rst
--- a/Doc/whatsnew/3.5.rst  Wed Mar 25 21:03:47 2015 +0200
+++ b/Doc/whatsnew/3.5.rst  Sat Mar 28 19:07:16 2015 +0000
@@ -419,6 +419,12 @@
 * The :func:`time.monotonic` function is now always available. (Contributed by
   Victor Stinner in :issue:`22043`.)
 
+unicodedata
+-----------
+
+* The :func:`quick_check` function was added. (Contributed by Philip Eve in
+  :issue:`23550`.)
+
 urllib
 ------
 
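The new documentation above describes the fast-concatenation recipe only in prose. As an illustration (not part of the patch): the sketch below assumes an interpreter built with this change, so that unicodedata.quick_check() exists, and the helper name concat_normalized is invented here. It applies the documentation's definitions of x and y directly:

    import unicodedata

    def concat_normalized(form, s1, s2):
        """Join s1 and s2, both already normalized to *form*, re-normalizing
        only the characters around the join point."""
        # x: index of the last character of s1 with Quick_Check == 'Yes' and
        # combining class 0, or 0 if no character of s1 qualifies.
        x = 0
        for i in range(len(s1) - 1, -1, -1):
            if (unicodedata.quick_check(form, s1[i]) == 'Yes'
                    and unicodedata.combining(s1[i]) == 0):
                x = i
                break
        # y: index of the first such character of s2, or len(s2) if none.
        y = len(s2)
        for i, ch in enumerate(s2):
            if (unicodedata.quick_check(form, ch) == 'Yes'
                    and unicodedata.combining(ch) == 0):
                y = i
                break
        # Only the slice spanning the join point is re-normalized.
        return s1[:x] + unicodedata.normalize(form, s1[x:] + s2[:y]) + s2[y:]

For inputs that are already normalized to the given form, the result should agree with unicodedata.normalize(form, s1 + s2) while re-normalizing only a short substring around the join point.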
diff -r 068365acbe73 -r 3e25ce2f60d9 Lib/test/test_unicodedata.py
--- a/Lib/test/test_unicodedata.py  Wed Mar 25 21:03:47 2015 +0200
+++ b/Lib/test/test_unicodedata.py  Sat Mar 28 19:07:16 2015 +0000
@@ -223,6 +223,33 @@
         self.assertEqual(eaw('\u2010'), 'A')
         self.assertEqual(eaw('\U00020000'), 'W')
 
+    def test_quick_check(self):
+        qc = self.db.quick_check
+        def qc_all(char):
+            return (qc('NFC', char), qc('NFKC', char),
+                    qc('NFD', char), qc('NFKD', char))
+        self.assertEqual(qc_all('s'), ('Yes', 'Yes', 'Yes', 'Yes'))
+        self.assertEqual(qc_all('7'), ('Yes', 'Yes', 'Yes', 'Yes'))
+        self.assertEqual(qc_all(' '), ('Yes', 'Yes', 'Yes', 'Yes'))
+        self.assertEqual(qc_all('\uFFFE'), ('Yes', 'Yes', 'Yes', 'Yes'))
+        self.assertEqual(qc_all('\u01C7'), ('Yes', 'No', 'Yes', 'No'))
+        self.assertEqual(qc_all('\u01F5'), ('Yes', 'Yes', 'No', 'No'))
+        self.assertEqual(qc_all('\u0308'), ('Maybe', 'Maybe', 'Yes', 'Yes'))
+        self.assertEqual(qc_all('\u0387'), ('No', 'No', 'No', 'No'))
+        self.assertEqual(qc_all('\u1FDD'), ('Yes', 'No', 'No', 'No'))
+        self.assertEqual(qc_all('\U0001133E'),
+                         ('Maybe', 'Maybe', 'Yes', 'Yes'))
+        with self.assertRaises(TypeError):
+            qc()
+        with self.assertRaises(TypeError):
+            qc('NFD')
+        with self.assertRaises(TypeError):
+            qc('NFD', '')
+        with self.assertRaises(TypeError):
+            qc('NFKC', 'xx')
+        with self.assertRaises(ValueError):
+            qc('unknown', 's')
+
 
 class UnicodeMiscTest(UnicodeDatabaseTest):
 
     def test_failed_import_during_compiling(self):
diff -r 068365acbe73 -r 3e25ce2f60d9 Modules/unicodedata.c
--- a/Modules/unicodedata.c  Wed Mar 25 21:03:47 2015 +0200
+++ b/Modules/unicodedata.c  Sat Mar 28 19:07:16 2015 +0000
@@ -36,7 +36,8 @@
     const unsigned char mirrored;       /* true if mirrored in bidir mode */
     const unsigned char east_asian_width;       /* index into
                                                    _PyUnicode_EastAsianWidth */
-    const unsigned char normalization_quick_check; /* see is_normalized() */
+    const unsigned char normalization_quick_check; /* used for quick_check()
+                                                       and normalize() */
 } _PyUnicode_DatabaseRecord;
 
 typedef struct change_record {
@@ -48,6 +49,8 @@
     const double numeric_changed;
 } change_record;
 
+typedef enum {NFC, NFKC, NFD, NFKD, Invalid_NF} normalization_form;
+
 /* data file generated by Tools/unicode/makeunicodedata.py */
 #include "unicodedata_db.h"
 
@@ -65,6 +68,20 @@
     return &_PyUnicode_Database_Records[index];
 }
 
+static normalization_form str_to_nf(const char *form)
+{
+    if (strcmp(form, "NFC") == 0)
+        return NFC;
+    else if (strcmp(form, "NFKC") == 0)
+        return NFKC;
+    else if (strcmp(form, "NFD") == 0)
+        return NFD;
+    else if (strcmp(form, "NFKD") == 0)
+        return NFKD;
+    else
+        return Invalid_NF;
+}
+
 /* ------------- Previous-version API ------------------------------------- */
 typedef struct previous_version {
     PyObject_HEAD
@@ -205,11 +222,11 @@
 }
 
 PyDoc_STRVAR(unicodedata_digit__doc__,
-"digit(unichr[, default])\n\
-\n\
-Returns the digit value assigned to the Unicode character unichr as\n\
-integer. If no such value is defined, default is returned, or, if\n\
-not given, ValueError is raised.");
+"digit(unichr[, default])\n"
+"\n"
+"Returns the digit value assigned to the Unicode character unichr as\n"
+"integer. If no such value is defined, default is returned, or, if\n"
+"not given, ValueError is raised.");
 
 static PyObject *
 unicodedata_digit(PyObject *self, PyObject *args)
@@ -239,11 +256,11 @@
 }
 
 PyDoc_STRVAR(unicodedata_numeric__doc__,
-"numeric(unichr[, default])\n\
-\n\
-Returns the numeric value assigned to the Unicode character unichr\n\
-as float. If no such value is defined, default is returned, or, if\n\
-not given, ValueError is raised.");
+"numeric(unichr[, default])\n"
+"\n"
+"Returns the numeric value assigned to the Unicode character unichr\n"
+"as float. If no such value is defined, default is returned, or, if\n"
+"not given, ValueError is raised.");
 
 static PyObject *
 unicodedata_numeric(PyObject *self, PyObject *args)
@@ -289,10 +306,10 @@
 }
 
 PyDoc_STRVAR(unicodedata_category__doc__,
-"category(unichr)\n\
-\n\
-Returns the general category assigned to the Unicode character\n\
-unichr as string.");
+"category(unichr)\n"
+"\n"
+"Returns the general category assigned to the Unicode character\n"
+"unichr as string.");
 
 static PyObject *
 unicodedata_category(PyObject *self, PyObject *args)
@@ -317,11 +334,11 @@
 }
 
 PyDoc_STRVAR(unicodedata_bidirectional__doc__,
-"bidirectional(unichr)\n\
-\n\
-Returns the bidirectional class assigned to the Unicode character\n\
-unichr as string. If no such value is defined, an empty string is\n\
-returned.");
+"bidirectional(unichr)\n"
+"\n"
+"Returns the bidirectional class assigned to the Unicode character\n"
+"unichr as string. If no such value is defined, an empty string is\n"
+"returned.");
 
 static PyObject *
 unicodedata_bidirectional(PyObject *self, PyObject *args)
@@ -348,11 +365,11 @@
 }
 
 PyDoc_STRVAR(unicodedata_combining__doc__,
-"combining(unichr)\n\
-\n\
-Returns the canonical combining class assigned to the Unicode\n\
-character unichr as integer. Returns 0 if no combining class is\n\
-defined.");
+"combining(unichr)\n"
+"\n"
+"Returns the canonical combining class assigned to the Unicode\n"
+"character unichr as integer. Returns 0 if no combining class is\n"
+"defined.");
 
 static PyObject *
 unicodedata_combining(PyObject *self, PyObject *args)
@@ -377,11 +394,11 @@
 }
 
 PyDoc_STRVAR(unicodedata_mirrored__doc__,
-"mirrored(unichr)\n\
-\n\
-Returns the mirrored property assigned to the Unicode character\n\
-unichr as integer. Returns 1 if the character has been identified as\n\
-a \"mirrored\" character in bidirectional text, 0 otherwise.");
+"mirrored(unichr)\n"
+"\n"
+"Returns the mirrored property assigned to the Unicode character\n"
+"unichr as integer. Returns 1 if the character has been identified as\n"
+"a \"mirrored\" character in bidirectional text, 0 otherwise.");
 
 static PyObject *
 unicodedata_mirrored(PyObject *self, PyObject *args)
@@ -408,10 +425,10 @@
 }
 
 PyDoc_STRVAR(unicodedata_east_asian_width__doc__,
-"east_asian_width(unichr)\n\
-\n\
-Returns the east asian width assigned to the Unicode character\n\
-unichr as string.");
+"east_asian_width(unichr)\n"
+"\n"
+"Returns the east asian width assigned to the Unicode character\n"
+"unichr as string.");
 
 static PyObject *
 unicodedata_east_asian_width(PyObject *self, PyObject *args)
@@ -436,11 +453,11 @@
 }
 
 PyDoc_STRVAR(unicodedata_decomposition__doc__,
-"decomposition(unichr)\n\
-\n\
-Returns the character decomposition mapping assigned to the Unicode\n\
-character unichr as string. An empty string is returned in case no\n\
-such mapping is defined.");
+"decomposition(unichr)\n"
+"\n"
+"Returns the character decomposition mapping assigned to the Unicode\n"
+"character unichr as string. An empty string is returned in case no\n"
+"such mapping is defined.");
 
 static PyObject *
 unicodedata_decomposition(PyObject *self, PyObject *args)
@@ -859,10 +876,10 @@
 }
 
 PyDoc_STRVAR(unicodedata_normalize__doc__,
-"normalize(form, unistr)\n\
-\n\
-Return the normal form 'form' for the Unicode string unistr. Valid\n\
-values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'.");
+"normalize(form, unistr)\n"
+"\n"
+"Return the normal form 'form' for the Unicode string unistr. Valid\n"
+"values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'.");
 
 static PyObject*
 unicodedata_normalize(PyObject *self, PyObject *args)
@@ -884,36 +901,122 @@
         return input;
     }
 
-    if (strcmp(form, "NFC") == 0) {
-        if (is_normalized(self, input, 1, 0)) {
-            Py_INCREF(input);
-            return input;
-        }
-        return nfc_nfkc(self, input, 0);
+    switch (str_to_nf(form)) {
+    case NFC:
+        if (is_normalized(self, input, 1, 0)) {
+            Py_INCREF(input);
+            return input;
+        }
+        return nfc_nfkc(self, input, 0);
+    case NFKC:
+        if (is_normalized(self, input, 1, 1)) {
+            Py_INCREF(input);
+            return input;
+        }
+        return nfc_nfkc(self, input, 1);
+    case NFD:
+        if (is_normalized(self, input, 0, 0)) {
+            Py_INCREF(input);
+            return input;
+        }
+        return nfd_nfkd(self, input, 0);
+    case NFKD:
+        if (is_normalized(self, input, 0, 1)) {
+            Py_INCREF(input);
+            return input;
+        }
+        return nfd_nfkd(self, input, 1);
+    default:
+        PyErr_SetString(
+            PyExc_ValueError,
+            "invalid normalization form - valid normalization forms are "
+            "\"NFC\", \"NFKC\", \"NFD\" and \"NFKD\""
+            );
+        return NULL;
     }
-    if (strcmp(form, "NFKC") == 0) {
-        if (is_normalized(self, input, 1, 1)) {
-            Py_INCREF(input);
-            return input;
-        }
-        return nfc_nfkc(self, input, 1);
+}
+
+PyDoc_STRVAR(unicodedata_quick_check__doc__,
+"quick_check(form, unichr)\n"
+"\n"
+"Return the 'Quick_Check' property value for the normal form 'form'\n"
+"for the Unicode character unichr. Valid values for form are 'NFC',\n"
+"'NFKC', 'NFD', and 'NFKD'. The return value is a string: 'Yes',\n"
+"'No', or 'Maybe'. Return 'Yes' if the 'Quick_Check' property is not\n"
+"defined for the character.\n"
+"\n"
+"The 'Quick_Check' property is useful when joining (or appending to)\n"
+"strings that have already been normalized to a particular\n"
+"normalization form. If s1 and s2 are two strings that are both\n"
+"normalized to a given normalization form 'form', then the\n"
+"concatenation s1 + s2 is *not* guaranteed to be normalized to that\n"
+"normalization form; there might be a non-normalized subsequence of\n"
+"characters straddling the point where the strings were joined. We\n"
+"might pass the concatenation s1 + s2 to normalize(), but if s1 or s2\n"
+"is long then this implies some wasted effort since a large amount of\n"
+"text will be processed a second time for no gain (having already\n"
+"been processed when s1 or s2 was normalized).\n"
+"\n"
+"However, it is possible to avoid most of this additional work by\n"
+"re-processing only a small part of each string: characters at the\n"
+"end of s1 and characters at the start of s2. Specifically, let x be\n"
+"the index of the last character in s1 that satisfies\n"
+"\n"
+"    quick_check(form, chr) == 'Yes' and combining(chr) == 0\n"
+"\n"
+"(or zero if no character in s1 satisfies this condition), and let y\n"
+"be the index of the first character in s2 that satisfies the same\n"
+"condition (or the length of s2 if no character in s2 satisfies the\n"
+"condition). In this case, the string\n"
+"\n"
+"    s1[:x] + normalize(form, s1[x:] + s2[:y]) + s2[y:]\n"
+"\n"
+"is guaranteed to be normalized to 'form'.\n"
+"\n"
+"For more information, please refer to section 9 of Unicode Standard\n"
+"Annex #15 (\"Unicode Normalization Forms\"):\n"
+"\n"
+"    http://unicode.org/reports/tr15/#Detecting_Normalization_Forms");
+
+static PyObject *
+unicodedata_quick_check(PyObject *self, PyObject *args)
+{
+    char *form;
+    PyUnicodeObject *v;
+    unsigned char quickcheck_value;
+    Py_UCS4 c;
+
+    if(!PyArg_ParseTuple(args, "sO!:quick_check",
+                         &form, &PyUnicode_Type, &v))
+        return NULL;
+
+    c = getuchar(v);
+    if (c == (Py_UCS4)-1)
+        return NULL;
+
+    quickcheck_value = _getrecord_ex(c)->normalization_quick_check;
+
+    switch (str_to_nf(form)) {
+    case NFC: quickcheck_value >>= 4; break;
+    case NFKC: quickcheck_value >>= 6; break;
+    case NFD: /*No need to do anything*/ break;
+    case NFKD: quickcheck_value >>= 2; break;
+    default:
+        PyErr_SetString(
+            PyExc_ValueError,
+            "invalid normalization form - valid normalization forms are "
+            "\"NFC\", \"NFKC\", \"NFD\" and \"NFKD\""
+            );
+        return NULL;
     }
-    if (strcmp(form, "NFD") == 0) {
-        if (is_normalized(self, input, 0, 0)) {
-            Py_INCREF(input);
-            return input;
-        }
-        return nfd_nfkd(self, input, 0);
-    }
-    if (strcmp(form, "NFKD") == 0) {
-        if (is_normalized(self, input, 0, 1)) {
-            Py_INCREF(input);
-            return input;
-        }
-        return nfd_nfkd(self, input, 1);
-    }
-    PyErr_SetString(PyExc_ValueError, "invalid normalization form");
-    return NULL;
+
+    /* The two quickcheck bits shifted all the way to the right
+       mean 0=Yes, 1=Maybe, 2=No, as described in
+       http://unicode.org/reports/tr15/#Annex8. 3=Unused is assigned
+       in _PyUnicode_QuickCheckNames as a precaution. */
+    return PyUnicode_FromString(
+        _PyUnicode_QuickCheckNames[quickcheck_value & 3]
+        );
 }
 
 /* -------------------------------------------------------------------- */
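The shifts in unicodedata_quick_check() above imply the following packing of the normalization_quick_check byte: NFD in bits 0-1, NFKD in bits 2-3, NFC in bits 4-5, NFKC in bits 6-7, with 0=Yes, 1=Maybe, 2=No and 3 reserved as "Unused". A Python sketch of that decoding, for review purposes only; decode_quick_check and the sample byte are illustrative and not part of the patch:

    # Decode a packed normalization_quick_check byte the same way the C
    # switch above does (values per UAX #15: 0=Yes, 1=Maybe, 2=No).
    QC_NAMES = ("Yes", "Maybe", "No", "Unused")    # 3 = "Unused" failsafe
    QC_SHIFTS = {"NFD": 0, "NFKD": 2, "NFC": 4, "NFKC": 6}

    def decode_quick_check(packed, form):
        if form not in QC_SHIFTS:
            raise ValueError("invalid normalization form")
        return QC_NAMES[(packed >> QC_SHIFTS[form]) & 3]

    # A record whose NFC/NFKC bits hold 1 ('Maybe') and whose NFD/NFKD bits
    # hold 0 ('Yes') -- the pattern the new tests expect for U+0308.
    packed = (1 << 4) | (1 << 6)
    assert decode_quick_check(packed, "NFC") == "Maybe"
    assert decode_quick_check(packed, "NFD") == "Yes"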
The data in\n\ -this database is based on the UnicodeData.txt file version\n\ -" UNIDATA_VERSION " which is publically available from ftp://ftp.unicode.org/.\n\ -\n\ -The module uses the same names and symbols as defined by the\n\ -UnicodeData File Format " UNIDATA_VERSION "."); +"This module provides access to the Unicode Character Database which\n" +"defines character properties for all Unicode characters. The data in\n" +"this database is based on the UnicodeData.txt file version\n" +UNIDATA_VERSION " which is publically available from ftp://ftp.unicode.org/.\n" +"\n" +"The module uses the same names and symbols as defined by the\n" +"UnicodeData File Format " UNIDATA_VERSION "."); static struct PyModuleDef unicodedatamodule = { PyModuleDef_HEAD_INIT, diff -r 068365acbe73 -r 3e25ce2f60d9 Modules/unicodedata_db.h --- a/Modules/unicodedata_db.h Wed Mar 25 21:03:47 2015 +0200 +++ b/Modules/unicodedata_db.h Sat Mar 28 19:07:16 2015 +0000 @@ -598,7 +598,7 @@ }; /* string literals */ -const char *_PyUnicode_CategoryNames[] = { +static const char *_PyUnicode_CategoryNames[] = { "Cn", "Lu", "Ll", @@ -632,7 +632,7 @@ "So", NULL }; -const char *_PyUnicode_BidirectionalNames[] = { +static const char *_PyUnicode_BidirectionalNames[] = { "", "L", "LRE", @@ -659,7 +659,7 @@ "PDI", NULL }; -const char *_PyUnicode_EastAsianWidthNames[] = { +static const char *_PyUnicode_EastAsianWidthNames[] = { "F", "H", "W", @@ -668,6 +668,13 @@ "N", NULL }; +static const char *_PyUnicode_QuickCheckNames[] = { + "Yes", + "Maybe", + "No", + "Unused", + NULL +}; static const char *decomp_prefix[] = { "", "", diff -r 068365acbe73 -r 3e25ce2f60d9 Tools/unicode/makeunicodedata.py --- a/Tools/unicode/makeunicodedata.py Wed Mar 25 21:03:47 2015 +0200 +++ b/Tools/unicode/makeunicodedata.py Sat Mar 28 19:07:16 2015 +0000 @@ -77,6 +77,10 @@ EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ] +# "Unused" is here as a failsafe, related to the implementation of +# unicodedata.quick_check(). +QUICKCHECK_NAMES = [ "Yes", "Maybe", "No", "Unused" ] + MANDATORY_LINE_BREAKS = [ "BK", "CR", "LF", "NL" ] # note: should match definitions in Objects/unicodectype.c @@ -287,24 +291,30 @@ # the support code moved into unicodedatabase.c print("/* string literals */", file=fp) - print("const char *_PyUnicode_CategoryNames[] = {", file=fp) + print("static const char *_PyUnicode_CategoryNames[] = {", file=fp) for name in CATEGORY_NAMES: print(" \"%s\"," % name, file=fp) print(" NULL", file=fp) print("};", file=fp) - print("const char *_PyUnicode_BidirectionalNames[] = {", file=fp) + print("static const char *_PyUnicode_BidirectionalNames[] = {", file=fp) for name in BIDIRECTIONAL_NAMES: print(" \"%s\"," % name, file=fp) print(" NULL", file=fp) print("};", file=fp) - print("const char *_PyUnicode_EastAsianWidthNames[] = {", file=fp) + print("static const char *_PyUnicode_EastAsianWidthNames[] = {", file=fp) for name in EASTASIANWIDTH_NAMES: print(" \"%s\"," % name, file=fp) print(" NULL", file=fp) print("};", file=fp) + print("static const char *_PyUnicode_QuickCheckNames[] = {", file=fp) + for name in QUICKCHECK_NAMES: + print(" \"%s\"," % name, file=fp) + print(" NULL", file=fp) + print("};", file=fp) + print("static const char *decomp_prefix[] = {", file=fp) for name in decomp_prefix: print(" \"%s\"," % name, file=fp)