--- Tools/unicode/makeunicodedata.py.old	2007-06-10 00:22:08.000000000 +0300
+++ Tools/unicode/makeunicodedata.py	2007-06-10 00:55:41.000000000 +0300
@@ -34,6 +34,7 @@
 UNICODE_DATA = "UnicodeData%s.txt"
 COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
 EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
+DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt"
 
 old_versions = ["3.2.0"]
 
@@ -65,7 +66,8 @@
     version = ""
     unicode = UnicodeData(UNICODE_DATA % version,
                           COMPOSITION_EXCLUSIONS % version,
-                          EASTASIAN_WIDTH % version)
+                          EASTASIAN_WIDTH % version,
+                          DERIVEDNORMALIZATION_PROPS % version)
 
     print len(filter(None, unicode.table)), "characters"
 
@@ -86,7 +88,7 @@
 
 def makeunicodedata(unicode, trace):
 
-    dummy = (0, 0, 0, 0, 0)
+    dummy = (0, 0, 0, 0, 0, 0)
     table = [dummy]
     cache = {0: dummy}
     index = [0] * len(unicode.chars)
@@ -106,8 +108,10 @@
             bidirectional = BIDIRECTIONAL_NAMES.index(record[4])
             mirrored = record[9] == "Y"
             eastasianwidth = EASTASIANWIDTH_NAMES.index(record[15])
+            normalizationquickcheck = record[16]
             item = (
-                category, combining, bidirectional, mirrored, eastasianwidth
+                category, combining, bidirectional, mirrored, eastasianwidth,
+                normalizationquickcheck
                 )
             # add entry to index and item tables
             i = cache.get(item)
@@ -221,7 +225,7 @@
     print >>fp, \
           "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {"
     for item in table:
-        print >>fp, "    {%d, %d, %d, %d, %d}," % item
+        print >>fp, "    {%d, %d, %d, %d, %d, %d}," % item
     print >>fp, "};"
     print >>fp
 
@@ -679,7 +683,8 @@
 
 class UnicodeData:
 
-    def __init__(self, filename, exclusions, eastasianwidth, expand=1):
+    def __init__(self, filename, exclusions, eastasianwidth,
+                 derivednormalizationprops=None, expand=1):
         self.changed = []
         file = open(filename)
         table = [None] * 0x110000
@@ -742,6 +747,26 @@
         for i in range(0, 0x110000):
             if table[i] is not None:
                 table[i].append(widths[i])
+        if derivednormalizationprops:
+            quickchecks = [0] * 0x110000 # default is Yes
+            qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split()
+            for s in open(derivednormalizationprops):
+                if '#' in s: s = s[:s.index('#')]
+                s = [i.strip() for i in s.split(';')]
+                if len(s) < 2 or s[1] not in qc_order:
+                    continue
+                quickcheck = 'MN'.index(s[2])+1 # Maybe or No
+                quickcheck_shift = qc_order.index(s[1])*2
+                quickcheck <<= quickcheck_shift
+                if '..' not in s[0]:
+                    s[0] = '%s..%s' % (s[0], s[0])
+                first, last = [int(c, 16) for c in s[0].split('..')]
+                for char in range(first, last+1):
+                    assert not (quickchecks[char] >> quickcheck_shift) & 3
+                    quickchecks[char] |= quickcheck
+            for i in range(0, 0x110000):
+                if table[i] is not None:
+                    table[i].append(quickchecks[i])
 
     def uselatin1(self):
         # restrict character range to ISO Latin 1
--- Modules/unicodedata.c.old	2007-06-10 00:28:27.000000000 +0300
+++ Modules/unicodedata.c	2007-06-10 01:37:38.000000000 +0300
@@ -27,6 +27,7 @@
     const unsigned char mirrored;       /* true if mirrored in bidir mode */
     const unsigned char east_asian_width;       /* index into
                                                    _PyUnicode_EastAsianWidth */
+    const unsigned char normalization_quick_check; /* see is_normalized() */
 } _PyUnicode_DatabaseRecord;
 
 typedef struct change_record {
@@ -714,6 +715,33 @@
     PyUnicode_Resize(&result, o - PyUnicode_AS_UNICODE(result));
     return result;
 }
+
+/* Return 1 if the input is certainly normalized, 0 if it might not be. */
+static int
+is_normalized(PyObject *self, PyObject *input, int nfc, int k)
+{
+    Py_UNICODE *i, *end;
+    unsigned char prev_combining = 0;
+
+    /* The two quickcheck bits at this shift mean 0=Yes, 1=Maybe, 2=No,
+       as described in http://unicode.org/reports/tr15/#Annex8. */
+    unsigned char quickcheck_shift = ((nfc ? 2 : 0) + (k ? 1 : 0)) * 2;
+
+    i = PyUnicode_AS_UNICODE(input);
+    end = i + PyUnicode_GET_SIZE(input);
+    while (i < end) {
+        const _PyUnicode_DatabaseRecord *record = _getrecord_ex(*i++);
+        unsigned char combining = record->combining;
+        unsigned char quickcheck = record->normalization_quick_check;
+
+        if ((quickcheck >> quickcheck_shift) & 3)
+            return 0; /* this string might need normalization */
+        if (combining && prev_combining > combining)
+            return 0; /* non-canonical sort order, not normalized */
+        prev_combining = combining;
+    }
+    return 1; /* certainly normalized */
+}
 
 PyDoc_STRVAR(unicodedata_normalize__doc__,
 "normalize(form, unistr)\n\
@@ -738,14 +766,34 @@
         return input;
     }
 
-    if (strcmp(form, "NFC") == 0)
+    if (strcmp(form, "NFC") == 0) {
+        if (is_normalized(self, input, 1, 0)) {
+            Py_INCREF(input);
+            return input;
+        }
         return nfc_nfkc(self, input, 0);
-    if (strcmp(form, "NFKC") == 0)
+    }
+    if (strcmp(form, "NFKC") == 0) {
+        if (is_normalized(self, input, 1, 1)) {
+            Py_INCREF(input);
+            return input;
+        }
         return nfc_nfkc(self, input, 1);
-    if (strcmp(form, "NFD") == 0)
+    }
+    if (strcmp(form, "NFD") == 0) {
+        if (is_normalized(self, input, 0, 0)) {
+            Py_INCREF(input);
+            return input;
+        }
         return nfd_nfkd(self, input, 0);
-    if (strcmp(form, "NFKD") == 0)
+    }
+    if (strcmp(form, "NFKD") == 0) {
+        if (is_normalized(self, input, 0, 1)) {
+            Py_INCREF(input);
+            return input;
+        }
         return nfd_nfkd(self, input, 1);
+    }
     PyErr_SetString(PyExc_ValueError, "invalid normalization form");
     return NULL;
 }
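
For reference, one way to exercise the fast path once the patch is applied (a
hypothetical interactive check, not part of the patch itself; the quick check
only applies to strings whose characters all have quickcheck value "Yes" and
are in canonical combining-class order):

    import unicodedata

    s = u"D\u00fcsseldorf"              # already in NFC form
    t = unicodedata.normalize("NFC", s)
    # With the quick check, normalize() returns the input object itself
    # instead of rebuilding an equal string, so this should print True.
    print t is s
    # Strings that are not in the requested form still take the slow path
    # and are normalized as before.
    print unicodedata.normalize("NFD", u"\u00e9") == u"e\u0301"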