diff -r 694110bc91d8 Doc/library/stdtypes.rst
--- a/Doc/library/stdtypes.rst	Sat Jan 07 18:34:24 2012 +0100
+++ b/Doc/library/stdtypes.rst	Sat Jan 07 22:54:05 2012 -0500
@@ -1355,17 +1355,18 @@ functions based on regular expressions.
       'spacious'
       >>> 'www.example.com'.strip('cmowz.')
       'example'
 
 
 .. method:: str.swapcase()
 
    Return a copy of the string with uppercase characters converted to lowercase and
-   vice versa.
+   vice versa. Note that it is not necessarily true that
+   ``s.swapcase().swapcase() == s``.
 
 
 .. method:: str.title()
 
    Return a titlecased version of the string where words start with an uppercase
    character and the remaining characters are lowercase.
 
    The algorithm uses a simple language-independent definition of a word as
diff -r 694110bc91d8 Include/unicodeobject.h
--- a/Include/unicodeobject.h	Sat Jan 07 18:34:24 2012 +0100
+++ b/Include/unicodeobject.h	Sat Jan 07 22:54:05 2012 -0500
@@ -2003,16 +2003,34 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowerca
 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
     Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
     Py_UCS4 ch       /* Unicode character */
     );
 
+PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
+    Py_UCS4 ch,       /* Unicode character */
+    Py_UCS4 *res      /* Receives 1-3 code points; the count is returned */
+    );
+
+PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
+    Py_UCS4 ch,       /* Unicode character */
+    Py_UCS4 *res      /* Receives 1-3 code points; the count is returned */
+    );
+
+PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
+    const Py_UCS4 ch         /* Unicode character */
+    );
+
+PyAPI_FUNC(int) _PyUnicode_IsCased(
+    const Py_UCS4 ch         /* Unicode character */
+    );
+
 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
     Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_ToDigit(
     Py_UCS4 ch       /* Unicode character */
     );
 
diff -r 694110bc91d8 Lib/test/string_tests.py
--- a/Lib/test/string_tests.py	Sat Jan 07 18:34:24 2012 +0100
+++ b/Lib/test/string_tests.py	Sat Jan 07 22:54:05 2012 -0500
@@ -664,17 +664,17 @@ class CommonTest(BaseTest):
         self.checkequal(' hello ', ' hello ', 'capitalize')
         self.checkequal('Hello ', 'Hello ','capitalize')
         self.checkequal('Hello ', 'hello ','capitalize')
         self.checkequal('Aaaa', 'aaaa', 'capitalize')
         self.checkequal('Aaaa', 'AaAa', 'capitalize')
 
         # check that titlecased chars are lowered correctly
         # \u1ffc is the titlecased char
-        self.checkequal('\u1ffc\u1ff3\u1ff3\u1ff3',
+        self.checkequal('\u03a9\u0399\u1ff3\u1ff3\u1ff3',
                         '\u1ff3\u1ff3\u1ffc\u1ffc', 'capitalize')
         # check with cased non-letter chars
         self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd',
                         '\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3', 'capitalize')
         self.checkequal('\u24c5\u24e8\u24e3\u24d7\u24de\u24dd',
                         '\u24df\u24e8\u24e3\u24d7\u24de\u24dd', 'capitalize')
         self.checkequal('\u2160\u2171\u2172',
                         '\u2160\u2161\u2162', 'capitalize')
diff -r 694110bc91d8 Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py	Sat Jan 07 18:34:24 2012 +0100
+++ b/Lib/test/test_unicode.py	Sat Jan 07 22:54:05 2012 -0500
@@ -543,38 +543,60 @@ class UnicodeTest(string_tests.CommonTes
         string_tests.CommonTest.test_lower(self)
         self.assertEqual('\U00010427'.lower(), '\U0001044F')
         self.assertEqual('\U00010427\U00010427'.lower(),
                          '\U0001044F\U0001044F')
         self.assertEqual('\U00010427\U0001044F'.lower(),
                          '\U0001044F\U0001044F')
         self.assertEqual('X\U00010427x\U0001044F'.lower(),
                          'x\U0001044Fx\U0001044F')
+        self.assertEqual('ﬁ'.lower(), 'ﬁ')
+        self.assertEqual('\u0130'.lower(), '\u0069\u0307')
+        # Special case for GREEK CAPITAL LETTER SIGMA U+03A3
+        self.assertEqual('\u03a3'.lower(), '\u03c3')
+        self.assertEqual('\u0345\u03a3'.lower(), '\u0345\u03c3')
+        self.assertEqual('A\u0345\u03a3'.lower(), 'a\u0345\u03c2')
+        self.assertEqual('A\u0345\u03a3a'.lower(), 'a\u0345\u03c3a')
+        self.assertEqual('A\u0345\u03a3'.lower(), 'a\u0345\u03c2')
+        self.assertEqual('A\u03a3\u0345'.lower(), 'a\u03c2\u0345')
+        self.assertEqual('\u03a3\u0345 '.lower(), '\u03c3\u0345 ')
+        self.assertEqual('\U0008fffe'.lower(), '\U0008fffe')
 
     def test_upper(self):
         string_tests.CommonTest.test_upper(self)
         self.assertEqual('\U0001044F'.upper(), '\U00010427')
         self.assertEqual('\U0001044F\U0001044F'.upper(),
                          '\U00010427\U00010427')
         self.assertEqual('\U00010427\U0001044F'.upper(),
                          '\U00010427\U00010427')
         self.assertEqual('X\U00010427x\U0001044F'.upper(),
                          'X\U00010427X\U00010427')
+        self.assertEqual('ﬁ'.upper(), 'FI')  # one char becomes two
+        self.assertEqual('\u0130'.upper(), '\u0130')
+        self.assertEqual('\u03a3'.upper(), '\u03a3')
+        self.assertEqual('ß'.upper(), 'SS')  # one char becomes two
+        self.assertEqual('\u1fd2'.upper(), '\u0399\u0308\u0300')  # 1 -> 3
+        self.assertEqual('\U0008fffe'.upper(), '\U0008fffe')  # unchanged
 
     def test_capitalize(self):
         string_tests.CommonTest.test_capitalize(self)
         self.assertEqual('\U0001044F'.capitalize(), '\U00010427')
         self.assertEqual('\U0001044F\U0001044F'.capitalize(),
                          '\U00010427\U0001044F')
         self.assertEqual('\U00010427\U0001044F'.capitalize(),
                          '\U00010427\U0001044F')
         self.assertEqual('\U0001044F\U00010427'.capitalize(),
                          '\U00010427\U0001044F')
         self.assertEqual('X\U00010427x\U0001044F'.capitalize(),
                          'X\U0001044Fx\U0001044F')
+        self.assertEqual('h\u0130'.capitalize(), 'H\u0069\u0307')
+        exp = '\u0399\u0308\u0300\u0069\u0307'  # 3 chars, then 2 chars
+        self.assertEqual('\u1fd2\u0130'.capitalize(), exp)
+        self.assertEqual('ﬁnnish'.capitalize(), 'FInnish')  # 1 -> 2 chars
+        self.assertEqual('A\u0345\u03a3'.capitalize(), 'A\u0345\u03c2')
 
     def test_title(self):
         string_tests.MixinStrUnicodeUserStringTest.test_title(self)
         self.assertEqual('\U0001044F'.title(), '\U00010427')
         self.assertEqual('\U0001044F\U0001044F'.title(),
                          '\U00010427\U0001044F')
         self.assertEqual('\U0001044F\U0001044F \U0001044F\U0001044F'.title(),
                          '\U00010427\U0001044F \U00010427\U0001044F')
@@ -592,16 +614,29 @@ class UnicodeTest(string_tests.CommonTes
         self.assertEqual('\U0001044F\U0001044F'.swapcase(),
                          '\U00010427\U00010427')
         self.assertEqual('\U00010427\U0001044F'.swapcase(),
                          '\U0001044F\U00010427')
         self.assertEqual('\U0001044F\U00010427'.swapcase(),
                          '\U00010427\U0001044F')
         self.assertEqual('X\U00010427x\U0001044F'.swapcase(),
                          'x\U0001044FX\U00010427')
+        self.assertEqual('ﬁ'.swapcase(), 'FI')
+        self.assertEqual('\u0130'.swapcase(), '\u0069\u0307')
+        # Special case for GREEK CAPITAL LETTER SIGMA U+03A3
+        self.assertEqual('\u03a3'.swapcase(), '\u03c3')
+        self.assertEqual('\u0345\u03a3'.swapcase(), '\u0345\u03c3')
+        self.assertEqual('A\u0345\u03a3'.swapcase(), 'a\u0345\u03c2')
+        self.assertEqual('A\u0345\u03a3a'.swapcase(), 'a\u0345\u03c3A')
+        self.assertEqual('A\u0345\u03a3'.swapcase(), 'a\u0345\u03c2')
+        self.assertEqual('A\u03a3\u0345'.swapcase(), 'a\u03c2\u0345')
+        self.assertEqual('\u03a3\u0345 '.swapcase(), '\u03c3\u0345 ')
+        self.assertEqual('\u03a3'.swapcase(), '\u03c3')
+        self.assertEqual('ß'.swapcase(), 'SS')
+        self.assertEqual('\u1fd2'.swapcase(), '\u0399\u0308\u0300')
 
     def test_contains(self):
         # Testing Unicode contains method
         self.assertIn('a', 'abdb')
         self.assertIn('a', 'bdab')
         self.assertIn('a', 'bdaba')
         self.assertIn('a', 'bdba')
         self.assertNotIn('a', 'bdb')
diff -r 694110bc91d8 Lib/test/test_unicodedata.py
--- a/Lib/test/test_unicodedata.py	Sat Jan 07 18:34:24 2012 +0100
+++ b/Lib/test/test_unicodedata.py	Sat Jan 07 22:54:05 2012 -0500
@@ -16,17 +16,17 @@ encoding = 'utf-8'
 errors = 'surrogatepass'
 
 
 ### Run tests
 
 class UnicodeMethodsTest(unittest.TestCase):
 
     # update this, if the database changes
-    expectedchecksum = '21b90f1aed00081b81ca7942b22196af090015a0'
+    expectedchecksum = '33f9b16f5e82c9e46a5d3f2da5d3ea611f5e8d80'
 
     def test_method_checksum(self):
         h = hashlib.sha1()
         for i in range(0x10000):
             char = chr(i)
             data = [
                 # Predicates (single char)
                 "01"[char.isalnum()],
diff -r 694110bc91d8 Objects/unicodectype.c
--- a/Objects/unicodectype.c	Sat Jan 07 18:34:24 2012 +0100
+++ b/Objects/unicodectype.c	Sat Jan 07 22:54:05 2012 -0500
@@ -16,18 +16,21 @@
 #define LOWER_MASK 0x08
 #define LINEBREAK_MASK 0x10
 #define SPACE_MASK 0x20
 #define TITLE_MASK 0x40
 #define UPPER_MASK 0x80
 #define XID_START_MASK 0x100
 #define XID_CONTINUE_MASK 0x200
 #define PRINTABLE_MASK 0x400
-#define NODELTA_MASK 0x800
-#define NUMERIC_MASK 0x1000
+#define NUMERIC_MASK 0x800
+#define CASE_IGNORABLE_MASK 0x1000
+#define CASED_MASK 0x2000
+#define EXTRA_UPPER_MASK 0x4000
+#define EXTRA_LOWER_MASK 0x8000
 
 typedef struct {
     const Py_UCS4 upper;
     const Py_UCS4 lower;
     const Py_UCS4 title;
     const unsigned char decimal;
     const unsigned char digit;
     const unsigned short flags;
@@ -52,25 +55,17 @@ gettyperecord(Py_UCS4 code)
 }
 
 /* Returns the titlecase Unicode characters corresponding to ch or just
    ch if no titlecase mapping is known. */
 
 Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-    int delta = ctype->title;
-
-    if (ctype->flags & NODELTA_MASK)
-        return delta;
-
-    if (delta >= 32768)
-            delta -= 65536;
-
-    return ch + delta;
+    return ctype->title ? ctype->title : ch;  /* 0: none, as upper/lower */
 }
 
 /* Returns 1 for Unicode characters having the category 'Lt', 0
    otherwise. */
 
 int _PyUnicode_IsTitlecase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
@@ -183,36 +178,105 @@ int _PyUnicode_IsUppercase(Py_UCS4 ch)
 }
 
 /* Returns the uppercase Unicode characters corresponding to ch or just
    ch if no uppercase mapping is known. */
 
 Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-    int delta = ctype->upper;
-    if (ctype->flags & NODELTA_MASK)
-        return delta;
-    if (delta >= 32768)
-            delta -= 65536;
-    return ch + delta;
+    Py_UCS4 mapped = ctype->upper;
+
+    /* A zero entry means no mapping is stored: ch maps to itself. */
+    if (mapped == 0)
+        return ch;
+    /* For records flagged EXTRA_*, the upper 16 bits carry part of a
+       multi-character mapping; only the low half is returned here. */
+    if (ctype->flags & (EXTRA_UPPER_MASK | EXTRA_LOWER_MASK))
+        mapped &= 0xFFFF;
+    return mapped;
 }
 
 /* Returns the lowercase Unicode characters corresponding to ch or just
    ch if no lowercase mapping is known. */
 
 Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
-    int delta = ctype->lower;
-    if (ctype->flags & NODELTA_MASK)
-        return delta;
-    if (delta >= 32768)
-            delta -= 65536;
-    return ch + delta;
+    const int packed = ctype->flags & (EXTRA_UPPER_MASK | EXTRA_LOWER_MASK);
+
+    /* A zero entry means no mapping is stored: ch maps to itself. */
+    if (!ctype->lower)
+        return ch;
+    /* Packed records keep an extra mapped character in the upper 16 bits;
+       the single-character result is the low half only. */
+    if (packed)
+        return ctype->lower & 0xFFFF;
+    return ctype->lower;
+}
+
+/* Full uppercase of ch: writes 1-3 code points to res, returns how many. */
+int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res)
+{
+    const _PyUnicode_TypeRecord *rec = gettyperecord(ch);
+    const int packed = rec->flags & (EXTRA_UPPER_MASK | EXTRA_LOWER_MASK);
+
+    if (rec->upper == 0) {
+        /* Nothing stored for this character: it maps to itself. */
+        res[0] = ch;
+        return 1;
+    }
+    res[0] = packed ? (rec->upper & 0xFFFF) : rec->upper;
+    if (!(rec->flags & EXTRA_UPPER_MASK))
+        return 1;
+    /* The second character sits in the high half of `upper`; a third one,
+       if present, sits in the high half of `lower`. */
+    res[1] = rec->upper >> 16;
+    if (!(rec->lower & 0xFFFF0000))
+        return 2;
+    res[2] = rec->lower >> 16;
+    return 3;
+}
+
+/* Full lowercase of ch: writes 1-3 code points to res, returns how many. */
+int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res)
+{
+    const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
+
+    res[0] = ctype->lower;
+    if (res[0]) {
+        if (ctype->flags & (EXTRA_UPPER_MASK | EXTRA_LOWER_MASK))
+            res[0] &= 0xFFFF;                  /* first character only */
+        if (ctype->flags & EXTRA_LOWER_MASK) {
+            res[1] = ctype->lower >> 16;       /* second character */
+            if (ctype->upper & 0xFFFF0000) {
+                res[2] = ctype->upper >> 16;   /* third, kept in `upper` */
+                return 3;
+            }
+            return 2;
+        }
+    }
+    else {
+        res[0] = ch;                           /* no mapping stored */
+    }
+    return 1;
+}
+
+/* Returns 1 when the type record of ch carries CASED_MASK, 0 otherwise. */
+int _PyUnicode_IsCased(Py_UCS4 ch)
+{
+    const unsigned short flags = gettyperecord(ch)->flags;
+    return (flags & CASED_MASK) ? 1 : 0;
+}
+
+/* Returns 1 when ch's type record carries CASE_IGNORABLE_MASK, else 0. */
+int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch)
+{
+    const unsigned short flags = gettyperecord(ch)->flags;
+    return (flags & CASE_IGNORABLE_MASK) ? 1 : 0;
 }
 
 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
    'Lo' or 'Lm',  0 otherwise. */
 
 int _PyUnicode_IsAlpha(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
diff -r 694110bc91d8 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Sat Jan 07 18:34:24 2012 +0100
+++ b/Objects/unicodeobject.c	Sat Jan 07 22:54:05 2012 -0500
@@ -9423,152 +9423,137 @@ fixup(PyObject *self,
     else {
         copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self));
     }
     Py_DECREF(u);
     assert(_PyUnicode_CheckConsistency(v, 1));
     return v;
 }
 
+/* Case-convert a string whose data is read as one char per character: copy
+   it, then shift letters in place.  Nonzero `lower` lowercases, 0 uppercases. */
+static PyObject *
+ascii_upper_or_lower(PyObject *self, int lower)
+{
+    /* Range of letters that must change, and the offset that moves them to
+       the other case. */
+    const char first = lower ? 'A' : 'a';
+    const char last = first + 25;
+    const char shift = lower ? ('a' - 'A') : ('A' - 'a');
+    const Py_ssize_t size = PyUnicode_GET_LENGTH(self);
+    Py_ssize_t pos;
+    char *buf;
+    PyObject *copy;
+
+    copy = _PyUnicode_Copy(self);
+    if (copy == NULL)
+        return NULL;
+    buf = PyUnicode_DATA(copy);
+
+    for (pos = 0; pos < size; pos++) {
+        const char cur = buf[pos];
+
+        /* Characters outside the letter range stay as they are. */
+        if (cur < first || cur > last)
+            continue;
+        buf[pos] = cur + shift;
+    }
+    return copy;
+}
+
 static Py_UCS4
-fixupper(PyObject *self)
-{
-    /* No need to call PyUnicode_READY(self) because this function is only
-       called as a callback from fixup() which does it already. */
-    const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
-    const int kind = PyUnicode_KIND(self);
-    void *data = PyUnicode_DATA(self);
-    int touched = 0;
-    Py_UCS4 maxchar = 0;
-    Py_ssize_t i;
-
-    for (i = 0; i < len; ++i) {
-        const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
-        const Py_UCS4 up = Py_UNICODE_TOUPPER(ch);
-        if (up != ch) {
-            if (up > maxchar)
-                maxchar = up;
-            PyUnicode_WRITE(kind, data, i, up);
-            touched = 1;
-        }
-        else if (ch > maxchar)
-            maxchar = ch;
-    }
-
-    if (touched)
-        return maxchar;
-    else
-        return 0;
-}
-
-static Py_UCS4
-fixlower(PyObject *self)
-{
-    /* No need to call PyUnicode_READY(self) because fixup() which does it. */
-    const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
-    const int kind = PyUnicode_KIND(self);
-    void *data = PyUnicode_DATA(self);
-    int touched = 0;
-    Py_UCS4 maxchar = 0;
-    Py_ssize_t i;
-
-    for(i = 0; i < len; ++i) {
-        const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
-        const Py_UCS4 lo = Py_UNICODE_TOLOWER(ch);
-        if (lo != ch) {
-            if (lo > maxchar)
-                maxchar = lo;
-            PyUnicode_WRITE(kind, data, i, lo);
-            touched = 1;
-        }
-        else if (ch > maxchar)
-            maxchar = ch;
-    }
-
-    if (touched)
-        return maxchar;
-    else
-        return 0;
-}
-
-static Py_UCS4
-fixswapcase(PyObject *self)
-{
-    /* No need to call PyUnicode_READY(self) because fixup() which does it. */
-    const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
-    const int kind = PyUnicode_KIND(self);
-    void *data = PyUnicode_DATA(self);
-    int touched = 0;
-    Py_UCS4 maxchar = 0;
-    Py_ssize_t i;
-
-    for(i = 0; i < len; ++i) {
-        const Py_UCS4 ch = PyUnicode_READ(kind, data, i);
-        Py_UCS4 nu = 0;
-
-        if (Py_UNICODE_ISUPPER(ch))
-            nu = Py_UNICODE_TOLOWER(ch);
-        else if (Py_UNICODE_ISLOWER(ch))
-            nu = Py_UNICODE_TOUPPER(ch);
-
-        if (nu != 0) {
-            if (nu > maxchar)
-                maxchar = nu;
-            PyUnicode_WRITE(kind, data, i, nu);
-            touched = 1;
-        }
-        else if (ch > maxchar)
-            maxchar = ch;
-    }
-
-    if (touched)
-        return maxchar;
-    else
-        return 0;
-}
-
-static Py_UCS4
-fixcapitalize(PyObject *self)
-{
-    /* No need to call PyUnicode_READY(self) because fixup() which does it. */
-    const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
-    const int kind = PyUnicode_KIND(self);
-    void *data = PyUnicode_DATA(self);
-    int touched = 0;
-    Py_UCS4 maxchar = 0;
-    Py_ssize_t i = 0;
-    Py_UCS4 ch;
-
-    if (len == 0)
-        return 0;
-
-    ch = PyUnicode_READ(kind, data, i);
-    if (!Py_UNICODE_ISUPPER(ch)) {
-        maxchar = Py_UNICODE_TOUPPER(ch);
-        PyUnicode_WRITE(kind, data, i, maxchar);
-        touched = 1;
-    }
-    ++i;
-    for(; i < len; ++i) {
-        ch = PyUnicode_READ(kind, data, i);
-        if (!Py_UNICODE_ISLOWER(ch)) {
-            const Py_UCS4 lo = Py_UNICODE_TOLOWER(ch);
-            if (lo > maxchar)
-                maxchar = lo;
-            PyUnicode_WRITE(kind, data, i, lo);
-            touched = 1;
-        }
-        else if (ch > maxchar)
-            maxchar = ch;
-    }
-
-    if (touched)
-        return maxchar;
-    else
-        return 0;
+handle_capital_sigma(int kind, void *data, Py_ssize_t length, Py_ssize_t i)
+{
+    Py_ssize_t j;
+    int final_sigma;
+    Py_UCS4 c;
+    /* Pick the lowercase form of the U+03A3 found at index i.  It is in the
+       Final_Sigma context when the text around it looks like this:
+
+     \p{cased}\p{case-ignorable}*\u03A3!(\p{case-ignorable}*\p{cased})
+
+    where ! is a negation and \p{xxx} is a character with property xxx. */
+    for (j = i - 1; j >= 0; j--) {      /* scan left past case-ignorables */
+        c = PyUnicode_READ(kind, data, j);
+        if (!_PyUnicode_IsCaseIgnorable(c))
+            break;
+    }
+    final_sigma = j >= 0 && _PyUnicode_IsCased(c);  /* c read only if set */
+    if (final_sigma) {
+        for (j = i + 1; j < length; j++) {  /* scan right the same way */
+            c = PyUnicode_READ(kind, data, j);
+            if (!_PyUnicode_IsCaseIgnorable(c))
+                break;
+        }
+        final_sigma = j == length || !_PyUnicode_IsCased(c);
+    }
+    return (final_sigma) ? 0x3C2 : 0x3C3;  /* U+03C2 when final, else U+03C3 */
+}
+
+/* Lowercase c, found at index i, into mapped; returns the count written. */
+static int
+lower_ucs4(int kind, void *data, Py_ssize_t length, Py_ssize_t i,
+           Py_UCS4 c, Py_UCS4 *mapped)
+{
+    if (c != 0x3A3)
+        return _PyUnicode_ToLowerFull(c, mapped);
+    /* U+03A3 is the one case whose result depends on its neighbours. */
+    mapped[0] = handle_capital_sigma(kind, data, length, i);
+    return 1;
+}
+
+static int  /* same signature as lower_ucs4; the context args go unused */
+upper_ucs4(int kind, void *data, Py_ssize_t length, Py_ssize_t i,
+           Py_UCS4 c, Py_UCS4 *mapped)
+{
+    return _PyUnicode_ToUpperFull(c, mapped);  /* count of chars written */
+}
+
+/* Shared implementation of str.upper() and str.lower(): a nonzero `lower`
+   selects lowercasing.  Returns a new string, or NULL on error. */
+static PyObject *
+unicode_upper_or_lower(PyObject *self, int lower)
+{
+    int (*convert)(int, void *, Py_ssize_t, Py_ssize_t, Py_UCS4, Py_UCS4 *);
+    Py_UCS4 buf[3], biggest = 0;
+    Py_ssize_t src, dst = 0, srclen, dstlen = 0;
+    int srckind, dstkind, count, n;
+    void *srcdata, *dstdata;
+    PyObject *result;
+
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+    if (PyUnicode_IS_ASCII(self))
+        return ascii_upper_or_lower(self, lower);
+
+    convert = lower ? lower_ucs4 : upper_ucs4;
+    srckind = PyUnicode_KIND(self);
+    srcdata = PyUnicode_DATA(self);
+    srclen = PyUnicode_GET_LENGTH(self);
+    /* First pass: find the length and widest character of the result. */
+    for (src = 0; src < srclen; src++) {
+        count = convert(srckind, srcdata, srclen, src,
+                        PyUnicode_READ(srckind, srcdata, src), buf);
+        dstlen += count;
+        for (n = 0; n < count; n++)
+            biggest = (buf[n] > biggest) ? buf[n] : biggest;
+    }
+    result = PyUnicode_New(dstlen, biggest);
+    if (result == NULL)
+        return NULL;
+    dstkind = PyUnicode_KIND(result);
+    dstdata = PyUnicode_DATA(result);
+    /* Second pass: map every character again and store the output. */
+    for (src = 0; src < srclen; src++) {
+        count = convert(srckind, srcdata, srclen, src,
+                        PyUnicode_READ(srckind, srcdata, src), buf);
+        for (n = 0; n < count; n++, dst++)
+            PyUnicode_WRITE(dstkind, dstdata, dst, buf[n]);
+    }
+    return result;
 }
 
 static Py_UCS4
 fixtitle(PyObject *self)
 {
     /* No need to call PyUnicode_READY(self) because fixup() which does it. */
     const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
     const int kind = PyUnicode_KIND(self);
@@ -10452,17 +10437,58 @@ PyDoc_STRVAR(capitalize__doc__,
              "S.capitalize() -> str\n\
 \n\
 Return a capitalized version of S, i.e. make the first character\n\
 have upper case and the rest lower case.");
 
 static PyObject*
 unicode_capitalize(PyObject *self)
 {
-    return fixup(self, fixcapitalize);
+    Py_ssize_t length, newlength, i, k = 0;
+    int kind, newkind, n_res, j;
+    void *data, *newdata;
+    Py_UCS4 c, maxchar = 0, mapped[3];   /* one char maps to at most 3 */
+    PyObject *res;
+
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    kind = PyUnicode_KIND(self);
+    data = PyUnicode_DATA(self);
+    length = PyUnicode_GET_LENGTH(self);
+    if (length == 0)
+        return unicode_result_unchanged(self);
+    c = PyUnicode_READ(kind, data, 0);   /* pass 1: size the result */
+    n_res = newlength = _PyUnicode_ToUpperFull(c, mapped);
+    for (j = 0; j < n_res; j++)
+        if (mapped[j] > maxchar)
+            maxchar = mapped[j];
+    for (i = 1; i < length; i++) {       /* the rest is lowercased */
+        c = PyUnicode_READ(kind, data, i);
+        n_res = lower_ucs4(kind, data, length, i, c, mapped);
+        for (j = 0; j < n_res; j++)
+            if (mapped[j] > maxchar)
+                maxchar = mapped[j];
+        newlength += n_res;
+    }
+    res = PyUnicode_New(newlength, maxchar);
+    if (res == NULL)
+        return NULL;
+    newkind = PyUnicode_KIND(res);
+    newdata = PyUnicode_DATA(res);       /* pass 2: redo mappings, store them */
+    n_res = _PyUnicode_ToUpperFull(PyUnicode_READ(kind, data, 0), mapped);
+    for (j = 0; j < n_res; j++, k++)
+        PyUnicode_WRITE(newkind, newdata, k, mapped[j]);
+    for (i = 1; i < length; i++) {
+        c = PyUnicode_READ(kind, data, i);
+        n_res = lower_ucs4(kind, data, length, i, c, mapped);
+        for (j = 0; j < n_res; j++, k++)
+            PyUnicode_WRITE(newkind, newdata, k, mapped[j]);
+    }
+    return res;
 }
 
 #if 0
 PyDoc_STRVAR(capwords__doc__,
              "S.capwords() -> str\n\
 \n\
 Apply .capitalize() to all words in S and return the result with\n\
 normalized whitespace (all whitespace strings are replaced by ' ').");
@@ -11710,17 +11736,17 @@ unicode_ljust(PyObject *self, PyObject *
 PyDoc_STRVAR(lower__doc__,
              "S.lower() -> str\n\
 \n\
 Return a copy of the string S converted to lowercase.");
 
 static PyObject*
 unicode_lower(PyObject *self)
 {
-    return fixup(self, fixlower);
+    return unicode_upper_or_lower(self, 1);  /* nonzero selects lowercasing */
 }
 
 #define LEFTSTRIP 0
 #define RIGHTSTRIP 1
 #define BOTHSTRIP 2
 
 /* Arrays indexed by above */
 static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
@@ -12599,17 +12625,66 @@ PyDoc_STRVAR(swapcase__doc__,
              "S.swapcase() -> str\n\
 \n\
 Return a copy of S with uppercase characters converted to lowercase\n\
 and vice versa.");
 
 static PyObject*
 unicode_swapcase(PyObject *self)
 {
-    return fixup(self, fixswapcase);
+    Py_ssize_t length, newlength = 0, i, k = 0;
+    int kind, newkind, n_res, j;
+    void *data, *newdata;
+    Py_UCS4 c, maxchar = 0, mapped[3];   /* one char maps to at most 3 */
+    PyObject *res;
+
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    kind = PyUnicode_KIND(self);
+    data = PyUnicode_DATA(self);
+    length = PyUnicode_GET_LENGTH(self);
+    for (i = 0; i < length; i++) {       /* pass 1: size the result */
+        c = PyUnicode_READ(kind, data, i);
+        if (Py_UNICODE_ISUPPER(c)) {
+            n_res = lower_ucs4(kind, data, length, i, c, mapped);
+        }
+        else if (Py_UNICODE_ISLOWER(c)) {
+            n_res = _PyUnicode_ToUpperFull(c, mapped);
+        }
+        else {                           /* neither: copied as is */
+            n_res = 1;
+            mapped[0] = c;
+        }
+        newlength += n_res;
+        for (j = 0; j < n_res; j++)
+            if (mapped[j] > maxchar)
+                maxchar = mapped[j];
+    }
+    res = PyUnicode_New(newlength, maxchar);
+    if (res == NULL)
+        return NULL;
+    newkind = PyUnicode_KIND(res);
+    newdata = PyUnicode_DATA(res);
+    for (i = 0; i < length; i++) {       /* pass 2: redo mappings, store them */
+        c = PyUnicode_READ(kind, data, i);
+        if (Py_UNICODE_ISUPPER(c)) {
+            n_res = lower_ucs4(kind, data, length, i, c, mapped);
+        }
+        else if (Py_UNICODE_ISLOWER(c)) {
+            n_res = _PyUnicode_ToUpperFull(c, mapped);
+        }
+        else {
+            n_res = 1;
+            mapped[0] = c;
+        }
+        for (j = 0; j < n_res; j++, k++)
+            PyUnicode_WRITE(newkind, newdata, k, mapped[j]);
+    }
+    return res;
 }
 
 PyDoc_STRVAR(maketrans__doc__,
              "str.maketrans(x[, y[, z]]) -> dict (static method)\n\
 \n\
 Return a translation table usable for str.translate().\n\
 If there is only one argument, it must be a dictionary mapping Unicode\n\
 ordinals (integers) or characters to Unicode ordinals, strings or None.\n\
@@ -12745,17 +12820,17 @@ unicode_translate(PyObject *self, PyObje
 PyDoc_STRVAR(upper__doc__,
              "S.upper() -> str\n\
 \n\
 Return a copy of S converted to uppercase.");
 
 static PyObject*
 unicode_upper(PyObject *self)
 {
-    return fixup(self, fixupper);
+    return unicode_upper_or_lower(self, 0);  /* zero selects uppercasing */
 }
 
 PyDoc_STRVAR(zfill__doc__,
              "S.zfill(width) -> str\n\
 \n\
 Pad a numeric string S with zeros on the left, to fill a field\n\
 of the specified width. The string S is never truncated.");
 
diff -r 694110bc91d8 Tools/unicode/makeunicodedata.py
--- a/Tools/unicode/makeunicodedata.py	Sat Jan 07 18:34:24 2012 +0100
+++ b/Tools/unicode/makeunicodedata.py	Sat Jan 07 22:54:05 2012 -0500
@@ -17,16 +17,17 @@
 # 2002-10-18 mvl  update to Unicode 3.2
 # 2002-10-22 mvl  generate NFC tables
 # 2002-11-24 mvl  expand all ranges, sort names version-independently
 # 2002-11-25 mvl  add UNIDATA_VERSION
 # 2004-05-29 perky add east asian width information
 # 2006-03-10 mvl  update to Unicode 4.1; add UCD 3.2 delta
 # 2008-06-11 gb   add PRINTABLE_MASK for Atsuo Ishimoto's ascii() patch
 # 2011-10-21 ezio add support for name aliases and named sequences
+# 2012-01    benjamin add full case mappings
 #
 # written by Fredrik Lundh (fredrik@pythonware.com)
 #
 
 import os
 import sys
 import zipfile
 
@@ -42,16 +43,17 @@ UNICODE_DATA = "UnicodeData%s.txt"
 COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
 EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
 UNIHAN = "Unihan%s.zip"
 DERIVED_CORE_PROPERTIES = "DerivedCoreProperties%s.txt"
 DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt"
 LINE_BREAK = "LineBreak%s.txt"
 NAME_ALIASES = "NameAliases%s.txt"
 NAMED_SEQUENCES = "NamedSequences%s.txt"
+SPECIAL_CASING = "SpecialCasing%s.txt"
 
 # Private Use Areas -- in planes 1, 15, 16
 PUA_1 = range(0xE000, 0xF900)
 PUA_15 = range(0xF0000, 0xFFFFE)
 PUA_16 = range(0x100000, 0x10FFFE)
 
 # we use this ranges of PUA_15 to store name aliases and named sequences
 NAME_ALIASES_START = 0xF0000
@@ -79,18 +81,21 @@ DIGIT_MASK = 0x04
 LOWER_MASK = 0x08
 LINEBREAK_MASK = 0x10
 SPACE_MASK = 0x20
 TITLE_MASK = 0x40
 UPPER_MASK = 0x80
 XID_START_MASK = 0x100
 XID_CONTINUE_MASK = 0x200
 PRINTABLE_MASK = 0x400
-NODELTA_MASK = 0x800
-NUMERIC_MASK = 0x1000
+NUMERIC_MASK = 0x800
+CASE_IGNORABLE_MASK = 0x1000
+CASED_MASK = 0x2000
+EXTRA_UPPER_MASK = 0x4000
+EXTRA_LOWER_MASK = 0x8000
 
 # these ranges need to match unicodedata.c:is_unified_ideograph
 cjk_ranges = [
     ('3400', '4DB5'),
     ('4E00', '9FCB'),
     ('20000', '2A6D6'),
     ('2A700', '2B734'),
     ('2B740', '2B81D')
@@ -409,45 +414,78 @@ def makeunicodetype(unicode, trace):
             if category == "Lu":
                 flags |= UPPER_MASK
             if char == ord(" ") or category[0] not in ("C", "Z"):
                 flags |= PRINTABLE_MASK
             if "XID_Start" in properties:
                 flags |= XID_START_MASK
             if "XID_Continue" in properties:
                 flags |= XID_CONTINUE_MASK
-            # use delta predictor for upper/lower/title if it fits
-            if record[12]:
-                upper = int(record[12], 16)
+            if "Cased" in properties:
+                flags |= CASED_MASK
+            if "Case_Ignorable" in properties:
+                flags |= CASE_IGNORABLE_MASK
+            sc = unicode.special_casing.get(char)
+            if sc is None:
+                if record[12]:
+                    upper = int(record[12], 16)
+                else:
+                    upper = char
+                if record[13]:
+                    lower = int(record[13], 16)
+                else:
+                    lower = char
+                if record[14]:
+                    title = int(record[14], 16)
+                else:
+                    title = upper
             else:
-                upper = char
-            if record[13]:
-                lower = int(record[13], 16)
-            else:
-                lower = char
-            if record[14]:
-                title = int(record[14], 16)
-            else:
-                # UCD.html says that a missing title char means that
-                # it defaults to the uppercase character, not to the
-                # character itself. Apparently, in the current UCD (5.x)
-                # this feature is never used
-                title = upper
-            upper_d = upper - char
-            lower_d = lower - char
-            title_d = title - char
-            if -32768 <= upper_d <= 32767 and \
-               -32768 <= lower_d <= 32767 and \
-               -32768 <= title_d <= 32767:
-                # use deltas
-                upper = upper_d & 0xffff
-                lower = lower_d & 0xffff
-                title = title_d & 0xffff
-            else:
-                flags |= NODELTA_MASK
+                # This character maps to more than one character in uppercase
+                # or lowercase. The mappings are packed with an evil hack: the
+                # first two characters share one Py_UCS4 field (low and high
+                # 16 bits), and a third character goes in the high 16 bits of
+                # the field for the opposite case. For this to work, only one
+                # case may expand and every character involved must fit in 16
+                # bits; otherwise ValueError is raised.
+                lower_len = len(sc[0])
+                upper_len = len(sc[1])
+                no_hacks = ValueError("upper/lower hacks just failed")
+                if lower_len > 1 and upper_len > 1:
+                    raise no_hacks
+                if lower_len > 1:
+                    split = sc[0]
+                    other = sc[1][0]
+                    flags |= EXTRA_LOWER_MASK
+                else:
+                    assert upper_len > 1
+                    split = sc[1]
+                    other = sc[0][0]
+                    flags |= EXTRA_UPPER_MASK
+                if record[14]:
+                    title = int(record[14], 16)
+                elif record[12]:
+                    title = int(record[12], 16)
+                else:
+                    title = char
+                if other > 0xFFFF:
+                    raise no_hacks
+                for c in split:
+                    if c > 0xFFFF:
+                        raise no_hacks
+                final = split[0]
+                if len(split) >= 2:
+                    final |= split[1] << 16
+                    if len(split) == 3:
+                        other |= split[2] << 16
+                if lower_len > 1:
+                    lower = final
+                    upper = other
+                else:
+                    upper = final
+                    lower = other
             # decimal digit, integer digit
             decimal = 0
             if record[6]:
                 flags |= DECIMAL_MASK
                 decimal = int(record[6])
             digit = 0
             if record[7]:
                 flags |= DIGIT_MASK
@@ -1065,16 +1103,31 @@ class UnicodeData:
             if tag not in ('kAccountingNumeric', 'kPrimaryNumeric',
                            'kOtherNumeric'):
                 continue
             value = value.strip().replace(',', '')
             i = int(code[2:], 16)
             # Patch the numeric field
             if table[i] is not None:
                 table[i][8] = value
+        sc = self.special_casing = {}
+        with open_data(SPECIAL_CASING, version) as file:
+            for s in file:
+                s = s[:-1].split('#', 1)[0]
+                if not s:
+                    continue
+                data = s.split("; ")
+                if data[4]:
+                    # Skip every conditional mapping. All but one depend on the
+                    # language; the exception (Final_Sigma) is hardcoded in C.
+                    continue
+                c = int(data[0], 16)
+                lower = [int(char, 16) for char in data[1].split()]
+                upper = [int(char, 16) for char in data[3].split()]
+                sc[c] = (lower, upper)
 
     def uselatin1(self):
         # restrict character range to ISO Latin 1
         self.chars = list(range(256))
 
 # hash table tools
 
 # this is a straight-forward reimplementation of Python's built-in