Index: Misc/NEWS
===================================================================
--- Misc/NEWS	(revision 79088)
+++ Misc/NEWS	(working copy)
@@ -285,6 +285,8 @@
 Library
 -------
 
+- Issue #8024: Update the Unicode database to 5.2.
+
 - Issue #8168: py_compile now handles files with utf-8 BOMS.
 
 - ``tokenize.detect_encoding`` now returns ``'utf-8-sig'`` when a UTF-8 BOM is
Index: Tools/unicode/makeunicodedata.py
===================================================================
--- Tools/unicode/makeunicodedata.py	(revision 79088)
+++ Tools/unicode/makeunicodedata.py	(working copy)
@@ -31,7 +31,7 @@
 VERSION = "2.6"
 
 # The Unicode Database
-UNIDATA_VERSION = "5.1.0"
+UNIDATA_VERSION = "5.2.0"
 UNICODE_DATA = "UnicodeData%s.txt"
 COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
 EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
Index: Lib/test/test_bigmem.py
===================================================================
--- Lib/test/test_bigmem.py	(revision 79087)
+++ Lib/test/test_bigmem.py	(working copy)
@@ -618,7 +618,7 @@
     @precisionbigmemtest(size=_4G // 5, memuse=character_size * (6 + 1))
     def test_unicode_repr_overflow(self, size):
         try:
-            s = "\uAAAA"*size
+            s = "\uDCBA"*size
             r = repr(s)
         except MemoryError:
             pass # acceptable on 32-bit
@@ -679,22 +679,24 @@
 
     @bigmemtest(minsize=2**32 / 5, memuse=character_size * 7)
     def test_unicode_repr(self, size):
-        s = "\uAAAA" * size
+        # U+DCBA is a lone low surrogate (range U+DC00-U+DFFF): assigned, but
+        # not printable, so both repr() and ascii() must emit it as an escape.
+        s = "\uDCBA" * size
         for f in (repr, ascii):
             r = f(s)
             self.assertTrue(len(r) > size)
-            self.assertTrue(r.endswith(r"\uaaaa'"), r[-10:])
+            self.assertTrue(r.endswith(r"\udcba'"), r[-10:])
             del r
 
     # The character takes 4 bytes even in UCS-2 builds because it will
     # be decomposed into surrogates.
     @bigmemtest(minsize=2**32 / 5, memuse=4 + character_size * 9)
     def test_unicode_repr_wide(self, size):
-        s = "\U0001AAAA" * size
+        s = "\U0001DCBA" * size
         for f in (repr, ascii):
             r = f(s)
             self.assertTrue(len(r) > size)
-            self.assertTrue(r.endswith(r"\U0001aaaa'"), r[-12:])
+            self.assertTrue(r.endswith(r"\U0001dcba'"), r[-12:])
             del r
 
 
Index: Lib/test/test_unicodedata.py
===================================================================
--- Lib/test/test_unicodedata.py	(revision 79088)
+++ Lib/test/test_unicodedata.py	(working copy)
@@ -21,7 +21,7 @@
 class UnicodeMethodsTest(unittest.TestCase):
 
     # update this, if the database changes
-    expectedchecksum = '0b915116051f3ed029a98542c2b7df63c9646272'
+    expectedchecksum = '4504dffd035baea02c5b9de82bebc3d65e0e0baf'
 
     def test_method_checksum(self):
         h = hashlib.sha1()
@@ -80,7 +80,7 @@
 class UnicodeFunctionsTest(UnicodeDatabaseTest):
 
     # update this, if the database changes
-    expectedchecksum = 'd4169ccff998ebbd1ec007a0b3fbd66e5ccf0229'
+    expectedchecksum = '6ccf1b1a36460d2694f9b0b0f0324942fe70ede6'
 
     def test_function_checksum(self):
         data = []