diff -r cf70f030a744 Lib/json/tests/test_fail.py --- a/Lib/json/tests/test_fail.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/json/tests/test_fail.py Mon Jun 23 22:24:50 2014 +0300 @@ -69,7 +69,7 @@ # http://json.org/JSON_checker/test/fail33.json '["mismatch"}', # http://code.google.com/p/simplejson/issues/detail?id=3 - u'["A\u001FZ control characters in string"]', + u'["A\x1FZ control characters in string"]', ] SKIPS = { diff -r cf70f030a744 Lib/test/pickletester.py --- a/Lib/test/pickletester.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/pickletester.py Mon Jun 23 22:24:50 2014 +0300 @@ -6,7 +6,7 @@ import pickletools import copy_reg -from test.test_support import TestFailed, verbose, have_unicode, TESTFN +from test.test_support import TestFailed, verbose, requires_unicode, u, TESTFN try: from test.test_support import _2G, _1M, precisionbigmemtest except ImportError: @@ -548,22 +548,24 @@ buf = "S" + s + "\012p0\012." self.assertRaises(ValueError, self.loads, buf) - if have_unicode: - def test_unicode(self): - endcases = [u'', u'<\\u>', u'<\\\u1234>', u'<\n>', - u'<\\>', u'<\\\U00012345>'] - for proto in protocols: - for u in endcases: - p = self.dumps(u, proto) - u2 = self.loads(p) - self.assertEqual(u2, u) + @requires_unicode + def test_unicode(self): + endcases = [u'', u'<\\u>', u'<\n>', u'<\\>', + u(r'<\\\u1234>'), + u(r'<\\\U00012345>')] + for proto in protocols: + for u1 in endcases: + p = self.dumps(u1, proto) + u2 = self.loads(p) + self.assertEqual(u2, u1) - def test_unicode_high_plane(self): - t = u'\U00012345' - for proto in protocols: - p = self.dumps(t, proto) - t2 = self.loads(p) - self.assertEqual(t2, t) + @requires_unicode + def test_unicode_high_plane(self): + t = u(r'\U00012345') + for proto in protocols: + p = self.dumps(t, proto) + t2 = self.loads(p) + self.assertEqual(t2, t) def test_ints(self): import sys @@ -1052,7 +1054,7 @@ sample = "hello" class MyUnicode(unicode): - sample = u"hello \u1234" + sample = u"hello " + unichr(0x1234) class MyTuple(tuple): sample = (1, 2, 3) diff -r cf70f030a744 Lib/test/re_tests.py --- a/Lib/test/re_tests.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/re_tests.py Mon Jun 23 22:24:50 2014 +0300 @@ -663,7 +663,7 @@ try: u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'") -except SyntaxError: +except (SyntaxError, ValueError): pass else: tests.extend([ diff -r cf70f030a744 Lib/test/string_tests.py --- a/Lib/test/string_tests.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/string_tests.py Mon Jun 23 22:24:50 2014 +0300 @@ -1332,27 +1332,28 @@ s2 = t().join([s1]) self.assertTrue(s1 is s2) - # Should also test mixed-type join. - if t is unicode: - s1 = subclass("abcd") - s2 = "".join([s1]) - self.assertTrue(s1 is not s2) - self.assertTrue(type(s2) is t) + if test_support.have_unicode: + # Should also test mixed-type join. + if t is unicode: + s1 = subclass("abcd") + s2 = "".join([s1]) + self.assertTrue(s1 is not s2) + self.assertTrue(type(s2) is t) - s1 = t("abcd") - s2 = "".join([s1]) - self.assertTrue(s1 is s2) + s1 = t("abcd") + s2 = "".join([s1]) + self.assertTrue(s1 is s2) - elif t is str: - s1 = subclass("abcd") - s2 = u"".join([s1]) - self.assertTrue(s1 is not s2) - self.assertTrue(type(s2) is unicode) # promotes! + elif t is str: + s1 = subclass("abcd") + s2 = u"".join([s1]) + self.assertTrue(s1 is not s2) + self.assertTrue(type(s2) is unicode) # promotes! - s1 = t("abcd") - s2 = u"".join([s1]) - self.assertTrue(s1 is not s2) - self.assertTrue(type(s2) is unicode) # promotes! + s1 = t("abcd") + s2 = u"".join([s1]) + self.assertTrue(s1 is not s2) + self.assertTrue(type(s2) is unicode) # promotes! - else: - self.fail("unexpected type for MixinStrUnicodeTest %r" % t) + else: + self.fail("unexpected type for MixinStrUnicodeTest %r" % t) diff -r cf70f030a744 Lib/test/test_argparse.py --- a/Lib/test/test_argparse.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_argparse.py Mon Jun 23 22:24:50 2014 +0300 @@ -1521,7 +1521,7 @@ def __eq__(self, other): if other not in self.seen: text = 'Check that file is writable.' - if 'b' in other.mode: + if test_support.have_unicode and 'b' in other.mode: text = text.encode('ascii') other.write(text) other.close() @@ -4464,6 +4464,7 @@ # File encoding tests # =================== +@test_support.requires_unicode class TestEncoding(TestCase): def _test_module_encoding(self, path): diff -r cf70f030a744 Lib/test/test_array.py --- a/Lib/test/test_array.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_array.py Mon Jun 23 22:24:50 2014 +0300 @@ -18,7 +18,9 @@ array.array.__init__(self, typecode) tests = [] # list to accumulate all tests -typecodes = "cubBhHiIlLfd" +typecodes = "cbBhHiIlLfd" +if test_support.have_unicode: + typecodes += "u" class BadConstructorTest(unittest.TestCase): @@ -827,6 +829,7 @@ self.assertEqual(s.color, "red") self.assertEqual(s.__dict__.keys(), ["color"]) + @test_support.requires_unicode def test_nounicode(self): a = array.array(self.typecode, self.example) self.assertRaises(ValueError, a.fromunicode, unicode('')) diff -r cf70f030a744 Lib/test/test_ast.py --- a/Lib/test/test_ast.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_ast.py Mon Jun 23 22:24:50 2014 +0300 @@ -368,6 +368,7 @@ ast2 = mod.loads(mod.dumps(ast, protocol)) self.assertEqual(to_tuple(ast2), to_tuple(ast)) + @test_support.requires_unicode def test_invalid_identitifer(self): m = ast.Module([ast.Expr(ast.Name(u"x", ast.Load()))]) ast.fix_missing_locations(m) diff -r cf70f030a744 Lib/test/test_bigmem.py --- a/Lib/test/test_bigmem.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_bigmem.py Mon Jun 23 22:24:50 2014 +0300 @@ -81,6 +81,7 @@ self.assertEqual(s.count('i'), 1) self.assertEqual(s.count('j'), 0) + @test_support.requires_unicode @bigmemtest(minsize=_2G + 2, memuse=3) def test_decode(self, size): s = '.' * size @@ -93,10 +94,12 @@ s = c * size self.assertEqual(len(s.encode(enc)), expectedsize) + @test_support.requires_unicode @bigmemtest(minsize=_2G + 2, memuse=3) def test_encode(self, size): return self.basic_encode_test(size, 'utf-8') + @test_support.requires_unicode @precisionbigmemtest(size=_4G // 6 + 2, memuse=2) def test_encode_raw_unicode_escape(self, size): try: @@ -104,6 +107,7 @@ except MemoryError: pass # acceptable on 32-bit + @test_support.requires_unicode @precisionbigmemtest(size=_4G // 5 + 70, memuse=3) def test_encode_utf7(self, size): try: @@ -111,6 +115,7 @@ except MemoryError: pass # acceptable on 32-bit + @test_support.requires_unicode @precisionbigmemtest(size=_4G // 4 + 5, memuse=6) def test_encode_utf32(self, size): try: @@ -118,15 +123,17 @@ except MemoryError: pass # acceptable on 32-bit + @test_support.requires_unicode @precisionbigmemtest(size=_2G-1, memuse=4) def test_decodeascii(self, size): return self.basic_encode_test(size, 'ascii', c='A') + @test_support.requires_unicode @precisionbigmemtest(size=_4G // 5, memuse=6+2) def test_unicode_repr_oflw(self, size): self.skipTest("test crashes - see issue #14904") try: - s = u"\uAAAA"*size + s = unichr(0xAAAA)*size r = repr(s) except MemoryError: pass # acceptable on 32-bit @@ -516,9 +523,10 @@ self.assertEqual(s.count('\\'), size) self.assertEqual(s.count('0'), size * 2) + @test_support.requires_unicode @bigmemtest(minsize=2**32 // 5, memuse=6+2) def test_unicode_repr(self, size): - s = u"\uAAAA" * size + s = unichr(0xAAAA) * size self.assertTrue(len(repr(s)) > size) # This test is meaningful even with size < 2G, as long as the diff -r cf70f030a744 Lib/test/test_builtin.py --- a/Lib/test/test_builtin.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_builtin.py Mon Jun 23 22:24:50 2014 +0300 @@ -3,7 +3,7 @@ import platform import unittest from test.test_support import fcmp, have_unicode, TESTFN, unlink, \ - run_unittest, check_py3k_warnings + run_unittest, check_py3k_warnings, requires_unicode import warnings from operator import neg @@ -1365,18 +1365,18 @@ self.assertEqual(type(''), type('123')) self.assertNotEqual(type(''), type(())) + @requires_unicode def test_unichr(self): - if have_unicode: - self.assertEqual(unichr(32), unicode(' ')) - self.assertEqual(unichr(65), unicode('A')) - self.assertEqual(unichr(97), unicode('a')) - self.assertEqual( - unichr(sys.maxunicode), - unicode('\\U%08x' % (sys.maxunicode), 'unicode-escape') - ) - self.assertRaises(ValueError, unichr, sys.maxunicode+1) - self.assertRaises(TypeError, unichr) - self.assertRaises((OverflowError, ValueError), unichr, 2**32) + self.assertEqual(unichr(32), unicode(' ')) + self.assertEqual(unichr(65), unicode('A')) + self.assertEqual(unichr(97), unicode('a')) + self.assertEqual( + unichr(sys.maxunicode), + unicode('\\U%08x' % (sys.maxunicode,), 'unicode-escape') + ) + self.assertRaises(ValueError, unichr, sys.maxunicode+1) + self.assertRaises(TypeError, unichr) + self.assertRaises((OverflowError, ValueError), unichr, 2**32) # We don't want self in vars(), so these are static methods diff -r cf70f030a744 Lib/test/test_bytes.py --- a/Lib/test/test_bytes.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_bytes.py Mon Jun 23 22:24:50 2014 +0300 @@ -79,7 +79,8 @@ self.assertEqual(self.type2test(5), b'\x00\x00\x00\x00\x00') self.assertRaises(ValueError, self.type2test, -1) - self.assertEqual(self.type2test('0', 'ascii'), b'0') + if test.test_support.have_unicode: + self.assertEqual(self.type2test('0', 'ascii'), b'0') self.assertEqual(self.type2test(b'0'), b'0') self.assertRaises(OverflowError, self.type2test, sys.maxsize + 1) @@ -128,6 +129,7 @@ self.assertFalse(b3 < b2) self.assertFalse(b3 <= b2) + @test.test_support.requires_unicode @check_bytes_warnings def test_compare_to_str(self): # Byte comparisons with unicode should always fail! @@ -177,8 +179,9 @@ for step in indices[1:]: self.assertEqual(b[start:stop:step], self.type2test(L[start:stop:step])) + @test.test_support.requires_unicode def test_encoding(self): - sample = u"Hello world\n\u1234\u5678\u9abc\udef0" + sample = test.test_support.u(r"Hello world\n\u1234\u5678\u9abc\udef0") for enc in ("utf8", "utf16"): b = self.type2test(sample, enc) self.assertEqual(b, self.type2test(sample.encode(enc))) @@ -186,8 +189,9 @@ b = self.type2test(sample, "latin1", "ignore") self.assertEqual(b, self.type2test(sample[:-4], "utf-8")) + @test.test_support.requires_unicode def test_decode(self): - sample = u"Hello world\n\u1234\u5678\u9abc\def0\def0" + sample = test.test_support.u(r"Hello world\n\u1234\u5678\u9abc\def0\def0") for enc in ("utf8", "utf16"): b = self.type2test(sample, enc) self.assertEqual(b.decode(enc), sample) @@ -212,8 +216,9 @@ self.assertEqual(b1 + b2, b"abcdef") self.assertEqual(b1 + bytes(b"def"), b"abcdef") self.assertEqual(bytes(b"def") + b1, b"defabc") - self.assertRaises(TypeError, lambda: b1 + u"def") - self.assertRaises(TypeError, lambda: u"abc" + b2) + if test.test_support.have_unicode: + self.assertRaises(TypeError, lambda: b1 + u"def") + self.assertRaises(TypeError, lambda: u"abc" + b2) def test_repeat(self): for b in b"abc", self.type2test(b"abc"): @@ -238,7 +243,8 @@ self.assertRaises(ValueError, lambda: -1 in b) self.assertRaises(TypeError, lambda: None in b) self.assertRaises(TypeError, lambda: float(ord('a')) in b) - self.assertRaises(TypeError, lambda: u"a" in b) + if test.test_support.have_unicode: + self.assertRaises(TypeError, lambda: u"a" in b) for f in bytes, bytearray: self.assertIn(f(b""), b) self.assertIn(f(b"a"), b) @@ -359,6 +365,7 @@ self.assertEqual(self.type2test(b' a bb c ').split(None, 2), [b'a', b'bb', b'c ']) self.assertEqual(self.type2test(b' a bb c ').split(None, 3), [b'a', b'bb', b'c']) + @test.test_support.requires_unicode def test_split_string_error(self): self.assertRaises(TypeError, self.type2test(b'a b').split, u' ') @@ -384,6 +391,7 @@ self.assertEqual(self.type2test(b' a bb c ').rsplit(None, 2), [b' a', b'bb', b'c']) self.assertEqual(self.type2test(b' a bb c ').rsplit(None, 3), [b'a', b'bb', b'c']) + @test.test_support.requires_unicode def test_rsplit_string_error(self): self.assertRaises(TypeError, self.type2test(b'a b').rsplit, u' ') @@ -446,6 +454,7 @@ self.assertEqual(self.type2test(b'abc').lstrip(memoryview(b'ac')), b'bc') self.assertEqual(self.type2test(b'abc').rstrip(memoryview(b'ac')), b'ab') + @test.test_support.requires_unicode def test_strip_string_error(self): self.assertRaises(TypeError, self.type2test(b'abc').strip, u'b') self.assertRaises(TypeError, self.type2test(b'abc').lstrip, u'b') @@ -647,10 +656,15 @@ b[1:] = list(b'uuuu') # this works only on Python2 self.assertEqual(b, bytearray([102, 117, 117, 117, 117])) - for elem in [5, -5, 0, long(10e20), u'str', 2.3, [u'a', u'b'], [[]]]: + for elem in [5, -5, 0, long(10e20), 2.3, [[]]]: with self.assertRaises(TypeError): b[3:4] = elem + if test.test_support.have_unicode: + for elem in [u'str', [u'a', u'b']]: + with self.assertRaises(TypeError): + b[3:4] = elem + for elem in [[254, 255, 256], [-256, 9000]]: with self.assertRaises(ValueError): b[3:4] = elem @@ -691,12 +705,13 @@ self.assertTrue(b is b1) b += b"xyz" self.assertEqual(b, b"abcdefxyz") - try: - b += u"" - except TypeError: - pass - else: - self.fail("bytes += unicode didn't raise TypeError") + if test.test_support.have_unicode: + try: + b += u"" + except TypeError: + pass + else: + self.fail("bytes += unicode didn't raise TypeError") def test_irepeat(self): b = bytearray(b"abc") @@ -762,12 +777,14 @@ self.assertEqual(b, b'heo') self.assertRaises(ValueError, lambda: b.remove(ord('l'))) self.assertRaises(ValueError, lambda: b.remove(400)) - self.assertRaises(TypeError, lambda: b.remove(u'e')) + if test.test_support.have_unicode: + self.assertRaises(TypeError, lambda: b.remove(u'e')) # remove first and last b.remove(ord('o')) b.remove(ord('h')) self.assertEqual(b, b'e') - self.assertRaises(TypeError, lambda: b.remove(u'e')) + if test.test_support.have_unicode: + self.assertRaises(TypeError, lambda: b.remove(u'e')) b.remove(Indexable(ord('e'))) self.assertEqual(b, b'') @@ -792,7 +809,8 @@ b = bytearray() b.append(ord('A')) self.assertEqual(len(b), 1) - self.assertRaises(TypeError, lambda: b.append(u'o')) + if test.test_support.have_unicode: + self.assertRaises(TypeError, lambda: b.append(u'o')) b = bytearray() b.append(Indexable(ord('A'))) self.assertEqual(b, b'A') @@ -879,6 +897,9 @@ # PyByteArray_AS_STRING() C macro. self.assertRaises(ValueError, int, bytearray(b'')) +# XXX pickling needs Latin1 encoding +ByteArrayTest.test_pickling = test.test_support.requires_unicode(ByteArrayTest.test_pickling) + class AssortedBytesTest(unittest.TestCase): # @@ -1025,7 +1046,7 @@ class FixedStringTest(test.string_tests.BaseTest): def fixtype(self, obj): - if isinstance(obj, str): + if test.test_support.have_unicode and isinstance(obj, unicode): return obj.encode("utf-8") return super(FixedStringTest, self).fixtype(obj) @@ -1048,6 +1069,33 @@ class ByteArrayAsStringTest(FixedStringTest): type2test = bytearray + def test_ljust(self): + self.checkequal('abc ', 'abc', 'ljust', 10) + self.checkequal('abc ', 'abc', 'ljust', 6) + self.checkequal('abc', 'abc', 'ljust', 3) + self.checkequal('abc', 'abc', 'ljust', 2) + # XXX fillchar must be bytes, not bytearray + #self.checkequal('abc*******', 'abc', 'ljust', 10, '*') + self.checkraises(TypeError, 'abc', 'ljust') + + def test_rjust(self): + self.checkequal(' abc', 'abc', 'rjust', 10) + self.checkequal(' abc', 'abc', 'rjust', 6) + self.checkequal('abc', 'abc', 'rjust', 3) + self.checkequal('abc', 'abc', 'rjust', 2) + # XXX fillchar must be bytes, not bytearray + #self.checkequal('*******abc', 'abc', 'rjust', 10, '*') + self.checkraises(TypeError, 'abc', 'rjust') + + def test_center(self): + self.checkequal(' abc ', 'abc', 'center', 10) + self.checkequal(' abc ', 'abc', 'center', 6) + self.checkequal('abc', 'abc', 'center', 3) + self.checkequal('abc', 'abc', 'center', 2) + # XXX fillchar must be bytes, not bytearray + #self.checkequal('***abc****', 'abc', 'center', 10, '*') + self.checkraises(TypeError, 'abc', 'center') + class ByteArraySubclass(bytearray): pass @@ -1091,6 +1139,8 @@ s3 = s1.join([b"abcd"]) self.assertTrue(type(s3) is bytearray) + # XXX pickling needs Latin1 encoding + @test.test_support.requires_unicode def test_pickle(self): a = ByteArraySubclass(b"abcd") a.x = 10 @@ -1104,6 +1154,7 @@ self.assertEqual(type(a), type(b)) self.assertEqual(type(a.y), type(b.y)) + @test.test_support.requires_unicode def test_copy(self): a = ByteArraySubclass(b"abcd") a.x = 10 diff -r cf70f030a744 Lib/test/test_calendar.py --- a/Lib/test/test_calendar.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_calendar.py Mon Jun 23 22:24:50 2014 +0300 @@ -195,6 +195,7 @@ result_2004_text.strip() ) + @test_support.requires_unicode def test_output_htmlcalendar(self): self.assertEqual( calendar.HTMLCalendar().formatyearpage(2004).strip(), @@ -250,6 +251,7 @@ # verify it "acts like a sequence" in two forms of iteration self.assertEqual(value[::-1], list(reversed(value))) + @test_support.requires_unicode def test_localecalendars(self): # ensure that Locale{Text,HTML}Calendar resets the locale properly # (it is still not thread-safe though) diff -r cf70f030a744 Lib/test/test_collections.py --- a/Lib/test/test_collections.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_collections.py Mon Jun 23 22:24:50 2014 +0300 @@ -382,8 +382,9 @@ self.validate_isinstance(Iterable, '__iter__') def test_Iterator(self): - non_samples = [None, 42, 3.14, 1j, "".encode('ascii'), "", (), [], - {}, set()] + non_samples = [None, 42, 3.14, 1j, "", (), [], {}, set()] + if test_support.have_unicode: + non_samples.append(u"") for x in non_samples: self.assertNotIsInstance(x, Iterator) self.assertFalse(issubclass(type(x), Iterator), repr(type(x))) @@ -450,10 +451,12 @@ def test_Callable(self): non_samples = [None, 42, 3.14, 1j, - "", "".encode('ascii'), (), [], {}, set(), + "", (), [], {}, set(), (lambda: (yield))(), (x for x in []), ] + if test_support.have_unicode: + non_samples.append(u"") for x in non_samples: self.assertNotIsInstance(x, Callable) self.assertFalse(issubclass(type(x), Callable), repr(type(x))) diff -r cf70f030a744 Lib/test/test_compile.py --- a/Lib/test/test_compile.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_compile.py Mon Jun 23 22:24:50 2014 +0300 @@ -391,6 +391,7 @@ l = lambda: "foo" self.assertIsNone(l.__doc__) + @test_support.requires_unicode def test_unicode_encoding(self): code = u"# -*- coding: utf-8 -*-\npass\n" self.assertRaises(SyntaxError, compile, code, "tmp", "exec") diff -r cf70f030a744 Lib/test/test_copy.py --- a/Lib/test/test_copy.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_copy.py Mon Jun 23 22:24:50 2014 +0300 @@ -83,8 +83,10 @@ def f(): pass tests = [None, 42, 2L**100, 3.14, True, False, 1j, - "hello", u"hello\u1234", f.func_code, + "hello", f.func_code, NewStyle, xrange(10), Classic, max] + if test_support.have_unicode: + tests.append(u"hello" + unichr(0x1234)) for x in tests: self.assertTrue(copy.copy(x) is x, repr(x)) @@ -256,8 +258,10 @@ def f(): pass tests = [None, 42, 2L**100, 3.14, True, False, 1j, - "hello", u"hello\u1234", f.func_code, + "hello", f.func_code, NewStyle, xrange(10), Classic, max] + if test_support.have_unicode: + tests.append(u"hello" + unichr(0x1234)) for x in tests: self.assertTrue(copy.deepcopy(x) is x, repr(x)) diff -r cf70f030a744 Lib/test/test_csv.py --- a/Lib/test/test_csv.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_csv.py Mon Jun 23 22:24:50 2014 +0300 @@ -918,11 +918,12 @@ self.assertEqual(str(cm.exception), '"delimiter" must be an 1-character string') - mydialect.delimiter = u"," - with self.assertRaises(csv.Error) as cm: - mydialect() - self.assertEqual(str(cm.exception), - '"delimiter" must be string, not unicode') + if test_support.have_unicode: + mydialect.delimiter = u"," + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"delimiter" must be string, not unicode') mydialect.delimiter = 4 with self.assertRaises(csv.Error) as cm: diff -r cf70f030a744 Lib/test/test_descr.py --- a/Lib/test/test_descr.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_descr.py Mon Jun 23 22:24:50 2014 +0300 @@ -1730,7 +1730,6 @@ # only listing the ones I can remember outside of typeobject.c, since it # does it right. specials = [ - ("__unicode__", unicode, hello, set(), {}), ("__reversed__", reversed, empty_seq, set(), {}), ("__length_hint__", list, zero, set(), {"__iter__" : iden, "next" : stop}), @@ -1746,6 +1745,8 @@ ("__format__", format, format_impl, set(), {}), ("__dir__", dir, empty_seq, set(), {}), ] + if test_support.have_unicode: + specials.append(("__unicode__", unicode, hello, set(), {})) class Checker(object): def __getattr__(self, attr, test=self): @@ -2495,9 +2496,9 @@ else: self.assertEqual("super shouldn't accept keyword args") - def test_basic_inheritance(self): - # Testing inheritance from basic types... - + # Testing inheritance from basic types... + + def test_int_inheritance(self): class hexint(int): def __repr__(self): return hex(self) @@ -2518,6 +2519,7 @@ self.assertIs((hexint(0) << 12).__class__, int) self.assertIs((hexint(0) >> 12).__class__, int) + def test_long_inheritance(self): class octlong(long): __slots__ = [] def __str__(self): @@ -2567,6 +2569,7 @@ self.assertEqual(a.__dict__, {}) self.assertEqual(long(a), -1) # self.assertTrue PyNumber_Long() copies the sign bit + def test_float_inheritance(self): class precfloat(float): __slots__ = ['prec'] def __init__(self, value=0.0, prec=12): @@ -2581,6 +2584,7 @@ self.assertEqual(hash(a), hash(12345.0)) self.assertIs((+a).__class__, float) + def test_complex_inheritance(self): class madcomplex(complex): def __repr__(self): return "%.17gj%+.17g" % (self.imag, self.real) @@ -2607,6 +2611,7 @@ self.assertEqual((a / 1).__class__, complex) self.assertEqual(a / 1, base) + def test_tuple_inheritance(self): class madtuple(tuple): _rev = None def rev(self): @@ -2642,6 +2647,7 @@ self.assertIs((a * 2).__class__, tuple) self.assertIs(a[:].__class__, tuple) + def test_str_inheritance(self): class madstring(str): _rev = None def rev(self): @@ -2709,6 +2715,8 @@ self.assertIs(s.lower().__class__, str) self.assertEqual(s.lower(), base) + @test_support.requires_unicode + def test_unicode_inheritance(self): class madunicode(unicode): _rev = None def rev(self): @@ -2768,6 +2776,7 @@ self.assertIs(u[0:0].__class__, unicode) self.assertEqual(u[0:0], u"") + def test_list_inheritance(self): class sublist(list): pass a = sublist(range(5)) @@ -2790,6 +2799,7 @@ self.assertEqual(a[-1], 9) self.assertEqual(a[:5], range(5)) + def test_file_inheritance(self): class CountedInput(file): """Counts lines read by self.readline(). @@ -2840,13 +2850,16 @@ self.assertEqual(long(x=3), 3L) self.assertEqual(complex(imag=42, real=666), complex(666, 42)) self.assertEqual(str(object=500), '500') - self.assertEqual(unicode(string='abc', errors='strict'), u'abc') + if test_support.have_unicode: + self.assertEqual(unicode(string='abc', errors='strict'), u'abc') self.assertEqual(tuple(sequence=range(3)), (0, 1, 2)) self.assertEqual(list(sequence=(0, 1, 2)), range(3)) # note: as of Python 2.3, dict() no longer has an "items" keyword arg - for constructor in (int, float, long, complex, str, unicode, - tuple, list, file): + constructors = [int, float, long, complex, str, tuple, list, file] + if test_support.have_unicode: + constructors.append(unicode) + for constructor in constructors: try: constructor(bogus_keyword_arg=1) except TypeError: @@ -3555,13 +3568,14 @@ # PyArg_ParseTuple 't#' code. self.assertEqual(binascii.b2a_hex(m), binascii.b2a_hex(base)) - # It's not clear that unicode will continue to support the character - # buffer interface, and this test will fail if that's taken away. - class MyUni(unicode): - pass - base = u'abc' - m = MyUni(base) - self.assertEqual(binascii.b2a_hex(m), binascii.b2a_hex(base)) + if test_support.have_unicode: + # It's not clear that unicode will continue to support the character + # buffer interface, and this test will fail if that's taken away. + class MyUni(unicode): + pass + base = u'abc' + m = MyUni(base) + self.assertEqual(binascii.b2a_hex(m), binascii.b2a_hex(base)) class MyInt(int): pass diff -r cf70f030a744 Lib/test/test_exceptions.py --- a/Lib/test/test_exceptions.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_exceptions.py Mon Jun 23 22:24:50 2014 +0300 @@ -6,7 +6,7 @@ import pickle, cPickle from test.test_support import (TESTFN, unlink, run_unittest, captured_output, - check_warnings, cpython_only) + check_warnings, cpython_only, have_unicode, requires_unicode) from test.test_pep352 import ignore_deprecation_warnings # XXX This is not really enough, each *operation* should be tested! @@ -252,21 +252,24 @@ 'print_file_and_line' : None, 'msg' : 'msgStr', 'filename' : None, 'lineno' : None, 'offset' : None}), (UnicodeError, (), {'message' : '', 'args' : (),}), - (UnicodeEncodeError, ('ascii', u'a', 0, 1, 'ordinal not in range'), - {'message' : '', 'args' : ('ascii', u'a', 0, 1, - 'ordinal not in range'), - 'encoding' : 'ascii', 'object' : u'a', - 'start' : 0, 'reason' : 'ordinal not in range'}), - (UnicodeDecodeError, ('ascii', '\xff', 0, 1, 'ordinal not in range'), - {'message' : '', 'args' : ('ascii', '\xff', 0, 1, - 'ordinal not in range'), - 'encoding' : 'ascii', 'object' : '\xff', - 'start' : 0, 'reason' : 'ordinal not in range'}), - (UnicodeTranslateError, (u"\u3042", 0, 1, "ouch"), - {'message' : '', 'args' : (u'\u3042', 0, 1, 'ouch'), - 'object' : u'\u3042', 'reason' : 'ouch', - 'start' : 0, 'end' : 1}), ] + if have_unicode: + exceptionList += [ + (UnicodeEncodeError, ('ascii', u'a', 0, 1, 'ordinal not in range'), + {'message' : '', 'args' : ('ascii', u'a', 0, 1, + 'ordinal not in range'), + 'encoding' : 'ascii', 'object' : u'a', + 'start' : 0, 'reason' : 'ordinal not in range'}), + (UnicodeDecodeError, ('ascii', '\xff', 0, 1, 'ordinal not in range'), + {'message' : '', 'args' : ('ascii', '\xff', 0, 1, + 'ordinal not in range'), + 'encoding' : 'ascii', 'object' : '\xff', + 'start' : 0, 'reason' : 'ordinal not in range'}), + (UnicodeTranslateError, (unichr(0x3042), 0, 1, "ouch"), + {'message' : '', 'args' : (unichr(0x3042), 0, 1, 'ouch'), + 'object' : unichr(0x3042), 'reason' : 'ouch', + 'start' : 0, 'end' : 1}), + ] try: exceptionList.append( (WindowsError, (1, 'strErrorStr', 'filenameStr'), @@ -389,11 +392,13 @@ # Make sure both instances and classes have a str and unicode # representation. self.assertTrue(str(Exception)) - self.assertTrue(unicode(Exception)) self.assertTrue(str(Exception('a'))) - self.assertTrue(unicode(Exception(u'a'))) - self.assertTrue(unicode(Exception(u'\xe1'))) + if have_unicode: + self.assertTrue(unicode(Exception)) + self.assertTrue(unicode(Exception(u'a'))) + self.assertTrue(unicode(Exception(u'\xe1'))) + @requires_unicode def testUnicodeChangeAttributes(self): # See issue 7309. This was a crasher. @@ -431,6 +436,7 @@ u.start = 1000 self.assertEqual(str(u), "can't translate characters in position 1000-4: 965230951443685724997") + @requires_unicode def test_unicode_errors_no_object(self): # See issue #21134. klasses = UnicodeEncodeError, UnicodeDecodeError, UnicodeTranslateError @@ -509,6 +515,7 @@ return self.msg +@requires_unicode class TestSameStrAndUnicodeMsg(unittest.TestCase): """unicode(err) should return the same message of str(err). See #6108""" @@ -528,7 +535,7 @@ KeyError('both should have the same quotes'), UnicodeDecodeError('ascii', '\xc3\xa0', 0, 1, 'ordinal not in range(128)'), - UnicodeEncodeError('ascii', u'\u1234', 0, 1, + UnicodeEncodeError('ascii', unichr(0x1234), 0, 1, 'ordinal not in range(128)') ] for exception in exceptions: diff -r cf70f030a744 Lib/test/test_fileinput.py --- a/Lib/test/test_fileinput.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_fileinput.py Mon Jun 23 22:24:50 2014 +0300 @@ -6,6 +6,7 @@ import unittest from test.test_support import verbose, TESTFN, run_unittest from test.test_support import unlink as safe_unlink +from test.test_support import have_unicode, requires_unicode, u import sys, re from StringIO import StringIO from fileinput import FileInput, hook_encoded @@ -154,6 +155,7 @@ finally: remove_tempfiles(t1, t2) + @requires_unicode def test_unicode_filenames(self): try: t1 = writeTmp(1, ["A\nB"]) @@ -210,13 +212,14 @@ self.fail("FileInput should check openhook for being callable") except ValueError: pass - try: - t1 = writeTmp(1, ["A\nB"], mode="wb") - fi = FileInput(files=t1, openhook=hook_encoded("rot13")) - lines = list(fi) - self.assertEqual(lines, ["N\n", "O"]) - finally: - remove_tempfiles(t1) + if have_unicode: + try: + t1 = writeTmp(1, ["A\nB"], mode="wb") + fi = FileInput(files=t1, openhook=hook_encoded("rot13")) + lines = list(fi) + self.assertEqual(lines, ["N\n", "O"]) + finally: + remove_tempfiles(t1) def test_readline(self): with open(TESTFN, 'wb') as f: @@ -232,12 +235,14 @@ # file being read when it shouldn't have been. self.assertEqual(fi.readline(), u'A\n') self.assertEqual(fi.readline(), u'B\r\n') - self.assertEqual(fi.readline(), u'C\r') - with self.assertRaises(UnicodeDecodeError): - # Read to the end of file. - list(fi) + if have_unicode: + self.assertEqual(fi.readline(), u'C\r') + with self.assertRaises(UnicodeDecodeError): + # Read to the end of file. + list(fi) fi.close() +@requires_unicode class Test_hook_encoded(unittest.TestCase): """Unit tests for fileinput.hook_encoded()""" @@ -254,10 +259,10 @@ fi.close() self.assertEqual(lines, expected_lines) - check('r', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac']) - check('rU', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac']) - check('U', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac']) - check('rb', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac']) + check('r', [u'A\n', u'B\r\n', u'C\r', u(r'D\u20ac')]) + check('rU', [u'A\n', u'B\r\n', u'C\r', u(r'D\u20ac')]) + check('U', [u'A\n', u'B\r\n', u'C\r', u(r'D\u20ac')]) + check('rb', [u'A\n', u'B\r\n', u'C\r', u(r'D\u20ac')]) def test_main(): run_unittest(BufferSizesTests, FileInputTests, Test_hook_encoded) diff -r cf70f030a744 Lib/test/test_float.py --- a/Lib/test/test_float.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_float.py Mon Jun 23 22:24:50 2014 +0300 @@ -50,7 +50,8 @@ # extra long strings should no longer be a problem # (in 2.6, long unicode inputs to float raised ValueError) float('.' + '1'*1000) - float(unicode('.' + '1'*1000)) + if test_support.have_unicode: + float(unicode('.' + '1'*1000)) def check_conversion_to_int(self, x): """Check that int(x) has the correct value and type, for a float x.""" @@ -152,18 +153,19 @@ def __float__(self): return float(str(self)) + 1 - class FooUnicode(unicode): - def __float__(self): - return float(unicode(self)) + 1 - self.assertAlmostEqual(float(Foo0()), 42.) self.assertAlmostEqual(float(Foo1()), 42.) self.assertAlmostEqual(float(Foo2()), 42.) self.assertAlmostEqual(float(Foo3(21)), 42.) self.assertRaises(TypeError, float, Foo4(42)) - self.assertAlmostEqual(float(FooUnicode('8')), 9.) self.assertAlmostEqual(float(FooStr('8')), 9.) + if test_support.have_unicode: + class FooUnicode(unicode): + def __float__(self): + return float(unicode(self)) + 1 + self.assertAlmostEqual(float(FooUnicode('8')), 9.) + def test_is_integer(self): self.assertFalse((1.1).is_integer()) self.assertTrue((1.).is_integer()) @@ -1035,13 +1037,16 @@ '0x.p0', # no hex digits before or after point '0x1,p0', # wrong decimal point character '0x1pa', - u'0x1p\uff10', # fullwidth Unicode digits - u'\uff10x1p0', - u'0x\uff11p0', - u'0x1.\uff10p0', '0x1p0 \n 0x2p0', '0x1p0\0 0x1p0', # embedded null byte is not end of string ] + if test_support.have_unicode: + invalid_inputs += [ + test_support.u(r'0x1p\uff10'), # fullwidth Unicode digits + test_support.u(r'\uff10x1p0'), + test_support.u(r'0x\uff11p0'), + test_support.u(r'0x1.\uff10p0'), + ] for x in invalid_inputs: try: result = fromHex(x) diff -r cf70f030a744 Lib/test/test_future.py --- a/Lib/test/test_future.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_future.py Mon Jun 23 22:24:50 2014 +0300 @@ -109,7 +109,8 @@ def test_unicode_literals_exec(self): scope = {} exec "from __future__ import unicode_literals; x = ''" in scope - self.assertIsInstance(scope["x"], unicode) + if test_support.have_unicode: + self.assertIsInstance(scope["x"], unicode) def test_main(): diff -r cf70f030a744 Lib/test/test_future4.py --- a/Lib/test/test_future4.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_future4.py Mon Jun 23 22:24:50 2014 +0300 @@ -3,6 +3,7 @@ import unittest from test import test_support +@test_support.requires_unicode class TestFuture(unittest.TestCase): def assertType(self, obj, typ): self.assertTrue(type(obj) is typ, diff -r cf70f030a744 Lib/test/test_future5.py --- a/Lib/test/test_future5.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_future5.py Mon Jun 23 22:24:50 2014 +0300 @@ -8,6 +8,7 @@ class TestMultipleFeatures(unittest.TestCase): + @test_support.requires_unicode def test_unicode_literals(self): self.assertIsInstance("", unicode) diff -r cf70f030a744 Lib/test/test_gdb.py --- a/Lib/test/test_gdb.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_gdb.py Mon Jun 23 22:24:50 2014 +0300 @@ -10,7 +10,7 @@ import unittest import sysconfig -from test.test_support import run_unittest, findfile +from test.test_support import run_unittest, findfile, requires_unicode, u try: gdb_version, _ = subprocess.Popen(["gdb", "--version"], @@ -261,6 +261,7 @@ self.assertGdbRepr((1,)) self.assertGdbRepr(('foo', 'bar', 'baz')) + @requires_unicode def test_unicode(self): 'Verify the pretty-printing of unicode values' # Test the empty unicode string: @@ -270,12 +271,12 @@ # Test printing a single character: # U+2620 SKULL AND CROSSBONES - self.assertGdbRepr(u'\u2620') + self.assertGdbRepr(u(r'\u2620')) # Test printing a Japanese unicode string # (I believe this reads "mojibake", using 3 characters from the CJK # Unified Ideographs area, followed by U+3051 HIRAGANA LETTER KE) - self.assertGdbRepr(u'\u6587\u5b57\u5316\u3051') + self.assertGdbRepr(u(r'\u6587\u5b57\u5316\u3051')) # Test a character outside the BMP: # U+1D121 MUSICAL SYMBOL C CLEF @@ -283,7 +284,7 @@ # UTF-8: 0xF0 0x9D 0x84 0xA1 # UTF-16: 0xD834 0xDD21 # This will only work on wide-unicode builds: - self.assertGdbRepr(u"\U0001D121") + self.assertGdbRepr(u(r"\U0001D121")) def test_sets(self): 'Verify the pretty-printing of sets' diff -r cf70f030a744 Lib/test/test_getargs.py --- a/Lib/test/test_getargs.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_getargs.py Mon Jun 23 22:24:50 2014 +0300 @@ -20,6 +20,7 @@ # If the encoding succeeds using the current default encoding, # this test will fail because it does not test the right part of the # PyArg_ParseTuple() implementation. + @test_support.requires_unicode def test_with_marshal(self): arg = unicode(r'\222', 'unicode-escape') self.assertRaises(UnicodeError, marshal.loads, arg) diff -r cf70f030a744 Lib/test/test_glob.py --- a/Lib/test/test_glob.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_glob.py Mon Jun 23 22:24:50 2014 +0300 @@ -4,7 +4,7 @@ import sys import unittest -from test.test_support import run_unittest, TESTFN +from test.test_support import run_unittest, TESTFN, have_unicode, requires_unicode def fsdecode(s): @@ -50,9 +50,10 @@ p = os.path.join(self.tempdir, pattern) res = glob.glob(p) self.assertEqual(list(glob.iglob(p)), res) - ures = [fsdecode(x) for x in res] - self.assertEqual(glob.glob(fsdecode(p)), ures) - self.assertEqual(list(glob.iglob(fsdecode(p))), ures) + if have_unicode: + ures = [fsdecode(x) for x in res] + self.assertEqual(glob.glob(fsdecode(p)), ures) + self.assertEqual(list(glob.iglob(fsdecode(p))), ures) return res def assertSequencesEqual_noorder(self, l1, l2): @@ -73,14 +74,15 @@ res = glob.glob(os.path.join(os.curdir, '*')) self.assertEqual({type(r) for r in res}, {str}) - # test return types are unicode, but only if os.listdir - # returns unicode filenames - tmp = os.listdir(fsdecode(os.curdir)) - if {type(x) for x in tmp} == {unicode}: - res = glob.glob(u'*') - self.assertEqual({type(r) for r in res}, {unicode}) - res = glob.glob(os.path.join(fsdecode(os.curdir), u'*')) - self.assertEqual({type(r) for r in res}, {unicode}) + if have_unicode: + # test return types are unicode, but only if os.listdir + # returns unicode filenames + tmp = os.listdir(fsdecode(os.curdir)) + if {type(x) for x in tmp} == {unicode}: + res = glob.glob(u'*') + self.assertEqual({type(r) for r in res}, {unicode}) + res = glob.glob(os.path.join(fsdecode(os.curdir), u'*')) + self.assertEqual({type(r) for r in res}, {unicode}) def test_glob_one_directory(self): eq = self.assertSequencesEqual_noorder @@ -128,6 +130,7 @@ {self.norm('aaa') + os.sep, self.norm('aab') + os.sep}, ]) + @requires_unicode def test_glob_unicode_directory_with_trailing_slash(self): # Same as test_glob_directory_with_trailing_slash, but with an # unicode argument. diff -r cf70f030a744 Lib/test/test_gzip.py --- a/Lib/test/test_gzip.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_gzip.py Mon Jun 23 22:24:50 2014 +0300 @@ -233,7 +233,7 @@ # RFC 1952 specifies that this is the name of the input file, if any. # However, the gzip module defaults to storing the name of the output # file in this field. - expected = self.filename.encode('Latin-1') + '\x00' + expected = self.filename + '\x00' nameBytes = fRead.read(len(expected)) self.assertEqual(nameBytes, expected) diff -r cf70f030a744 Lib/test/test_hmac.py --- a/Lib/test/test_hmac.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_hmac.py Mon Jun 23 22:24:50 2014 +0300 @@ -315,10 +315,11 @@ self.assertRaises(TypeError, hmac.compare_digest, a, b) a, b = b"foobar", 200 self.assertRaises(TypeError, hmac.compare_digest, a, b) - a, b = u"foobar", b"foobar" - self.assertRaises(TypeError, hmac.compare_digest, a, b) - a, b = b"foobar", u"foobar" - self.assertRaises(TypeError, hmac.compare_digest, a, b) + if test_support.have_unicode: + a, b = u"foobar", b"foobar" + self.assertRaises(TypeError, hmac.compare_digest, a, b) + a, b = b"foobar", u"foobar" + self.assertRaises(TypeError, hmac.compare_digest, a, b) # Testing bytes of different lengths a, b = b"foobar", b"foo" @@ -378,10 +379,11 @@ self.assertFalse(hmac.compare_digest(a, b)) # Testing error cases - a, b = u"foobar", b"foobar" - self.assertRaises(TypeError, hmac.compare_digest, a, b) - a, b = b"foobar", u"foobar" - self.assertRaises(TypeError, hmac.compare_digest, a, b) + if test_support.have_unicode: + a, b = u"foobar", b"foobar" + self.assertRaises(TypeError, hmac.compare_digest, a, b) + a, b = b"foobar", u"foobar" + self.assertRaises(TypeError, hmac.compare_digest, a, b) a, b = b"foobar", 1 self.assertRaises(TypeError, hmac.compare_digest, a, b) a, b = 100, 200 diff -r cf70f030a744 Lib/test/test_imghdr.py --- a/Lib/test/test_imghdr.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_imghdr.py Mon Jun 23 22:24:50 2014 +0300 @@ -2,7 +2,7 @@ import io import sys import unittest -from test.test_support import findfile, TESTFN, unlink, run_unittest +from test.test_support import findfile, TESTFN, unlink, run_unittest, have_unicode TEST_FILES = ( ('python.png', 'png'), @@ -39,8 +39,9 @@ for filename, expected in TEST_FILES: filename = findfile(filename, subdir='imghdrdata') self.assertEqual(imghdr.what(filename), expected) - ufilename = filename.decode(sys.getfilesystemencoding()) - self.assertEqual(imghdr.what(ufilename), expected) + if have_unicode: + ufilename = filename.decode(sys.getfilesystemencoding()) + self.assertEqual(imghdr.what(ufilename), expected) with open(filename, 'rb') as stream: self.assertEqual(imghdr.what(stream), expected) with open(filename, 'rb') as stream: diff -r cf70f030a744 Lib/test/test_locale.py --- a/Lib/test/test_locale.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_locale.py Mon Jun 23 22:24:50 2014 +0300 @@ -1,4 +1,4 @@ -from test.test_support import run_unittest, verbose +from test.test_support import run_unittest, verbose, have_unicode import unittest import locale import sys @@ -328,7 +328,7 @@ self._test_format("%-10d", 4200, grouping=True, out='4 200'.ljust(10)) def test_currency(self): - euro = u'\u20ac'.encode('utf-8') + euro = '\xe2\x82\xac' # U+20AC in UTF-8 self._test_currency(50000, "50000,00 " + euro) self._test_currency(50000, "50 000,00 " + euro, grouping=True) # XXX is the trailing space a bug? @@ -447,14 +447,14 @@ def test_getpreferredencoding(self): # Invoke getpreferredencoding to make sure it does not cause exceptions. enc = locale.getpreferredencoding() - if enc: + if enc and have_unicode: # If encoding non-empty, make sure it is valid codecs.lookup(enc) - if hasattr(locale, "strcoll"): - def test_strcoll_3303(self): - # test crasher from bug #3303 - self.assertRaises(TypeError, locale.strcoll, u"a", None) + @unittest.skipUnless(hasattr(locale, "strcoll"), 'needs locale.strcoll()') + def test_strcoll_3303(self): + # test crasher from bug #3303 + self.assertRaises(TypeError, locale.strcoll, u"a", None) def test_setlocale_category(self): locale.setlocale(locale.LC_ALL) diff -r cf70f030a744 Lib/test/test_marshal.py --- a/Lib/test/test_marshal.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_marshal.py Mon Jun 23 22:24:50 2014 +0300 @@ -269,7 +269,10 @@ self.assertRaises(ValueError, marshal.loads, invalid_string) LARGE_SIZE = 2**31 -character_size = 4 if sys.maxunicode > 0xFFFF else 2 +if test_support.have_unicode: + character_size = 4 if sys.maxunicode > 0xFFFF else 2 +else: + character_size = 1 pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4 @unittest.skipIf(LARGE_SIZE > sys.maxsize, "test cannot run on 32-bit systems") diff -r cf70f030a744 Lib/test/test_memoryview.py --- a/Lib/test/test_memoryview.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_memoryview.py Mon Jun 23 22:24:50 2014 +0300 @@ -160,11 +160,12 @@ self.assertTrue(m[0:6] == m[:]) self.assertFalse(m[0:5] == m) - # Comparison with objects which don't support the buffer API - self.assertFalse(m == u"abcdef") - self.assertTrue(m != u"abcdef") - self.assertFalse(u"abcdef" == m) - self.assertTrue(u"abcdef" != m) + if test_support.have_unicode: + # Comparison with objects which don't support the buffer API + self.assertFalse(m == u"abcdef") + self.assertTrue(m != u"abcdef") + self.assertFalse(u"abcdef" == m) + self.assertTrue(u"abcdef" != m) # Unordered comparisons are unimplemented, and therefore give # arbitrary results (they raise a TypeError in py3k) diff -r cf70f030a744 Lib/test/test_minidom.py --- a/Lib/test/test_minidom.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_minidom.py Mon Jun 23 22:24:50 2014 +0300 @@ -2,7 +2,7 @@ import pickle from StringIO import StringIO -from test.test_support import verbose, run_unittest, findfile +from test.test_support import verbose, run_unittest, findfile, requires_unicode, u import unittest import xml.dom @@ -982,14 +982,15 @@ text3.parentNode is elm1, "testSAX2DOM - parents") doc.unlink() + @requires_unicode def testEncodings(self): doc = parseString('') - self.confirm(doc.toxml() == u'\u20ac' - and doc.toxml('utf-8') == - '\xe2\x82\xac' - and doc.toxml('iso-8859-15') == - '\xa4', - "testEncodings - encoding EURO SIGN") + self.assertEqual(doc.toxml(), + u(r'\u20ac')) + self.assertEqual(doc.toxml('utf-8'), + '\xe2\x82\xac') + self.assertEqual(doc.toxml('iso-8859-15'), + '\xa4') # Verify that character decoding errors raise exceptions instead # of crashing diff -r cf70f030a744 Lib/test/test_module.py --- a/Lib/test/test_module.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_module.py Mon Jun 23 22:24:50 2014 +0300 @@ -1,6 +1,6 @@ # Test the module type import unittest -from test.test_support import run_unittest, gc_collect +from test.test_support import run_unittest, gc_collect, requires_unicode, u import sys ModuleType = type(sys) @@ -34,17 +34,19 @@ self.assertEqual(foo.__dict__, {"__name__": "foo", "__doc__": "foodoc"}) + @requires_unicode def test_unicode_docstring(self): # Unicode docstring - foo = ModuleType("foo", u"foodoc\u1234") + doc = u(r"foodoc\u1234") + foo = ModuleType("foo", doc) self.assertEqual(foo.__name__, "foo") - self.assertEqual(foo.__doc__, u"foodoc\u1234") + self.assertEqual(foo.__doc__, doc) self.assertEqual(foo.__dict__, - {"__name__": "foo", "__doc__": u"foodoc\u1234"}) + {"__name__": "foo", "__doc__": doc}) def test_reinit(self): # Reinitialization should not replace the __dict__ - foo = ModuleType("foo", u"foodoc\u1234") + foo = ModuleType("foo", "bardoc") foo.bar = 42 d = foo.__dict__ foo.__init__("foo", "foodoc") diff -r cf70f030a744 Lib/test/test_normalization.py --- a/Lib/test/test_normalization.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_normalization.py Mon Jun 23 22:24:50 2014 +0300 @@ -92,7 +92,7 @@ def test_bug_834676(self): # Check for bug 834676 - normalize('NFC', u'\ud55c\uae00') + normalize('NFC', unichr(0xd55c) + unichr(0xae00)) def test_main(): diff -r cf70f030a744 Lib/test/test_os.py --- a/Lib/test/test_os.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_os.py Mon Jun 23 22:24:50 2014 +0300 @@ -56,6 +56,7 @@ os.closerange(first, first + 2) self.assertRaises(OSError, os.write, first, "a") + @test_support.requires_unicode @test_support.cpython_only def test_rename(self): path = unicode(test_support.TESTFN) diff -r cf70f030a744 Lib/test/test_parser.py --- a/Lib/test/test_parser.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_parser.py Mon Jun 23 22:24:50 2014 +0300 @@ -27,6 +27,7 @@ def check_expr(self, s): self.roundtrip(parser.expr, s) + @support.requires_unicode def test_flags_passed(self): # The unicode literals flags has to be passed from the paser to AST # generation. @@ -562,6 +563,7 @@ st = parser.suite('1 = 3 + 4') self.assertRaises(SyntaxError, parser.compilest, st) + @support.requires_unicode def test_compile_badunicode(self): st = parser.suite('a = u"\U12345678"') self.assertRaises(SyntaxError, parser.compilest, st) diff -r cf70f030a744 Lib/test/test_peepholer.py --- a/Lib/test/test_peepholer.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_peepholer.py Mon Jun 23 22:24:50 2014 +0300 @@ -1,6 +1,7 @@ import dis import sys from cStringIO import StringIO +import test.test_support import unittest def disassemble(func): @@ -137,6 +138,7 @@ asm = dis_single('a="x"*1000') self.assertIn('(1000)', asm) + @test.test_support.requires_unicode def test_binary_subscr_on_unicode(self): # unicode strings don't get optimized asm = dis_single('u"foo"[0]') diff -r cf70f030a744 Lib/test/test_pep352.py --- a/Lib/test/test_pep352.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_pep352.py Mon Jun 23 22:24:50 2014 +0300 @@ -2,7 +2,7 @@ import __builtin__ import exceptions import warnings -from test.test_support import run_unittest, check_warnings +from test.test_support import run_unittest, check_warnings, have_unicode import os import sys from platform import system as platform_system @@ -25,6 +25,11 @@ return func(*args, **kw) return wrapper +if have_unicode: + ignored_exceptions = set() +else: + ignored_exceptions = set(('UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeTranslateError')) + class ExceptionClassTests(unittest.TestCase): """Tests for anything relating to exception objects themselves (e.g., @@ -69,6 +74,8 @@ if '[' in exc_name: left_bracket = exc_name.index('[') exc_name = exc_name[:left_bracket-1] # cover space + if exc_name in ignored_exceptions: + continue try: exc = getattr(__builtin__, exc_name) except AttributeError: @@ -85,7 +92,7 @@ self.verify_instance_interface(exc()) except TypeError: pass - self.assertIn(exc_name, exc_set) + self.assertIn(exc_name, exc_set | ignored_exceptions) exc_set.discard(exc_name) last_exc = exc last_depth = depth @@ -93,8 +100,9 @@ inheritance_tree.close() self.assertEqual(len(exc_set), 0, "%s not accounted for" % exc_set) - interface_tests = ("length", "args", "message", "str", "unicode", "repr", - "indexing") + interface_tests = ["length", "args", "message", "str", "repr", "indexing"] + if have_unicode: + interface_tests.append("unicode") def interface_test_driver(self, results): for test_name, (given, expected) in zip(self.interface_tests, results): @@ -106,10 +114,12 @@ # Make sure interface works properly when given a single argument arg = "spam" exc = Exception(arg) - results = ([len(exc.args), 1], [exc.args[0], arg], [exc.message, arg], - [str(exc), str(arg)], [unicode(exc), unicode(arg)], + results = [[len(exc.args), 1], [exc.args[0], arg], [exc.message, arg], + [str(exc), str(arg)], [repr(exc), exc.__class__.__name__ + repr(exc.args)], - [exc[0], arg]) + [exc[0], arg]] + if have_unicode: + results.append([unicode(exc), unicode(arg)]) self.interface_test_driver(results) @ignore_deprecation_warnings @@ -118,21 +128,24 @@ arg_count = 3 args = tuple(range(arg_count)) exc = Exception(*args) - results = ([len(exc.args), arg_count], [exc.args, args], + results = [[len(exc.args), arg_count], [exc.args, args], [exc.message, ''], [str(exc), str(args)], - [unicode(exc), unicode(args)], [repr(exc), exc.__class__.__name__ + repr(exc.args)], - [exc[-1], args[-1]]) + [exc[-1], args[-1]]] + if have_unicode: + results.append([unicode(exc), unicode(args)]) self.interface_test_driver(results) @ignore_deprecation_warnings def test_interface_no_arg(self): # Make sure that with no args that interface is correct exc = Exception() - results = ([len(exc.args), 0], [exc.args, tuple()], + results = [[len(exc.args), 0], [exc.args, tuple()], [exc.message, ''], - [str(exc), ''], [unicode(exc), u''], - [repr(exc), exc.__class__.__name__ + '()'], [True, True]) + [str(exc), ''], + [repr(exc), exc.__class__.__name__ + '()'], [True, True]] + if have_unicode: + results.append([unicode(exc), u'']) self.interface_test_driver(results) diff -r cf70f030a744 Lib/test/test_print.py --- a/Lib/test/test_print.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_print.py Mon Jun 23 22:24:50 2014 +0300 @@ -106,6 +106,7 @@ self.assertRaises(TypeError, print, '', end=3) self.assertRaises(AttributeError, print, '', file='') + @test_support.requires_unicode def test_mixed_args(self): # If an unicode arg is passed, sep and end should be unicode, too. class Recorder(object): diff -r cf70f030a744 Lib/test/test_pyexpat.py --- a/Lib/test/test_pyexpat.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_pyexpat.py Mon Jun 23 22:24:50 2014 +0300 @@ -20,6 +20,7 @@ [0, 0], ] + @test_support.requires_unicode def test_returns_unicode(self): for x, y in self.set_get_pairs: self.parser.returns_unicode = x @@ -159,6 +160,7 @@ self.assertEqual(op[15], "External entity ref: (None, 'entity.file', None)") self.assertEqual(op[16], "End element: 'root'") + @test_support.requires_unicode def test_unicode(self): # Try the parse again, this time producing Unicode output out = self.Outputter() @@ -477,7 +479,7 @@ self.assertRaises(ValueError, f, 0) def test_unchanged_size(self): - xml1 = ("%s" % ('a' * 512)) + xml1 = ("%s" % ('a' * 512)) xml2 = 'a'*512 + '' parser = expat.ParserCreate() parser.CharacterDataHandler = self.counting_handler @@ -500,7 +502,7 @@ def test_disabling_buffer(self): - xml1 = "%s" % ('a' * 512) + xml1 = "%s" % ('a' * 512) xml2 = ('b' * 1024) xml3 = "%s" % ('c' * 1024) parser = expat.ParserCreate() @@ -538,7 +540,7 @@ self.n += 1 def small_buffer_test(self, buffer_len): - xml = "%s" % ('a' * buffer_len) + xml = "%s" % ('a' * buffer_len) parser = expat.ParserCreate() parser.CharacterDataHandler = self.counting_handler parser.buffer_size = 1024 @@ -549,7 +551,7 @@ return self.n def test_change_size_1(self): - xml1 = "%s" % ('a' * 1024) + xml1 = "%s" % ('a' * 1024) xml2 = "aaa%s" % ('a' * 1025) parser = expat.ParserCreate() parser.CharacterDataHandler = self.counting_handler @@ -565,7 +567,7 @@ self.assertEqual(self.n, 2) def test_change_size_2(self): - xml1 = "a%s" % ('a' * 1023) + xml1 = "a%s" % ('a' * 1023) xml2 = "aaa%s" % ('a' * 1025) parser = expat.ParserCreate() parser.CharacterDataHandler = self.counting_handler diff -r cf70f030a744 Lib/test/test_re.py --- a/Lib/test/test_re.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_re.py Mon Jun 23 22:24:50 2014 +0300 @@ -1,6 +1,6 @@ from test.test_support import verbose, run_unittest, import_module from test.test_support import precisionbigmemtest, _2G, cpython_only -from test.test_support import captured_stdout +from test.test_support import captured_stdout, have_unicode, requires_unicode, u import re from re import Scanner import sre_constants @@ -86,6 +86,7 @@ self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), 'abc\ndef\n') + @requires_unicode def test_bug_1140(self): # re.sub(x, y, u'') should return u'', not '', and # re.sub(x, y, '') should return '', not u''. @@ -376,10 +377,11 @@ "abcd abc bcd bx", re.LOCALE).group(1), "bx") self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd", re.LOCALE).group(1), "bx") - self.assertEqual(re.search(r"\b(b.)\b", - "abcd abc bcd bx", re.UNICODE).group(1), "bx") - self.assertEqual(re.search(r"\B(b.)\B", - "abc bcd bc abxd", re.UNICODE).group(1), "bx") + if have_unicode: + self.assertEqual(re.search(r"\b(b.)\b", + "abcd abc bcd bx", re.UNICODE).group(1), "bx") + self.assertEqual(re.search(r"\B(b.)\B", + "abc bcd bc abxd", re.UNICODE).group(1), "bx") self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None) @@ -423,13 +425,14 @@ # Can match around the whitespace. self.assertEqual(len(re.findall(r"\B", " ")), 2) + @requires_unicode def test_bigcharset(self): - self.assertEqual(re.match(u"([\u2222\u2223])", - u"\u2222").group(1), u"\u2222") - self.assertEqual(re.match(u"([\u2222\u2223])", - u"\u2222", re.UNICODE).group(1), u"\u2222") + self.assertEqual(re.match(u(r"([\u2222\u2223])"), + unichr(0x2222)).group(1), unichr(0x2222)) + self.assertEqual(re.match(u(r"([\u2222\u2223])"), + unichr(0x2222), re.UNICODE).group(1), unichr(0x2222)) r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255))) - self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01") + self.assertEqual(re.match(r, unichr(0xff01), re.UNICODE).group(), unichr(0xff01)) def test_big_codesize(self): # Issue #1160 @@ -503,8 +506,9 @@ self.assertEqual(m.group(), match) self.assertEqual(m.span(), span) + @requires_unicode def test_re_escape(self): - alnum_chars = string.ascii_letters + string.digits + alnum_chars = unicode(string.ascii_letters + string.digits) p = u''.join(unichr(i) for i in range(256)) for c in p: if c in alnum_chars: @@ -517,7 +521,7 @@ self.assertMatch(re.escape(p), p) def test_re_escape_byte(self): - alnum_chars = (string.ascii_letters + string.digits).encode('ascii') + alnum_chars = string.ascii_letters + string.digits p = ''.join(chr(i) for i in range(256)) for b in p: if b in alnum_chars: @@ -529,20 +533,21 @@ self.assertMatch(re.escape(b), b) self.assertMatch(re.escape(p), p) + @requires_unicode def test_re_escape_non_ascii(self): - s = u'xxx\u2620\u2620\u2620xxx' + s = u(r'xxx\u2620\u2620\u2620xxx') s_escaped = re.escape(s) - self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx') + self.assertEqual(s_escaped, u(r'xxx\\\u2620\\\u2620\\\u2620xxx')) self.assertMatch(s_escaped, s) - self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s, - u'x\u2620\u2620\u2620x', (2, 7), re.search) + self.assertMatch(u'.%s+.' % re.escape(unichr(0x2620)), s, + u(r'x\u2620\u2620\u2620x'), (2, 7), re.search) def test_re_escape_non_ascii_bytes(self): - b = u'y\u2620y\u2620y'.encode('utf-8') + b = b'y\xe2\x98\xa0y\xe2\x98\xa0y' b_escaped = re.escape(b) self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y') self.assertMatch(b_escaped, b) - res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b) + res = re.findall(re.escape(b'\xe2\x98\xa0'), b) self.assertEqual(len(res), 2) def test_pickling(self): @@ -621,8 +626,9 @@ # non-recursive scheme was implemented. self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001) + @requires_unicode def test_bug_612074(self): - pat=u"["+re.escape(u"\u2039")+u"]" + pat=u"["+re.escape(unichr(0x2039))+u"]" self.assertEqual(re.compile(pat) and 1, 1) def test_stack_overflow(self): @@ -743,23 +749,24 @@ self.assertEqual(iter.next().span(), (4, 4)) self.assertRaises(StopIteration, iter.next) + @requires_unicode def test_bug_6561(self): # '\d' should match characters in Unicode category 'Nd' # (Number, Decimal Digit), but not those in 'Nl' (Number, # Letter) or 'No' (Number, Other). decimal_digits = [ - u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd' - u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd' - u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd' + unichr(0x0037), # '\N{DIGIT SEVEN}', category 'Nd' + unichr(0x0e58), # '\N{THAI DIGIT SIX}', category 'Nd' + unichr(0xff10), # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd' ] for x in decimal_digits: self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x) not_decimal_digits = [ - u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl' - u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl' - u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No' - u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No' + unichr(0x2165), # '\N{ROMAN NUMERAL SIX}', category 'Nl' + unichr(0x3039), # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl' + unichr(0x2082), # '\N{SUBSCRIPT TWO}', category 'No' + unichr(0x32b4), # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No' ] for x in not_decimal_digits: self.assertIsNone(re.match('^\d$', x, re.UNICODE)) @@ -767,11 +774,15 @@ def test_empty_array(self): # SF buf 1647541 import array - for typecode in 'cbBuhHiIlLfd': + typecodes = 'cbBhHiIlLfd' + if have_unicode: + typecodes += 'u' + for typecode in typecodes: a = array.array(typecode) self.assertEqual(re.compile("bla").match(a), None) self.assertEqual(re.compile("").match(a).groups(), ()) + @requires_unicode def test_inline_flags(self): # Bug #1700 upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow @@ -906,9 +917,10 @@ pattern = '.' + reps + mod + 'yz' self.assertEqual(re.compile(pattern, re.S).findall('xyz'), ['xyz'], msg=pattern) - pattern = pattern.encode() - self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'), - [b'xyz'], msg=pattern) + if have_unicode: + pattern = unicode(pattern) + self.assertEqual(re.compile(pattern, re.S).findall(u'xyz'), + [u'xyz'], msg=pattern) def test_bug_2537(self): diff -r cf70f030a744 Lib/test/test_set.py --- a/Lib/test/test_set.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_set.py Mon Jun 23 22:24:50 2014 +0300 @@ -36,6 +36,10 @@ self.hash_count += 1 return int.__hash__(self) +testtypes = [set, frozenset, dict.fromkeys, str, list, tuple] +if test_support.have_unicode: + testtypes.append(unicode) + class TestJointOps(unittest.TestCase): # Tests common to both set and frozenset @@ -75,7 +79,7 @@ self.assertEqual(type(u), self.thetype) self.assertRaises(PassThru, self.s.union, check_pass_thru()) self.assertRaises(TypeError, self.s.union, [[]]) - for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple: + for C in testtypes: self.assertEqual(self.thetype('abcba').union(C('cdc')), set('abcd')) self.assertEqual(self.thetype('abcba').union(C('efgfe')), set('abcefg')) self.assertEqual(self.thetype('abcba').union(C('ccb')), set('abc')) @@ -104,7 +108,7 @@ self.assertEqual(self.s, self.thetype(self.word)) self.assertEqual(type(i), self.thetype) self.assertRaises(PassThru, self.s.intersection, check_pass_thru()) - for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple: + for C in testtypes: self.assertEqual(self.thetype('abcba').intersection(C('cdc')), set('cc')) self.assertEqual(self.thetype('abcba').intersection(C('efgfe')), set('')) self.assertEqual(self.thetype('abcba').intersection(C('ccb')), set('bc')) @@ -124,7 +128,7 @@ for larg in '', 'a', 'ab', 'abc', 'ababac', 'cdc', 'cc', 'efgfe', 'ccb', 'ef': s1 = self.thetype(larg) for rarg in '', 'a', 'ab', 'abc', 'ababac', 'cdc', 'cc', 'efgfe', 'ccb', 'ef': - for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple: + for C in testtypes: s2 = C(rarg) actual = s1.isdisjoint(s2) expected = f(s1, s2) @@ -150,7 +154,7 @@ self.assertEqual(type(i), self.thetype) self.assertRaises(PassThru, self.s.difference, check_pass_thru()) self.assertRaises(TypeError, self.s.difference, [[]]) - for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple: + for C in testtypes: self.assertEqual(self.thetype('abcba').difference(C('cdc')), set('ab')) self.assertEqual(self.thetype('abcba').difference(C('efgfe')), set('abc')) self.assertEqual(self.thetype('abcba').difference(C('ccb')), set('a')) @@ -177,7 +181,7 @@ self.assertEqual(type(i), self.thetype) self.assertRaises(PassThru, self.s.symmetric_difference, check_pass_thru()) self.assertRaises(TypeError, self.s.symmetric_difference, [[]]) - for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple: + for C in testtypes: self.assertEqual(self.thetype('abcba').symmetric_difference(C('cdc')), set('abd')) self.assertEqual(self.thetype('abcba').symmetric_difference(C('efgfe')), set('abcefg')) self.assertEqual(self.thetype('abcba').symmetric_difference(C('ccb')), set('a')) @@ -436,13 +440,13 @@ self.assertRaises(PassThru, self.s.update, check_pass_thru()) self.assertRaises(TypeError, self.s.update, [[]]) for p, q in (('cdc', 'abcd'), ('efgfe', 'abcefg'), ('ccb', 'abc'), ('ef', 'abcef')): - for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple: + for C in testtypes: s = self.thetype('abcba') self.assertEqual(s.update(C(p)), None) self.assertEqual(s, set(q)) for p in ('cdc', 'efgfe', 'ccb', 'ef', 'abcda'): q = 'ahi' - for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple: + for C in testtypes: s = self.thetype('abcba') self.assertEqual(s.update(C(p), C(q)), None) self.assertEqual(s, set(s) | set(p) | set(q)) @@ -463,7 +467,7 @@ self.assertRaises(PassThru, self.s.intersection_update, check_pass_thru()) self.assertRaises(TypeError, self.s.intersection_update, [[]]) for p, q in (('cdc', 'c'), ('efgfe', ''), ('ccb', 'bc'), ('ef', '')): - for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple: + for C in testtypes: s = self.thetype('abcba') self.assertEqual(s.intersection_update(C(p)), None) self.assertEqual(s, set(q)) @@ -493,7 +497,7 @@ self.assertRaises(TypeError, self.s.difference_update, [[]]) self.assertRaises(TypeError, self.s.symmetric_difference_update, [[]]) for p, q in (('cdc', 'ab'), ('efgfe', 'abc'), ('ccb', 'a'), ('ef', 'abc')): - for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple: + for C in testtypes: s = self.thetype('abcba') self.assertEqual(s.difference_update(C(p)), None) self.assertEqual(s, set(q)) @@ -529,7 +533,7 @@ self.assertRaises(PassThru, self.s.symmetric_difference_update, check_pass_thru()) self.assertRaises(TypeError, self.s.symmetric_difference_update, [[]]) for p, q in (('cdc', 'abd'), ('efgfe', 'abcefg'), ('ccb', 'a'), ('ef', 'abcef')): - for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple: + for C in testtypes: s = self.thetype('abcba') self.assertEqual(s.symmetric_difference_update(C(p)), None) self.assertEqual(s, set(q)) diff -r cf70f030a744 Lib/test/test_socket.py --- a/Lib/test/test_socket.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_socket.py Mon Jun 23 22:24:50 2014 +0300 @@ -278,8 +278,9 @@ s.bind(('', 0)) sockname = s.getsockname() # 2 args - with self.assertRaises(UnicodeEncodeError): - s.sendto(u'\u2620', sockname) + if test_support.have_unicode: + with self.assertRaises(UnicodeEncodeError): + s.sendto(unichr(0x2620), sockname) with self.assertRaises(TypeError) as cm: s.sendto(5j, sockname) self.assertIn('not complex', str(cm.exception)) @@ -287,8 +288,9 @@ s.sendto('foo', None) self.assertIn('not NoneType', str(cm.exception)) # 3 args - with self.assertRaises(UnicodeEncodeError): - s.sendto(u'\u2620', 0, sockname) + if test_support.have_unicode: + with self.assertRaises(UnicodeEncodeError): + s.sendto(unichr(0x2620), 0, sockname) with self.assertRaises(TypeError) as cm: s.sendto(5j, 0, sockname) self.assertIn('not complex', str(cm.exception)) diff -r cf70f030a744 Lib/test/test_str.py --- a/Lib/test/test_str.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_str.py Mon Jun 23 22:24:50 2014 +0300 @@ -61,17 +61,18 @@ def __str__(self): return "foo" - class Foo3(object): - def __str__(self): - return u"foo" + if test_support.have_unicode: + class Foo3(object): + def __str__(self): + return u"foo" - class Foo4(unicode): - def __str__(self): - return u"foo" + class Foo4(unicode): + def __str__(self): + return u"foo" - class Foo5(str): - def __str__(self): - return u"foo" + class Foo5(str): + def __str__(self): + return u"foo" class Foo6(str): def __str__(self): @@ -80,11 +81,12 @@ def __unicode__(self): return u"foou" - class Foo7(unicode): - def __str__(self): - return "foos" - def __unicode__(self): - return u"foou" + if test_support.have_unicode: + class Foo7(unicode): + def __str__(self): + return "foos" + def __unicode__(self): + return u"foou" class Foo8(str): def __new__(cls, content=""): @@ -101,14 +103,17 @@ self.assertTrue(str(Foo0()).startswith("<")) # this is different from __unicode__ self.assertEqual(str(Foo1()), "foo") self.assertEqual(str(Foo2()), "foo") - self.assertEqual(str(Foo3()), "foo") - self.assertEqual(str(Foo4("bar")), "foo") - self.assertEqual(str(Foo5("bar")), "foo") + if test_support.have_unicode: + self.assertEqual(str(Foo3()), "foo") + self.assertEqual(str(Foo4("bar")), "foo") + self.assertEqual(str(Foo5("bar")), "foo") self.assertEqual(str(Foo6("bar")), "foos") - self.assertEqual(str(Foo7("bar")), "foos") + if test_support.have_unicode: + self.assertEqual(str(Foo7("bar")), "foos") self.assertEqual(str(Foo8("foo")), "foofoo") self.assertEqual(str(Foo9("foo")), "string") - self.assertEqual(unicode(Foo9("foo")), u"not unicode") + if test_support.have_unicode: + self.assertEqual(unicode(Foo9("foo")), u"not unicode") # This test only affects 32-bit platforms because expandtabs can only take # an int as the max value, not a 64-bit C long. If expandtabs is changed @@ -431,6 +436,7 @@ def test_buffer_is_readonly(self): self.assertRaises(TypeError, sys.stdin.readinto, b"") + @test_support.requires_unicode def test_encode_and_decode_kwargs(self): self.assertEqual('abcde'.encode('ascii', 'replace'), 'abcde'.encode('ascii', errors='replace')) @@ -442,10 +448,11 @@ 'Andr\202 x'.decode(encoding='ascii', errors='replace')) def test_startswith_endswith_errors(self): - with self.assertRaises(UnicodeDecodeError): - '\xff'.startswith(u'x') - with self.assertRaises(UnicodeDecodeError): - '\xff'.endswith(u'x') + if test_support.have_unicode: + with self.assertRaises(UnicodeDecodeError): + '\xff'.startswith(u'x') + with self.assertRaises(UnicodeDecodeError): + '\xff'.endswith(u'x') for meth in ('foo'.startswith, 'foo'.endswith): with self.assertRaises(TypeError) as cm: meth(['f']) diff -r cf70f030a744 Lib/test/test_sys.py --- a/Lib/test/test_sys.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_sys.py Mon Jun 23 22:24:50 2014 +0300 @@ -428,6 +428,7 @@ def test_clear_type_cache(self): sys._clear_type_cache() + @test.test_support.requires_unicode def test_ioencoding(self): import subprocess env = dict(os.environ) @@ -539,12 +540,13 @@ check(get_cell().func_code, size('4i8Pi3P')) # BaseException check(BaseException(), size('3P')) - # UnicodeEncodeError - check(UnicodeEncodeError("", u"", 0, 0, ""), size('5P2PP')) - # UnicodeDecodeError - check(UnicodeDecodeError("", "", 0, 0, ""), size('5P2PP')) - # UnicodeTranslateError - check(UnicodeTranslateError(u"", 0, 1, ""), size('5P2PP')) + if test.test_support.have_unicode: + # UnicodeEncodeError + check(UnicodeEncodeError("", u"", 0, 0, ""), size('5P2PP')) + # UnicodeDecodeError + check(UnicodeDecodeError("", "", 0, 0, ""), size('5P2PP')) + # UnicodeTranslateError + check(UnicodeTranslateError(u"", 0, 1, ""), size('5P2PP')) # method_descriptor (descriptor object) check(str.lower, size('2PP')) # classmethod_descriptor (descriptor object) @@ -574,10 +576,11 @@ check({}.iteritems(), size('P2PPP')) # ellipses check(Ellipsis, size('')) - # EncodingMap - import codecs, encodings.iso8859_3 - x = codecs.charmap_build(encodings.iso8859_3.decoding_table) - check(x, size('32B2iB')) + if test.test_support.have_unicode: + # EncodingMap + import codecs, encodings.iso8859_3 + x = codecs.charmap_build(encodings.iso8859_3.decoding_table) + check(x, size('32B2iB')) # enumerate check(enumerate([]), size('l3P')) # file @@ -707,13 +710,14 @@ # NotImplementedType import types check(types.NotImplementedType, s) - # unicode - usize = len(u'\0'.encode('unicode-internal')) - samples = [u'', u'1'*100] - # we need to test for both sizes, because we don't know if the string - # has been cached - for s in samples: - check(s, size('PPlP') + usize * (len(s) + 1)) + if test.test_support.have_unicode: + # unicode + usize = len(u'\0'.encode('unicode-internal')) + samples = [u'', u'1'*100] + # we need to test for both sizes, because we don't know if the string + # has been cached + for s in samples: + check(s, size('PPlP') + usize * (len(s) + 1)) # weakref import weakref check(weakref.ref(int), size('2Pl2P')) diff -r cf70f030a744 Lib/test/test_tempfile.py --- a/Lib/test/test_tempfile.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_tempfile.py Mon Jun 23 22:24:50 2014 +0300 @@ -142,13 +142,13 @@ pid = os.fork() if not pid: os.close(read_fd) - os.write(write_fd, next(self.r).encode("ascii")) + os.write(write_fd, next(self.r)) os.close(write_fd) # bypass the normal exit handlers- leave those to # the parent. os._exit(0) parent_value = next(self.r) - child_value = os.read(read_fd, len(parent_value)).decode("ascii") + child_value = os.read(read_fd, len(parent_value)) finally: if pid: # best effort to ensure the process can't bleed out diff -r cf70f030a744 Lib/test/test_threading.py --- a/Lib/test/test_threading.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_threading.py Mon Jun 23 22:24:50 2014 +0300 @@ -565,7 +565,7 @@ p = subprocess.Popen([sys.executable, "-c", script], stdout=subprocess.PIPE) rc = p.wait() - data = p.stdout.read().decode().replace('\r', '') + data = p.stdout.read().replace('\r', '') self.assertEqual(rc, 0, "Unexpected error") self.assertEqual(data, expected_output) diff -r cf70f030a744 Lib/test/test_traceback.py --- a/Lib/test/test_traceback.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_traceback.py Mon Jun 23 22:24:50 2014 +0300 @@ -4,7 +4,8 @@ import sys import unittest from imp import reload -from test.test_support import run_unittest, is_jython, Error, cpython_only +from test.test_support import run_unittest, is_jython, Error, cpython_only, \ + requires_unicode import traceback @@ -168,6 +169,7 @@ err = traceback.format_exception_only(None, None) self.assertEqual(err, ['None\n']) + @requires_unicode def test_unicode(self): err = AssertionError('\xff') lines = traceback.format_exception_only(type(err), err) diff -r cf70f030a744 Lib/test/test_types.py --- a/Lib/test/test_types.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_types.py Mon Jun 23 22:24:50 2014 +0300 @@ -299,7 +299,8 @@ assert type(i) == int assert type(format_spec) == str self.assertEqual(i.__format__(format_spec), result) - self.assertEqual(i.__format__(unicode(format_spec)), result) + if have_unicode: + self.assertEqual(i.__format__(unicode(format_spec)), result) test(123456789, 'd', '123456789') test(123456789, 'd', '123456789') @@ -453,7 +454,8 @@ assert type(i) == long assert type(format_spec) == str self.assertEqual(i.__format__(format_spec), result) - self.assertEqual(i.__format__(unicode(format_spec)), result) + if have_unicode: + self.assertEqual(i.__format__(unicode(format_spec)), result) test(10**100, 'd', '1' + '0' * 100) test(10**100+100, 'd', '1' + '0' * 97 + '100') @@ -591,7 +593,8 @@ assert type(f) == float assert type(format_spec) == str self.assertEqual(f.__format__(format_spec), result) - self.assertEqual(f.__format__(unicode(format_spec)), result) + if have_unicode: + self.assertEqual(f.__format__(unicode(format_spec)), result) test(0.0, 'f', '0.000000') diff -r cf70f030a744 Lib/test/test_univnewlines.py --- a/Lib/test/test_univnewlines.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_univnewlines.py Mon Jun 23 22:24:50 2014 +0300 @@ -45,7 +45,7 @@ def setUp(self): data = self.DATA - if "b" in self.WRITEMODE: + if support.have_unicode and "b" in self.WRITEMODE: data = data.encode("ascii") with self.open(support.TESTFN, self.WRITEMODE) as fp: fp.write(data) diff -r cf70f030a744 Lib/test/test_urlparse.py --- a/Lib/test/test_urlparse.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_urlparse.py Mon Jun 23 22:24:50 2014 +0300 @@ -524,6 +524,7 @@ self.assertEqual(p.port, None) self.assertEqual(p.geturl(), uri) + @test_support.requires_unicode def test_caching(self): # Test case for bug #1313119 uri = "http://example.com/doc/" diff -r cf70f030a744 Lib/test/test_weakref.py --- a/Lib/test/test_weakref.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_weakref.py Mon Jun 23 22:24:50 2014 +0300 @@ -210,6 +210,7 @@ self.assertEqual(L3[:5], p3[:5]) self.assertEqual(L3[2:5], p3[2:5]) + @test_support.requires_unicode def test_proxy_unicode(self): # See bug 5037 class C(object): diff -r cf70f030a744 Lib/test/test_xpickle.py --- a/Lib/test/test_xpickle.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/test/test_xpickle.py Mon Jun 23 22:24:50 2014 +0300 @@ -162,7 +162,7 @@ # This is a cut-down version of pickletester's test_unicode. Backwards # compatibility was explicitly broken in r67934 to fix a bug. def test_unicode(self): - endcases = [u'', u'<\\u>', u'<\\\u1234>', u'<\n>', u'<\\>'] + endcases = [u'', u'<\\u>', u'<\\%c>' % 0x1234, u'<\n>', u'<\\>'] for proto in pickletester.protocols: for u in endcases: p = self.dumps(u, proto) diff -r cf70f030a744 Lib/unittest/test/test_assertions.py --- a/Lib/unittest/test/test_assertions.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/unittest/test/test_assertions.py Mon Jun 23 22:24:50 2014 +0300 @@ -1,5 +1,5 @@ import datetime - +from test import test_support import unittest @@ -142,10 +142,11 @@ # This blows up if _formatMessage uses string concatenation self.testableTrue._formatMessage(object(), 'foo') + @test_support.requires_unicode def test_formatMessage_unicode_error(self): one = ''.join(chr(i) for i in range(255)) # this used to cause a UnicodeDecodeError constructing msg - self.testableTrue._formatMessage(one, u'\uFFFD') + self.testableTrue._formatMessage(one, unichr(0xFFFD)) def assertMessages(self, methodName, args, errors): def getMethod(i): diff -r cf70f030a744 Lib/unittest/test/test_case.py --- a/Lib/unittest/test/test_case.py Wed Jun 18 23:07:46 2014 -0400 +++ b/Lib/unittest/test/test_case.py Mon Jun 23 22:24:50 2014 +0300 @@ -496,11 +496,13 @@ with self.assertRaises(self.failureException): self.assertDictContainsSubset({'a': 1, 'c': 1}, {'a': 1}) - with test_support.check_warnings(("", UnicodeWarning)): - one = ''.join(chr(i) for i in range(255)) - # this used to cause a UnicodeDecodeError constructing the failure msg - with self.assertRaises(self.failureException): - self.assertDictContainsSubset({'foo': one}, {'foo': u'\uFFFD'}) + if test_support.have_unicode: + with test_support.check_warnings(("", UnicodeWarning)): + one = ''.join(chr(i) for i in range(255)) + # this used to cause a UnicodeDecodeError constructing the failure msg + with self.assertRaises(self.failureException): + self.assertDictContainsSubset({'foo': one}, + {'foo': unichr(0xFFFD)}) def testAssertEqual(self): equal_pairs = [ @@ -683,7 +685,8 @@ s = u'x' * (2**7) with self.assertRaises(self.failureException) as cm: self.assertEqual(s + 'a', s + 'b') - self.assertIn('^', str(cm.exception)) + if test_support.have_unicode: + self.assertIn('^', str(cm.exception)) self.assertEqual(s + 'a', s + 'a') # over the threshold: diff not used and marker (^) not in error message @@ -916,18 +919,29 @@ + own implementation that does not subclass from TestCase, of course. """ self.maxDiff = None - for type_changer in (lambda x: x, lambda x: x.decode('utf8')): + try: + self.assertMultiLineEqual(sample_text, + revised_sample_text) + except self.failureException, e: + # need to remove the first line of the error message + error = str(e).split('\n', 1)[1] + + # assertMultiLineEqual is hooked up as the default for + # unicode strings - so we can't use it for this check + self.assertTrue(sample_text_error == error) + if test_support.have_unicode: try: - self.assertMultiLineEqual(type_changer(sample_text), - type_changer(revised_sample_text)) + self.assertMultiLineEqual(unicode(sample_text), + unicode(revised_sample_text)) except self.failureException, e: # need to remove the first line of the error message - error = str(e).encode('utf8').split('\n', 1)[1] + error = str(e).split('\n', 1)[1] # assertMultiLineEqual is hooked up as the default for # unicode strings - so we can't use it for this check self.assertTrue(sample_text_error == error) + @test_support.requires_unicode def testAsertEqualSingleLine(self): sample_text = u"laden swallows fly slowly" revised_sample_text = u"unladen swallows fly quickly"