Index: Objects/codeobject.c =================================================================== --- Objects/codeobject.c (Revision 58412) +++ Objects/codeobject.c (Arbeitskopie) @@ -59,7 +59,7 @@ freevars == NULL || !PyTuple_Check(freevars) || cellvars == NULL || !PyTuple_Check(cellvars) || name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) || - filename == NULL || !PyString_Check(filename) || + filename == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) || lnotab == NULL || !PyString_Check(lnotab) || !PyObject_CheckReadBuffer(code)) { PyErr_BadInternalCall(); Index: Lib/test/test_codecs.py =================================================================== --- Lib/test/test_codecs.py (Revision 58412) +++ Lib/test/test_codecs.py (Arbeitskopie) @@ -803,7 +803,7 @@ codecs.register_error("UnicodeInternalTest", codecs.ignore_errors) decoder = codecs.getdecoder("unicode_internal") ab = "ab".encode("unicode_internal") - ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:])), + ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]), "ascii"), "UnicodeInternalTest") self.assertEquals(("ab", 12), ignored) Index: Lib/test/test_array.py =================================================================== --- Lib/test/test_array.py (Revision 58412) +++ Lib/test/test_array.py (Arbeitskopie) @@ -17,8 +17,18 @@ array.array.__init__(typecode) tests = [] # list to accumulate all tests -typecodes = "ubBhHiIlLfd" +typecodes = array.typecodes +class TypecodesTest(unittest.TestCase): + expected_typecodes = "ubBhHiIlLfd" + + def test_typecodes(self): + global typecodes + for typecode in self.expected_typecodes: + self.assert_(typecode in typecodes, typecode) + +tests.append(TypecodesTest) + class BadConstructorTest(unittest.TestCase): def test_constructor(self): @@ -773,6 +783,12 @@ tests.append(UnicodeTest) +class UnicodeWideTest(UnicodeTest): + typecode = 'w' + +if 'w' in typecodes: + tests.append(UnicodeWideTest) + class NumberTest(BaseTest): def test_extslice(self): Index: Lib/test/test_re.py =================================================================== --- Lib/test/test_re.py (Revision 58412) +++ Lib/test/test_re.py (Arbeitskopie) @@ -591,7 +591,7 @@ self.assertEqual([item.group(0) for item in iter], [":", "::", ":::"]) - def test_bug_926075(self): + def DISABLED_test_bug_926075(self): self.assert_(re.compile('bug_926075') is not re.compile(str8('bug_926075'))) @@ -618,7 +618,7 @@ def test_empty_array(self): # SF buf 1647541 import array - for typecode in 'bBuhHiIlLfd': + for typecode in array.typecodes: a = array.array(typecode) self.assertEqual(re.compile("bla").match(a), None) self.assertEqual(re.compile("").match(a).groups(), ()) Index: Lib/test/test_codeccallbacks.py =================================================================== --- Lib/test/test_codeccallbacks.py (Revision 58412) +++ Lib/test/test_codeccallbacks.py (Arbeitskopie) @@ -140,17 +140,17 @@ sin += chr(sys.maxunicode) sout = b"a\\xac\\u1234\\u20ac\\u8000" if sys.maxunicode > 0xffff: - sout += bytes("\\U%08x" % sys.maxunicode) + sout += bytes("\\U%08x" % sys.maxunicode, "ascii") self.assertEqual(sin.encode("ascii", "backslashreplace"), sout) sout = b"a\xac\\u1234\\u20ac\\u8000" if sys.maxunicode > 0xffff: - sout += bytes("\\U%08x" % sys.maxunicode) + sout += bytes("\\U%08x" % sys.maxunicode, "ascii") self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout) sout = b"a\xac\\u1234\xa4\\u8000" if sys.maxunicode > 0xffff: - sout += bytes("\\U%08x" % sys.maxunicode) + sout += bytes("\\U%08x" % sys.maxunicode, "ascii") self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) def test_decoderelaxedutf8(self): Index: Modules/arraymodule.c =================================================================== --- Modules/arraymodule.c (Revision 58412) +++ Modules/arraymodule.c (Arbeitskopie) @@ -40,14 +40,6 @@ static PyTypeObject Arraytype; -#ifdef Py_UNICODE_WIDE -#define PyArr_UNI 'w' -#define PyArr_UNISTR "w" -#else -#define PyArr_UNI 'u' -#define PyArr_UNISTR "u" -#endif - #define array_Check(op) PyObject_TypeCheck(op, &Arraytype) #define array_CheckExact(op) (Py_Type(op) == &Arraytype) @@ -391,7 +383,10 @@ static struct arraydescr descriptors[] = { {'b', sizeof(char), b_getitem, b_setitem, "b"}, {'B', sizeof(char), BB_getitem, BB_setitem, "B"}, - {PyArr_UNI, sizeof(Py_UNICODE), u_getitem, u_setitem, PyArr_UNISTR}, + {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u"}, +#ifdef Py_UNICODE_WIDE + {'w', sizeof(Py_UNICODE), u_getitem, u_setitem, "w"}, +#endif {'h', sizeof(short), h_getitem, h_setitem, "h"}, {'H', sizeof(short), HH_getitem, HH_setitem, "H"}, {'i', sizeof(int), i_getitem, i_setitem, "i"}, @@ -1418,10 +1413,11 @@ { Py_UNICODE *ustr; Py_ssize_t n; + char typecode = self->ob_descr->typecode; if (!PyArg_ParseTuple(args, "u#:fromunicode", &ustr, &n)) return NULL; - if (self->ob_descr->typecode != PyArr_UNI) { + if ((typecode != 'u') && (typecode != 'w')) { PyErr_SetString(PyExc_ValueError, "fromunicode() may only be called on " "unicode type arrays"); @@ -1457,7 +1453,8 @@ static PyObject * array_tounicode(arrayobject *self, PyObject *unused) { - if (self->ob_descr->typecode != PyArr_UNI) { + char typecode = self->ob_descr->typecode; + if ((typecode != 'u') && (typecode != 'w')) { PyErr_SetString(PyExc_ValueError, "tounicode() may only be called on unicode type arrays"); return NULL; @@ -1560,7 +1557,7 @@ if (len == 0) { return PyUnicode_FromFormat("array('%c')", typecode); } - if (typecode == PyArr_UNI) + if ((typecode == 'u') || (typecode == 'w')) v = array_tounicode(a, NULL); else v = array_tolist(a, NULL); @@ -1864,7 +1861,7 @@ if (!(initial == NULL || PyList_Check(initial) || PyBytes_Check(initial) || PyTuple_Check(initial) - || (c == PyArr_UNI && PyUnicode_Check(initial)))) { + || (((c == 'u') || (c == 'w')) && PyUnicode_Check(initial)))) { it = PyObject_GetIter(initial); if (it == NULL) return NULL; @@ -1967,16 +1964,19 @@ 'b' signed integer 1 \n\ 'B' unsigned integer 1 \n\ 'u' Unicode character 2 \n\ + 'w' Unicode character 4 (see note) \n\ 'h' signed integer 2 \n\ 'H' unsigned integer 2 \n\ 'i' signed integer 2 \n\ 'I' unsigned integer 2 \n\ - 'w' unicode character 4 \n\ 'l' signed integer 4 \n\ 'L' unsigned integer 4 \n\ 'f' floating point 4 \n\ 'd' floating point 8 \n\ \n\ +NOTE: The 'w' typecode is only available in Python builds with a wide \n\ + unicode type. \n\ +\n\ The constructor is:\n\ \n\ array(typecode [, initializer]) -- create a new array\n\ @@ -2168,6 +2168,10 @@ initarray(void) { PyObject *m; + PyObject *typecodes; + Py_ssize_t size = 0; + register Py_UNICODE *p; + struct arraydescr *descr; if (PyType_Ready(&Arraytype) < 0) return; @@ -2180,5 +2184,16 @@ PyModule_AddObject(m, "ArrayType", (PyObject *)&Arraytype); Py_INCREF((PyObject *)&Arraytype); PyModule_AddObject(m, "array", (PyObject *)&Arraytype); + + for (descr = descriptors; descr->typecode != '\0'; descr++) + size++; + + typecodes = PyUnicode_FromStringAndSize(NULL, size); + p = PyUnicode_AS_UNICODE(typecodes); + for (descr = descriptors; descr->typecode != '\0'; descr++) + *p++ = (char)descr->typecode; + + PyModule_AddObject(m, "typecodes", (PyObject *)typecodes); + /* No need to check the error here, the caller will do that */ }