diff -r 9abb316f1593 Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Sun Oct 02 13:08:47 2016 +0300 +++ b/Lib/test/test_unicode.py Sun Oct 02 18:53:16 2016 +0800 @@ -2594,6 +2594,23 @@ self.assertEqual(size, nchar) self.assertEqual(wchar, nonbmp + '\0') + # Test PyUnicode_AsUCS4() + @support.cpython_only + def test_asucs4(self): + from _testcapi import unicode_asucs4 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\ud800b\udfffc', + '\ud834\udd1e', 'a\U0001f600']: + l = len(s) + self.assertEqual(unicode_asucs4(s, l, 1), s+'\0') + self.assertEqual(unicode_asucs4(s, l, 0), s+'\uffff') + self.assertEqual(unicode_asucs4(s, l+1, 1), s+'\0\uffff') + self.assertEqual(unicode_asucs4(s, l+1, 0), s+'\0\uffff') + self.assertRaises(ValueError, unicode_asucs4, s, l-1, 1) + self.assertRaises(ValueError, unicode_asucs4, s, l-2, 0) + s = '\0'.join([s, s]) + self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0') + self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff') + def test_subclass_add(self): class S(str): def __add__(self, o): diff -r 9abb316f1593 Modules/_testcapimodule.c --- a/Modules/_testcapimodule.c Sun Oct 02 13:08:47 2016 +0300 +++ b/Modules/_testcapimodule.c Sun Oct 02 18:53:16 2016 +0800 @@ -1829,6 +1829,36 @@ } static PyObject * +unicode_asucs4(PyObject *self, PyObject *args) +{ + PyObject *unicode, *result; + Py_UCS4 *buffer; + int copy_null; + Py_ssize_t str_len, buf_len; + + if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, &copy_null)) { + return NULL; + } + + buf_len = str_len + 1; + buffer = PyMem_NEW(Py_UCS4, buf_len); + if (buffer == NULL) { + return PyErr_NoMemory(); + } + memset(buffer, 0, sizeof(Py_UCS4)*buf_len); + buffer[str_len] = 0xffffU; + + if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) { + PyMem_FREE(buffer); + return NULL; + } + + result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len); + PyMem_FREE(buffer); + return result; +} + +static PyObject * unicode_encodedecimal(PyObject *self, 
PyObject *args) { Py_UNICODE *unicode; @@ -4030,6 +4060,7 @@ {"test_widechar", (PyCFunction)test_widechar, METH_NOARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, + {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, diff -r 9abb316f1593 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sun Oct 02 13:08:47 2016 +0300 +++ b/Objects/unicodeobject.c Sun Oct 02 18:53:16 2016 +0800 @@ -2437,7 +2437,7 @@ } else { if (targetsize < targetlen) { - PyErr_Format(PyExc_SystemError, + PyErr_Format(PyExc_ValueError, "string is longer than the buffer"); if (copy_null && 0 < targetsize) target[0] = 0;