diff -r d1c72f5c15bd Doc/c-api/unicode.rst --- a/Doc/c-api/unicode.rst Wed Oct 05 22:54:27 2016 -0700 +++ b/Doc/c-api/unicode.rst Fri Oct 07 02:26:33 2016 +0800 @@ -578,7 +578,7 @@ Copy characters from one Unicode object into another. This function performs character conversion when necessary and falls back to :c:func:`memcpy` if possible. Returns ``-1`` and sets an exception on error, otherwise returns - ``0``. + the number of copied characters. .. versionadded:: 3.3 diff -r d1c72f5c15bd Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Wed Oct 05 22:54:27 2016 -0700 +++ b/Lib/test/test_unicode.py Fri Oct 07 02:26:33 2016 +0800 @@ -4,7 +4,7 @@ (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -"""#" +""" import _string import codecs import itertools @@ -2735,6 +2735,25 @@ self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0') self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff') + # Test PyUnicode_CopyCharacters(): result, returned count and errors + @support.cpython_only + def test_copycharacters(self): + from _testcapi import unicode_copycharacters + s = 'abcdefghijklmnopqrstuvwxyz' + for start in range(len(s)): + self.assertEqual( + unicode_copycharacters(s, start, 10), + (s[start:start+10].ljust(10, '\0'), min(len(s)-start, 10)) + ) + self.assertRaises(IndexError, unicode_copycharacters, s, -1, 10) + self.assertRaises(IndexError, unicode_copycharacters, s, 30, 10) + self.assertRaises(SystemError, unicode_copycharacters, b'', 0, 10) + self.assertRaises(SystemError, unicode_copycharacters, s, 0, 11) + self.assertRaises(SystemError, unicode_copycharacters, s, 0, -1) + for s in ['\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', + 'a\ud800b\udfffc', '\ud834\udd1e']: + self.assertRaises(SystemError, unicode_copycharacters, s, 0, 10) + @support.cpython_only def test_encode_decimal(self): from _testcapi import unicode_encodedecimal diff -r d1c72f5c15bd Modules/_testcapimodule.c --- a/Modules/_testcapimodule.c Wed Oct 05 22:54:27 2016 -0700 +++ b/Modules/_testcapimodule.c Fri Oct 07 02:26:33 
2016 +0800 @@ -1859,6 +1859,36 @@ } static PyObject * +unicode_copycharacters(PyObject *self, PyObject *args) +{ + PyObject *from, *to; + Py_ssize_t start, how_many, copied; + + if (!PyArg_ParseTuple(args, "Onn:unicode_copycharacters", &from, + &start, &how_many)) { + return NULL; + } + /* Build a zero-filled 10-character ASCII string as the copy target. */ + to = PyUnicode_New(10, 127); + if (!to) { + return NULL; + } + assert(PyUnicode_IS_COMPACT_ASCII(to)); + if (PyUnicode_Fill(to, 0, PyUnicode_GET_LENGTH(to), 0U) < 0) { + Py_DECREF(to); + return NULL; + } + + copied = PyUnicode_CopyCharacters(to, 0, from, start, how_many); + if (copied < 0) { + Py_DECREF(to); + return NULL; + } + + return Py_BuildValue("(Nn)", to, copied); +} + +static PyObject * unicode_encodedecimal(PyObject *self, PyObject *args) { Py_UNICODE *unicode; @@ -4061,6 +4091,7 @@ {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, + {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS}, {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, diff -r d1c72f5c15bd Objects/unicodeobject.c --- a/Objects/unicodeobject.c Wed Oct 05 22:54:27 2016 -0700 +++ b/Objects/unicodeobject.c Fri Oct 07 02:26:33 2016 +0800 @@ -1549,15 +1549,19 @@ if (PyUnicode_READY(to) == -1) return -1; - if (from_start < 0) { + if (from_start < 0 || from_start > PyUnicode_GET_LENGTH(from)) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return -1; } - if (to_start < 0) { + if (to_start < 0 || to_start > PyUnicode_GET_LENGTH(to)) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return -1; } - how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many); + if (how_many < 0) { + PyErr_SetString(PyExc_SystemError, "how_many cannot be negative"); + return -1; + } + how_many = 
Py_MIN(PyUnicode_GET_LENGTH(from)-from_start, how_many); if (to_start + how_many > PyUnicode_GET_LENGTH(to)) { PyErr_Format(PyExc_SystemError, "Cannot write %zi characters at %zi "