diff -r 07d8272d61e7 Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py Mon Nov 28 12:06:13 2016 +0100
+++ b/Lib/test/test_unicode.py Tue Nov 29 00:01:50 2016 +0800
@@ -2728,6 +2728,36 @@
         self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
         self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
 
+    # Test PyUnicode_FindChar() via the _testcapi.unicode_findchar() wrapper
+    @support.cpython_only
+    def test_findchar(self):
+        from _testcapi import unicode_findchar
+        # ASCII, Latin-1, BMP and non-BMP characters; the sample is doubled
+        str = "bye;\xe0 bient\xf4t;\u518d\u89c1;\U0001F44B" * 2
+        # direction 1 is expected to give the first match, -1 the last one
+        self.assertEqual(unicode_findchar(str, 'b', 0, len(str), 1), 0)
+        self.assertEqual(unicode_findchar(str, 'b', 0, len(str), -1), 24)
+        self.assertEqual(unicode_findchar(str, '\xe0', 0, len(str), 1), 4)
+        self.assertEqual(unicode_findchar(str, '\xe0', 0, len(str), -1), 22)
+        self.assertEqual(unicode_findchar(str, '\u518d', 0, len(str), 1), 14)
+        self.assertEqual(unicode_findchar(str, '\u518d', 0, len(str), -1), 32)
+        self.assertEqual(unicode_findchar(str, '\U0001F44B', 0, len(str), 1), 17)
+        self.assertEqual(unicode_findchar(str, '\U0001F44B', 0, len(str), -1), 35)
+
+        # start < end: only [start, end) is searched; end may exceed len(str)
+        self.assertEqual(unicode_findchar(str, 'b', 1, len(str), 1), 6)
+        self.assertEqual(unicode_findchar(str, 'b', 1, len(str)+1, 1), 6)
+        self.assertEqual(unicode_findchar(str, 'b', 1, 5, 1), -1)
+        # start > end: empty range, expect -1
+        self.assertEqual(unicode_findchar(str, 'b', len(str), 0, 1), -1)
+        # start = end: empty range, expect -1
+        self.assertEqual(unicode_findchar(str, 'b', 0, 0, 1), -1)
+        # negative start/end are taken relative to the end of the string
+        self.assertEqual(unicode_findchar(str, 'b', -len(str), len(str), 1), 0)
+        self.assertEqual(unicode_findchar(str, 'b', 0, -1, 1), 0)
+        self.assertEqual(unicode_findchar(str, 'b', -len(str), -1, 1), 0)
+        self.assertEqual(unicode_findchar(str, 'b', -1, -len(str), 1), -1)
+
+
     # Test PyUnicode_CopyCharacters()
     @support.cpython_only
     def test_copycharacters(self):
diff -r 07d8272d61e7 Modules/_testcapimodule.c
--- a/Modules/_testcapimodule.c Mon Nov 28 12:06:13 2016 +0100
+++ b/Modules/_testcapimodule.c Tue Nov 29 00:01:50 2016 +0800
@@ -1875,6 +1875,26
@@
 }
 
 static PyObject *
+unicode_findchar(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    int ch, direction;
+    Py_ssize_t result;
+    Py_ssize_t start, end;
+    /* Test wrapper: args are (str, ch, start, end, direction). */
+    if (!PyArg_ParseTuple(args, "UCnni:unicode_findchar", &str, &ch,
+                          &start, &end, &direction)) {
+        return NULL;
+    }
+    /* -2 is the error return; a match index or -1 is returned as an int. */
+    result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction);
+    if (result == -2)
+        return NULL;
+    else
+        return PyLong_FromSsize_t(result);
+}
+
+static PyObject *
 unicode_copycharacters(PyObject *self, PyObject *args)
 {
     PyObject *from, *to, *to_copy;
@@ -4107,6 +4127,7 @@
     {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
     {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
     {"unicode_asucs4", unicode_asucs4, METH_VARARGS},
+    {"unicode_findchar", unicode_findchar, METH_VARARGS},
     {"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
     {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
     {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
diff -r 07d8272d61e7 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Mon Nov 28 12:06:13 2016 +0100
+++ b/Objects/unicodeobject.c Tue Nov 29 00:01:50 2016 +0800
@@ -9461,16 +9461,12 @@
                    int direction)
 {
     int kind;
-    Py_ssize_t result;
+    Py_ssize_t len, result;
     if (PyUnicode_READY(str) == -1)
         return -2;
-    if (start < 0 || end < 0) {
-        PyErr_SetString(PyExc_IndexError, "string index out of range");
-        return -2;
-    }
-    if (end > PyUnicode_GET_LENGTH(str))
-        end = PyUnicode_GET_LENGTH(str);
-    if (start >= end)
+    len = PyUnicode_GET_LENGTH(str);
+    ADJUST_INDICES(start, end, len);
+    if (end - start < 1)
         return -1;
     kind = PyUnicode_KIND(str);
     result = findchar(PyUnicode_1BYTE_DATA(str) + kind*start,