diff -r a93efd445982 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Tue Oct 09 13:45:10 2012 +0300
+++ b/Objects/unicodeobject.c	Wed Oct 10 18:34:04 2012 +0200
@@ -9881,6 +9881,78 @@ anylib_count(int kind, PyObject *sstr, v
     return 0;
 }
 
+/* Replace occurrences of the character u1 with u2 while copying self into u.
+
+   self      source string
+   maxcount  maximum number of replacements; expected to be >= 1, since the
+             generic path writes the match at pos before testing the count
+   pos       index of an occurrence of u1 in self; replacement starts there
+   sbuf      character data of self
+   u1, u2    character searched for and its replacement
+   u         result string, already allocated by the caller */
+static void
+replace_1char(PyObject *self, Py_ssize_t maxcount, Py_ssize_t pos, char *sbuf,
+              Py_UCS4 u1, Py_UCS4 u2, PyObject *u)
+{
+    Py_ssize_t len = PyUnicode_GET_LENGTH(self);
+    int skind = PyUnicode_KIND(self);
+    int rkind = PyUnicode_KIND(u);
+
+    if (skind == 1 && rkind == 1) {
+        /* One byte per character on both sides.  The data is read through
+           unsigned char: where plain char is signed, a character >= 0x80
+           would be promoted to a negative int and never compare equal to
+           the Py_UCS4 value u1. */
+        const unsigned char *sdata = PyUnicode_DATA(self);
+        unsigned char *udata = PyUnicode_DATA(u);
+        unsigned char *uend = udata + len;
+
+        memcpy(udata, sdata, len);
+
+        /* A plain loop is used instead of repeated memchr() calls.  Timings
+           recorded with
+           ./python -m timeit -s "a='a'; b='b'; text=..." "text.replace(a, b)"
+
+           replaced  text                  plain loop  memchr() loop
+           100%      a*100000              61.4 usec   771 usec
+           50%       (a+' ')*(100000//2)   118 usec    392 usec
+           0.001%    a+' '*100000+a        90 usec     8.65 usec
+
+           memchr() only wins when matches are rare.
+
+           The caller obtains pos from a forward findchar() over the whole
+           string, so the scan starts at the first known match. */
+        for (udata += pos; udata < uend; udata++) {
+            if (*udata == u1) {
+                if (--maxcount < 0)
+                    return;
+                *udata = (unsigned char)u2;
+            }
+        }
+    }
+    else {
+        /* Generic path: copy everything, patch the known match at pos, then
+           keep searching the rest of self for further matches. */
+        Py_ssize_t offset = 0;      /* characters of self already skipped */
+        char *src = sbuf;
+
+        _PyUnicode_FastCopyCharacters(u, 0, self, 0, len);
+        PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2);
+
+        while (--maxcount)
+        {
+            pos++;
+            src += pos * skind;
+            len -= pos;
+            offset += pos;
+            pos = findchar(src, skind, len, u1, 1);
+            if (pos < 0)
+                break;
+            PyUnicode_WRITE(rkind, PyUnicode_DATA(u), offset + pos, u2);
+        }
+    }
+}
+
 static PyObject *
 replace(PyObject *self, PyObject *str1,
         PyObject *str2, Py_ssize_t maxcount)
@@ -9924,9 +9996,7 @@ replace(PyObject *self, PyObject *str1,
         if (len1 == 1) {
             /* replace characters */
             Py_UCS4 u1, u2;
-            int rkind;
-            Py_ssize_t index, pos;
-            char *src;
+            Py_ssize_t pos;
 
             u1 = PyUnicode_READ_CHAR(str1, 0);
             pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1);
@@ -9936,23 +10006,8 @@ replace(PyObject *self, PyObject *str1,
             u = PyUnicode_New(slen, maxchar);
             if (!u)
                 goto error;
-            _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
-            rkind = PyUnicode_KIND(u);
-
-            PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2);
-            index = 0;
-            src = sbuf;
-            while (--maxcount)
-            {
-                pos++;
-                src += pos * PyUnicode_KIND(self);
-                slen -= pos;
-                index += pos;
-                pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1);
-                if (pos < 0)
-                    break;
-                PyUnicode_WRITE(rkind, PyUnicode_DATA(u), index + pos, u2);
-            }
+
+            replace_1char(self, maxcount, pos, sbuf, u1, u2, u);
         }
         else {
             int rkind = skind;