diff -r b45675223885 Makefile.pre.in --- a/Makefile.pre.in Sun Apr 07 12:42:13 2013 -0500 +++ b/Makefile.pre.in Mon Apr 08 00:22:57 2013 +0300 @@ -726,6 +726,7 @@ $(srcdir)/Objects/stringlib/find_max_char.h \ $(srcdir)/Objects/stringlib/localeutil.h \ $(srcdir)/Objects/stringlib/partition.h \ + $(srcdir)/Objects/stringlib/replace.h \ $(srcdir)/Objects/stringlib/split.h \ $(srcdir)/Objects/stringlib/ucs1lib.h \ $(srcdir)/Objects/stringlib/ucs2lib.h \ diff -r b45675223885 Objects/stringlib/replace.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Objects/stringlib/replace.h Mon Apr 08 00:22:57 2013 +0300 @@ -0,0 +1,58 @@ +/* stringlib: replace implementation */ + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +/* If the character to be replaced is quite rare, use the heavily optimized + find function to search for its next occurrence. But if that + character occurs quite often, the overhead of the + function call becomes too big and a dummy loop is faster. As the + frequency of the character is unknown and uneven, we use an adaptive + algorithm. If the character is not found in a dummy loop for a + limited number of iterations, call the find function. The + overhead of that call is thus divided among the scanned characters. + The optimal number of attempts depends on the ratio of the find + function overhead costs, a dummy loop speed and the find + function speed and is determined experimentally. 
+*/ + +Py_LOCAL_INLINE(void) +STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end, + Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) +{ + *s = u2; + while (--maxcount && ++s != end) { + if (*s != u1) { + int attempts = 10; + /* scan for the next u1 with a dummy loop, at most `attempts` steps */ + while (1) { + if (++s == end) + return; + if (*s == u1) + break; + if (!--attempts) { + /* u1 was not found within `attempts` iterations: + fall back to FASTSEARCH() or memchr() */ +#if STRINGLIB_SIZEOF_CHAR == 1 + s++; + s = memchr(s, u1, end - s); + if (s == NULL) + return; +#else + Py_ssize_t i; + STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1; + s++; + i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH); + if (i < 0) + return; + s += i; +#endif + /* s now points at u1: leave the scan so it is replaced below */ + break; + } + } + } + *s = u2; + } +} diff -r b45675223885 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sun Apr 07 12:42:13 2013 -0500 +++ b/Objects/unicodeobject.c Mon Apr 08 00:22:57 2013 +0300 @@ -585,6 +585,7 @@ #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -595,6 +596,7 @@ #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -605,6 +607,7 @@ #include "stringlib/split.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/replace.h" #include "stringlib/find_max_char.h" #include "stringlib/localeutil.h" #include "stringlib/undef.h" @@ -9858,6 +9861,31 @@ return 0; } +static void +replace_1char_inplace(PyObject *u, Py_ssize_t pos, + Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) +{ + int kind = PyUnicode_KIND(u); + void *data = PyUnicode_DATA(u); + Py_ssize_t len = PyUnicode_GET_LENGTH(u); + if (kind == PyUnicode_1BYTE_KIND) { + ucs1lib_replace_1char_inplace((Py_UCS1 
*)data + pos, + (Py_UCS1 *)data + len, + u1, u2, maxcount); + } + else if (kind == PyUnicode_2BYTE_KIND) { + ucs2lib_replace_1char_inplace((Py_UCS2 *)data + pos, + (Py_UCS2 *)data + len, + u1, u2, maxcount); + } + else { + assert(kind == PyUnicode_4BYTE_KIND); + ucs4lib_replace_1char_inplace((Py_UCS4 *)data + pos, + (Py_UCS4 *)data + len, + u1, u2, maxcount); + } +} + static PyObject * replace(PyObject *self, PyObject *str1, PyObject *str2, Py_ssize_t maxcount) @@ -9874,7 +9902,7 @@ Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1); Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2); int mayshrink; - Py_UCS4 maxchar, maxchar_str2; + Py_UCS4 maxchar, maxchar_str1, maxchar_str2; if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; @@ -9883,15 +9911,16 @@ if (str1 == str2) goto nothing; - if (skind < kind1) + + maxchar = PyUnicode_MAX_CHAR_VALUE(self); + maxchar_str1 = PyUnicode_MAX_CHAR_VALUE(str1); + if (maxchar < maxchar_str1) /* substring too wide to be present */ goto nothing; - - maxchar = PyUnicode_MAX_CHAR_VALUE(self); maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2); /* Replacing str1 with str2 may cause a maxchar reduction in the result string. 
*/ - mayshrink = (maxchar_str2 < maxchar); + mayshrink = (maxchar_str2 < maxchar_str1); maxchar = MAX_MAXCHAR(maxchar, maxchar_str2); if (len1 == len2) { @@ -9901,35 +9930,19 @@ if (len1 == 1) { /* replace characters */ Py_UCS4 u1, u2; - int rkind; - Py_ssize_t index, pos; - char *src; + Py_ssize_t pos; u1 = PyUnicode_READ_CHAR(str1, 0); - pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1); + pos = findchar(sbuf, skind, slen, u1, 1); if (pos < 0) goto nothing; u2 = PyUnicode_READ_CHAR(str2, 0); u = PyUnicode_New(slen, maxchar); if (!u) goto error; + _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); - rkind = PyUnicode_KIND(u); - - PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2); - index = 0; - src = sbuf; - while (--maxcount) - { - pos++; - src += pos * PyUnicode_KIND(self); - slen -= pos; - index += pos; - pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1); - if (pos < 0) - break; - PyUnicode_WRITE(rkind, PyUnicode_DATA(u), index + pos, u2); - } + replace_1char_inplace(u, pos, u1, u2, maxcount); } else { int rkind = skind; diff -r b45675223885 PC/VS9.0/pythoncore.vcproj --- a/PC/VS9.0/pythoncore.vcproj Sun Apr 07 12:42:13 2013 -0500 +++ b/PC/VS9.0/pythoncore.vcproj Mon Apr 08 00:22:57 2013 +0300 @@ -1587,6 +1587,10 @@ > + + diff -r b45675223885 PCbuild/pythoncore.vcxproj --- a/PCbuild/pythoncore.vcxproj Sun Apr 07 12:42:13 2013 -0500 +++ b/PCbuild/pythoncore.vcxproj Mon Apr 08 00:22:57 2013 +0300 @@ -475,6 +475,7 @@ + diff -r b45675223885 PCbuild/pythoncore.vcxproj.filters --- a/PCbuild/pythoncore.vcxproj.filters Sun Apr 07 12:42:13 2013 -0500 +++ b/PCbuild/pythoncore.vcxproj.filters Mon Apr 08 00:22:57 2013 +0300 @@ -378,6 +378,9 @@ Objects + + Objects + Objects