diff -r b45675223885 Makefile.pre.in
--- a/Makefile.pre.in Sun Apr 07 12:42:13 2013 -0500
+++ b/Makefile.pre.in Mon Apr 08 00:22:57 2013 +0300
@@ -726,6 +726,7 @@
$(srcdir)/Objects/stringlib/find_max_char.h \
$(srcdir)/Objects/stringlib/localeutil.h \
$(srcdir)/Objects/stringlib/partition.h \
+ $(srcdir)/Objects/stringlib/replace.h \
$(srcdir)/Objects/stringlib/split.h \
$(srcdir)/Objects/stringlib/ucs1lib.h \
$(srcdir)/Objects/stringlib/ucs2lib.h \
diff -r b45675223885 Objects/stringlib/replace.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Objects/stringlib/replace.h Mon Apr 08 00:22:57 2013 +0300
@@ -0,0 +1,58 @@
+/* stringlib: replace implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+/* Replace occurrences of u1 with u2 in place, scanning from s up to end.
+ *s is overwritten unconditionally and counts as the first replacement;
+ at most maxcount characters are written in total (for maxcount >= 1).
+
+ The next u1 is located adaptively. If u1 is rare, the heavily optimized
+ find function is the fastest way to reach it; if u1 occurs often, the
+ overhead of the function call becomes too big and a plain loop is faster.
+ As the frequency of u1 is unknown and uneven, a plain loop runs first for
+ a limited number of iterations, and only then is the find function called,
+ so its call overhead is spread over the characters already scanned. The
+ iteration limit was determined experimentally.
+*/
+
+Py_LOCAL_INLINE(void)
+STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
+ Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+ *s = u2; /* first replacement; *s is not compared with u1 here */
+ while (--maxcount && ++s != end) {
+ if (*s != u1) {
+ int attempts = 10; /* plain-loop iterations before the find call */
+ /* search for u1 with a plain loop first */
+ while (1) {
+ if (++s == end)
+ return;
+ if (*s == u1)
+ break;
+ if (!--attempts) {
+ /* u1 was not found within the allowed iterations: search
+ the rest with memchr() (1-byte chars) or FASTSEARCH() */
+#if STRINGLIB_SIZEOF_CHAR == 1
+ s++; /* *s was already compared above */
+ s = memchr(s, u1, end - s);
+ if (s == NULL)
+ return;
+#else
+ Py_ssize_t i;
+ STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1;
+ s++; /* *s was already compared above */
+ i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH);
+ if (i < 0)
+ return;
+ s += i;
+#endif
+ /* u1 found at s: leave the search loop; it is replaced below */
+ break;
+ }
+ }
+ }
+ *s = u2;
+ }
+}
diff -r b45675223885 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Sun Apr 07 12:42:13 2013 -0500
+++ b/Objects/unicodeobject.c Mon Apr 08 00:22:57 2013 +0300
@@ -585,6 +585,7 @@
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
@@ -595,6 +596,7 @@
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
@@ -605,6 +607,7 @@
#include "stringlib/split.h"
#include "stringlib/count.h"
#include "stringlib/find.h"
+#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
@@ -9858,6 +9861,31 @@
return 0;
}
+static void
+replace_1char_inplace(PyObject *u, Py_ssize_t pos,
+ Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{ /* Replace u1 with u2 in u from index pos on, dispatching on u's kind. */
+ int kind = PyUnicode_KIND(u);
+ void *data = PyUnicode_DATA(u); /* raw buffer; cast per kind below */
+ Py_ssize_t len = PyUnicode_GET_LENGTH(u);
+ if (kind == PyUnicode_1BYTE_KIND) {
+ ucs1lib_replace_1char_inplace((Py_UCS1 *)data + pos, /* range start */
+ (Py_UCS1 *)data + len, /* range end: one past the last char */
+ u1, u2, maxcount);
+ }
+ else if (kind == PyUnicode_2BYTE_KIND) {
+ ucs2lib_replace_1char_inplace((Py_UCS2 *)data + pos,
+ (Py_UCS2 *)data + len,
+ u1, u2, maxcount);
+ }
+ else { /* only the 4-byte kind is expected here */
+ assert(kind == PyUnicode_4BYTE_KIND);
+ ucs4lib_replace_1char_inplace((Py_UCS4 *)data + pos,
+ (Py_UCS4 *)data + len,
+ u1, u2, maxcount);
+ }
+}
+
static PyObject *
replace(PyObject *self, PyObject *str1,
PyObject *str2, Py_ssize_t maxcount)
@@ -9874,7 +9902,7 @@
Py_ssize_t len1 = PyUnicode_GET_LENGTH(str1);
Py_ssize_t len2 = PyUnicode_GET_LENGTH(str2);
int mayshrink;
- Py_UCS4 maxchar, maxchar_str2;
+ Py_UCS4 maxchar, maxchar_str1, maxchar_str2;
if (maxcount < 0)
maxcount = PY_SSIZE_T_MAX;
@@ -9883,15 +9911,16 @@
if (str1 == str2)
goto nothing;
- if (skind < kind1)
+
+ maxchar = PyUnicode_MAX_CHAR_VALUE(self);
+ maxchar_str1 = PyUnicode_MAX_CHAR_VALUE(str1);
+ if (maxchar < maxchar_str1)
/* substring too wide to be present */
goto nothing;
-
- maxchar = PyUnicode_MAX_CHAR_VALUE(self);
maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2);
/* Replacing str1 with str2 may cause a maxchar reduction in the
result string. */
- mayshrink = (maxchar_str2 < maxchar);
+ mayshrink = (maxchar_str2 < maxchar_str1);
maxchar = MAX_MAXCHAR(maxchar, maxchar_str2);
if (len1 == len2) {
@@ -9901,35 +9930,19 @@
if (len1 == 1) {
/* replace characters */
Py_UCS4 u1, u2;
- int rkind;
- Py_ssize_t index, pos;
- char *src;
+ Py_ssize_t pos;
u1 = PyUnicode_READ_CHAR(str1, 0);
- pos = findchar(sbuf, PyUnicode_KIND(self), slen, u1, 1);
+ pos = findchar(sbuf, skind, slen, u1, 1);
if (pos < 0)
goto nothing;
u2 = PyUnicode_READ_CHAR(str2, 0);
u = PyUnicode_New(slen, maxchar);
if (!u)
goto error;
+
_PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
- rkind = PyUnicode_KIND(u);
-
- PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2);
- index = 0;
- src = sbuf;
- while (--maxcount)
- {
- pos++;
- src += pos * PyUnicode_KIND(self);
- slen -= pos;
- index += pos;
- pos = findchar(src, PyUnicode_KIND(self), slen, u1, 1);
- if (pos < 0)
- break;
- PyUnicode_WRITE(rkind, PyUnicode_DATA(u), index + pos, u2);
- }
+ replace_1char_inplace(u, pos, u1, u2, maxcount);
}
else {
int rkind = skind;
diff -r b45675223885 PC/VS9.0/pythoncore.vcproj
--- a/PC/VS9.0/pythoncore.vcproj Sun Apr 07 12:42:13 2013 -0500
+++ b/PC/VS9.0/pythoncore.vcproj Mon Apr 08 00:22:57 2013 +0300
@@ -1587,6 +1587,10 @@
>
+
+
diff -r b45675223885 PCbuild/pythoncore.vcxproj
--- a/PCbuild/pythoncore.vcxproj Sun Apr 07 12:42:13 2013 -0500
+++ b/PCbuild/pythoncore.vcxproj Mon Apr 08 00:22:57 2013 +0300
@@ -475,6 +475,7 @@
+
diff -r b45675223885 PCbuild/pythoncore.vcxproj.filters
--- a/PCbuild/pythoncore.vcxproj.filters Sun Apr 07 12:42:13 2013 -0500
+++ b/PCbuild/pythoncore.vcxproj.filters Mon Apr 08 00:22:57 2013 +0300
@@ -378,6 +378,9 @@
Objects
+
+ Objects
+
Objects