diff -r 6a347c0ffbfc Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Sat Apr 05 11:56:37 2014 +0200
+++ b/Objects/unicodeobject.c	Sat Apr 05 15:32:14 2014 +0300
@@ -8445,70 +8445,98 @@
     return resunicode;
 }
 
-/* Lookup the character ch in the mapping and put the result in result,
-   which must be decrefed by the caller.
-   Return 0 on success, -1 on error */
-static int
-charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject **result)
-{
-    PyObject *w = PyLong_FromLong((long)c);
-    PyObject *x;
-
-    if (w == NULL)
-        return -1;
-    x = PyObject_GetItem(mapping, w);
-    Py_DECREF(w);
-    if (x == NULL) {
-        if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-            /* No mapping found means: use 1:1 mapping. */
-            PyErr_Clear();
-            *result = NULL;
-            return 0;
-        } else
-            return -1;
-    }
-    else if (x == Py_None) {
-        *result = x;
-        return 0;
-    }
-    else if (PyLong_Check(x)) {
-        long value = PyLong_AS_LONG(x);
-        if (value < 0 || value > MAX_UNICODE) {
-            PyErr_Format(PyExc_ValueError,
-                         "character mapping must be in range(0x%x)",
-                         MAX_UNICODE+1);
+/* Marker meaning "the mapping has no entry for this character".  Only its
+   address is used; the code below never INCREFs or DECREFs it. */
+static PyObject no_mapping_sentinel;
+
+/* Mark all 128 cache slots as "not looked up yet" (NULL). */
+static void
+charmaptranslatecache_init(PyObject *cache[128])
+{
+    memset(cache, 0, 128 * sizeof(PyObject *));
+}
+
+/* Drop the references owned by the cache; sentinel slots own none. */
+static void
+charmaptranslatecache_free(PyObject *cache[128])
+{
+    int i;
+    for (i = 0; i < 128; i++)
+        if (cache[i] != NULL && cache[i] != &no_mapping_sentinel)
+            Py_DECREF(cache[i]);
+}
+
+/* Look up the character c in the mapping (via the cache when c < 128) and
+   return the result, which must be decrefed by the caller if it is not
+   &no_mapping_sentinel.  Return NULL on error. */
+static PyObject *
+charmaptranslate_lookup(Py_UCS4 c, PyObject *mapping, PyObject *cache[128])
+{
+    PyObject *x = NULL;
+
+    if (c < 128) {
+        x = cache[c];
+        if (x == &no_mapping_sentinel)
+            return x;
+    }
+    if (x != NULL)
+        Py_INCREF(x);
+    else {
+        PyObject *w = PyLong_FromLong((long)c);
+        if (w == NULL)
+            return NULL;
+        x = PyObject_GetItem(mapping, w);
+        Py_DECREF(w);
+        if (x == NULL) {
+            if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+                /* No mapping found means: use 1:1 mapping. */
+                PyErr_Clear();
+                x = &no_mapping_sentinel;
+                if (c < 128)
+                    cache[c] = x;
+                return x;
+            } else
+                return NULL;
+        }
+        if (x == Py_None || PyUnicode_Check(x)) {
+        }
+        else if (PyLong_Check(x)) {
+            long value = PyLong_AS_LONG(x);
+            if (value < 0 || value > MAX_UNICODE) {
+                PyErr_Format(PyExc_ValueError,
+                             "character mapping must be in range(0x%x)",
+                             MAX_UNICODE+1);
+                Py_DECREF(x);
+                return NULL;
+            }
+        }
+        else {
+            /* wrong return value */
+            PyErr_SetString(PyExc_TypeError,
+                            "character mapping must return integer, None or str");
             Py_DECREF(x);
-            return -1;
-        }
-        *result = x;
-        return 0;
-    }
-    else if (PyUnicode_Check(x)) {
-        *result = x;
-        return 0;
-    }
-    else {
-        /* wrong return value */
-        PyErr_SetString(PyExc_TypeError,
-                        "character mapping must return integer, None or str");
-        Py_DECREF(x);
-        return -1;
-    }
+            return NULL;
+        }
+        if (c < 128) {
+            Py_INCREF(x);
+            cache[c] = x;
+        }
+    }
+    return x;
 }
 
 /* lookup the character, write the result into the writer.
    Return 1 if the result was written into the writer, return 0 if the mapping
    was undefined, raise an exception return -1 on error. */
 static int
-charmaptranslate_output(Py_UCS4 ch, PyObject *mapping,
+charmaptranslate_output(Py_UCS4 ch, PyObject *mapping, PyObject *cache[128],
                         _PyUnicodeWriter *writer)
 {
-    PyObject *item;
-
-    if (charmaptranslate_lookup(ch, mapping, &item))
-        return -1;
-
-    if (item == NULL) {
+    PyObject *item = charmaptranslate_lookup(ch, mapping, cache);
+    if (item == NULL)
+        return -1;
+
+    if (item == &no_mapping_sentinel) {
         /* not found => default to 1:1 mapping */
         if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) {
             return -1;
@@ -8538,7 +8566,22 @@
         return -1;
     }
 
-    if (_PyUnicodeWriter_WriteStr(writer, item) < 0) {
+    if (PyUnicode_READY(item) == -1) {
+        Py_DECREF(item);
+        return -1;
+    }
+    if (PyUnicode_GET_LENGTH(item) == 1) {
+        ch = PyUnicode_READ_CHAR(item, 0);
+        if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) {
+            Py_DECREF(item);
+            return -1;
+        }
+        Py_DECREF(item);
+        return 1;
+    }
+
+    if (PyUnicode_GET_LENGTH(item) &&
+        _PyUnicodeWriter_WriteStr(writer, item) < 0) {
         Py_DECREF(item);
         return -1;
     }
@@ -8549,13 +8592,12 @@
 
 static int
 unicode_fast_translate_lookup(PyObject *mapping, Py_UCS1 ch,
-                              Py_UCS1 *translate)
-{
-    PyObject *item;
+                              PyObject *cache[128], Py_UCS1 *translate)
+{
     int ret = 0;
-
-    item = NULL;
-    if (charmaptranslate_lookup(ch, mapping, &item)) {
+    PyObject *item = charmaptranslate_lookup(ch, mapping, cache);
+
+    if (item == NULL) {
         return -1;
     }
 
@@ -8564,7 +8606,7 @@
         goto exit;
     }
 
-    if (item == NULL) {
+    if (item == &no_mapping_sentinel) {
         /* not found => default to 1:1 mapping */
         translate[ch] = ch;
         return 1;
@@ -8613,7 +8655,7 @@
    was translated into writer, return 0 if the input string was partially
    translated into writer, raise an exception and return -1 on error. */
 static int
-unicode_fast_translate(PyObject *input, PyObject *mapping,
+unicode_fast_translate(PyObject *input, PyObject *mapping, PyObject *cache[128],
                        _PyUnicodeWriter *writer)
 {
     Py_UCS1 translate[128], ch, ch2;
@@ -8640,7 +8682,7 @@
         ch = *in;
         ch2 = translate[ch];
         if (ch2 == 0xff) {
-            res = unicode_fast_translate_lookup(mapping, ch, translate);
+            res = unicode_fast_translate_lookup(mapping, ch, cache, translate);
             if (res < 0)
                 return -1;
             if (res == 0) {
@@ -8672,6 +8714,10 @@
     PyObject *exc = NULL;
     int ignore;
     int res;
+    PyObject *cache[128];
 
+    /* Zero the cache up front: every 'goto onError' below frees it. */
+    charmaptranslatecache_init(cache);
+
     if (mapping == NULL) {
         PyErr_BadArgument();
@@ -8695,13 +8741,13 @@
     if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1)
         goto onError;
 
-    res = unicode_fast_translate(input, mapping, &writer);
-    if (res < 0) {
-        _PyUnicodeWriter_Dealloc(&writer);
-        return NULL;
-    }
-    if (res == 1)
+    res = unicode_fast_translate(input, mapping, cache, &writer);
+    if (res < 0)
+        goto onError;
+    if (res == 1) {
+        charmaptranslatecache_free(cache);
         return _PyUnicodeWriter_Finish(&writer);
+    }
 
     ignore = (errors != NULL && strcmp(errors, "ignore") == 0);
 
@@ -8717,7 +8763,7 @@
         Py_UCS4 ch;
 
         ch = PyUnicode_READ(kind, data, i);
-        translate = charmaptranslate_output(ch, mapping, &writer);
+        translate = charmaptranslate_output(ch, mapping, cache, &writer);
         if (translate < 0)
             goto onError;
 
@@ -8735,8 +8781,11 @@
         while (collend < size) {
             PyObject *x;
             ch = PyUnicode_READ(kind, data, collend);
-            if (charmaptranslate_lookup(ch, mapping, &x))
+            x = charmaptranslate_lookup(ch, mapping, cache);
+            if (x == NULL)
                 goto onError;
+            if (x == &no_mapping_sentinel)
+                break;
             Py_XDECREF(x);
             if (x != Py_None)
                 break;
@@ -8760,11 +8809,13 @@
             i = newpos;
         }
     }
+    charmaptranslatecache_free(cache);
     Py_XDECREF(exc);
     Py_XDECREF(errorHandler);
     return _PyUnicodeWriter_Finish(&writer);
 
   onError:
+    charmaptranslatecache_free(cache);
     _PyUnicodeWriter_Dealloc(&writer);
     Py_XDECREF(exc);
     Py_XDECREF(errorHandler);