diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1777,20 +1777,9 @@ class StringModuleTest(unittest.TestCase b' 3.14 ') self.assertRaises(UnicodeEncodeError, unicode_encodedecimal, "123\u20ac", "strict") - self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"), - b'123?') - self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"), - b'123') - self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"), - b'123&#8364;') - self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"), - b'123\\u20ac') - self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"), - b'123? ') - self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"), - b'123??') - self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"), - b'123?0') + self.assertRaisesRegex( + ValueError, "decimal encoder does not support replace error handler", + unicode_encodedecimal, "123\u20ac", "replace") def test_transform_decimal(self): from _testcapi import unicode_transformdecimaltoascii as transform_decimal diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 3.2.3? Core and Builtins ----------------- +- Issue #13093: PyUnicode_EncodeDecimal() no longer supports error handlers + other than "strict". The caller was unable to compute the + size of the output buffer: it depends on the error handler. + - Issue #13338: Handle all enumerations in _Py_ANNOTATE_MEMORY_ORDER to allow compiling extension modules with -Wswitch-enum on gcc. Initial patch by Floris Bruynooghe. 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6280,30 +6280,26 @@ int PyUnicode_EncodeDecimal(Py_UNICODE * const char *errors) { Py_UNICODE *p, *end; - PyObject *errorHandler = NULL; - PyObject *exc = NULL; - const char *encoding = "decimal"; - const char *reason = "invalid decimal Unicode string"; - /* the following variable is used for caching string comparisons - * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */ - int known_errorHandler = -1; if (output == NULL) { PyErr_BadArgument(); return -1; } + if (errors != NULL && strcmp(errors, "strict") != 0) { + PyErr_Format(PyExc_ValueError, + "decimal encoder does not support %s error handler", + errors); + return -1; + } + p = s; end = s + length; while (p < end) { register Py_UNICODE ch = *p; int decimal; - PyObject *repunicode; - Py_ssize_t repsize; - Py_ssize_t newpos; - Py_UNICODE *uni2; - Py_UNICODE *collstart; - Py_UNICODE *collend; + Py_ssize_t startpos; + PyObject *errorHandler, *exc; if (Py_UNICODE_ISSPACE(ch)) { *output++ = ' '; @@ -6322,90 +6318,19 @@ int PyUnicode_EncodeDecimal(Py_UNICODE * continue; } /* All other characters are considered unencodable */ - collstart = p; - for (collend = p+1; collend < end; collend++) { - if ((0 < *collend && *collend < 256) || - Py_UNICODE_ISSPACE(*collend) || - 0 <= Py_UNICODE_TODECIMAL(*collend)) - break; - } - /* cache callback name lookup - * (if not done yet, i.e. 
it's the first error) */ - if (known_errorHandler==-1) { - if ((errors==NULL) || (!strcmp(errors, "strict"))) - known_errorHandler = 1; - else if (!strcmp(errors, "replace")) - known_errorHandler = 2; - else if (!strcmp(errors, "ignore")) - known_errorHandler = 3; - else if (!strcmp(errors, "xmlcharrefreplace")) - known_errorHandler = 4; - else - known_errorHandler = 0; - } - switch (known_errorHandler) { - case 1: /* strict */ - raise_encode_exception(&exc, encoding, s, length, collstart-s, collend-s, reason); - goto onError; - case 2: /* replace */ - for (p = collstart; p < collend; ++p) - *output++ = '?'; - /* fall through */ - case 3: /* ignore */ - p = collend; - break; - case 4: /* xmlcharrefreplace */ - /* generate replacement (temporarily (mis)uses p) */ - for (p = collstart; p < collend; ++p) - output += sprintf(output, "&#%d;", (int)*p); - p = collend; - break; - default: - repunicode = unicode_encode_call_errorhandler(errors, &errorHandler, - encoding, reason, s, length, &exc, - collstart-s, collend-s, &newpos); - if (repunicode == NULL) - goto onError; - if (!PyUnicode_Check(repunicode)) { - /* Byte results not supported, since they have no decimal property. 
*/ - PyErr_SetString(PyExc_TypeError, "error handler should return unicode"); - Py_DECREF(repunicode); - goto onError; - } - /* generate replacement */ - repsize = PyUnicode_GET_SIZE(repunicode); - for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) { - Py_UNICODE ch = *uni2; - if (Py_UNICODE_ISSPACE(ch)) - *output++ = ' '; - else { - decimal = Py_UNICODE_TODECIMAL(ch); - if (decimal >= 0) - *output++ = '0' + decimal; - else if (0 < ch && ch < 256) - *output++ = (char)ch; - else { - Py_DECREF(repunicode); - raise_encode_exception(&exc, encoding, - s, length, collstart-s, collend-s, reason); - goto onError; - } - } - } - p = s + newpos; - Py_DECREF(repunicode); - } + startpos = p-s; + errorHandler = NULL; + exc = NULL; + raise_encode_exception(&exc, "decimal", s, length, + startpos, startpos+1, + "invalid decimal Unicode string"); + Py_XDECREF(exc); + Py_XDECREF(errorHandler); + return -1; } /* 0-terminate the output string */ *output++ = '\0'; - Py_XDECREF(exc); - Py_XDECREF(errorHandler); return 0; - - onError: - Py_XDECREF(exc); - Py_XDECREF(errorHandler); - return -1; } /* --- Helpers ------------------------------------------------------------ */