diff -r dc721353b2e2 Doc/c-api/unicode.rst --- a/Doc/c-api/unicode.rst Sat Oct 06 23:55:33 2012 +0200 +++ b/Doc/c-api/unicode.rst Sun Oct 07 23:01:43 2012 +0200 @@ -516,8 +516,7 @@ APIs: | | | :c:func:`PyObject_Repr`. | +-------------------+---------------------+--------------------------------+ - An unrecognized format character causes all the rest of the format string to be - copied as-is to the result string, and any extra arguments discarded. + Raise a :exc:`ValueError` if the format string is invalid. .. note:: @@ -530,6 +529,12 @@ APIs: .. versionchanged:: 3.3 Support for ``"%li"``, ``"%lli"`` and ``"%zi"`` added. + .. versionchanged:: 3.4 + Raise a :exc:`ValueError` if the format string is invalid, instead of + copying the rest of the format string as-is to the result string (and + discarding any extra arguments). + + .. c:function:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs) diff -r dc721353b2e2 Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Sat Oct 06 23:55:33 2012 +0200 +++ b/Lib/test/test_unicode.py Sun Oct 07 23:01:43 2012 +0200 @@ -1746,7 +1746,6 @@ class UnicodeTest(string_tests.CommonTes self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff') # test "%" - self.assertEqual(PyUnicode_FromFormat(b'%'), '%') self.assertEqual(PyUnicode_FromFormat(b'%%'), '%') self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s') self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]') @@ -1813,12 +1812,14 @@ class UnicodeTest(string_tests.CommonTes text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff') self.assertEqual(text, 'repr=abc\ufffd') - # not supported: copy the raw format string. 
these tests are just here - # to check for crashs and should not be considered as specifications - self.assertEqual(PyUnicode_FromFormat(b'%1%s', b'abc'), '%s') - self.assertEqual(PyUnicode_FromFormat(b'%1abc'), '%1abc') - self.assertEqual(PyUnicode_FromFormat(b'%+i', c_int(10)), '%+i') - self.assertEqual(PyUnicode_FromFormat(b'%.%s', b'abc'), '%.%s') + # invalid format string + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%') + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%.s', b'abc') + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%.3', b'abc') + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%1%s', b'abc') + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%1abc') + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%+i', c_int(10)) + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%.%s', b'abc') # Test PyUnicode_AsWideChar() def test_aswidechar(self): diff -r dc721353b2e2 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sat Oct 06 23:55:33 2012 +0200 +++ b/Objects/unicodeobject.c Sun Oct 07 23:01:43 2012 +0200 @@ -2349,6 +2349,15 @@ unicode_fromformat_arg(_PyUnicodeWriter p = f; f++; + if (*f == '%') { + if (_PyUnicodeWriter_Prepare(writer, 1, '%') == 1) + return NULL; + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '%'); + writer->pos++; + f++; + return f; + } + zeropad = 0; if (*f == '0') { zeropad = 1; @@ -2369,6 +2378,8 @@ unicode_fromformat_arg(_PyUnicodeWriter precision = 0; if (*f == '.') { f++; + if (!Py_ISDIGIT((unsigned)*f)) + goto invalid_format; while (Py_ISDIGIT((unsigned)*f)) { if (precision > (INT_MAX - ((int)*f - '0')) / 10) { PyErr_SetString(PyExc_ValueError, @@ -2378,14 +2389,6 @@ unicode_fromformat_arg(_PyUnicodeWriter precision = (precision*10) + (*f - '0'); f++; } - if (*f == '%') { - /* "%.3%s" => f points to "3" */ - f--; - } - } - if (*f == '\0') { - /* bogus format "%.123" => go backward, f points to "3" */ - f--; } /* Handle %ld, %lu, %lld and %llu. 
*/ @@ -2411,7 +2414,7 @@ unicode_fromformat_arg(_PyUnicodeWriter ++f; } - if (f[1] == '\0') + if (f[0] != '\0' && f[1] == '\0') writer->overallocate = 0; switch (*f) { @@ -2626,27 +2629,18 @@ unicode_fromformat_arg(_PyUnicodeWriter break; } - case '%': - if (_PyUnicodeWriter_Prepare(writer, 1, '%') == 1) - return NULL; - PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '%'); - writer->pos++; - break; - default: - /* if we stumble upon an unknown formatting code, copy the rest - of the format string to the output string. (we cannot just - skip the code, since there's no way to know what's in the - argument list) */ - len = strlen(p); - if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1) - return NULL; - f = p+len; - return f; + goto invalid_format; } f++; return f; + +invalid_format: + PyErr_Format(PyExc_ValueError, + "PyUnicode_FromFormatV() got an invalid format string near \"%.10s\"", + p); + return NULL; } PyObject *