diff -r e16ec3b468d1 Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Sat Oct 06 23:48:20 2012 +0200 +++ b/Lib/test/test_unicode.py Sat Oct 06 23:48:40 2012 +0200 @@ -1746,7 +1746,6 @@ class UnicodeTest(string_tests.CommonTes self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff') # test "%" - self.assertEqual(PyUnicode_FromFormat(b'%'), '%') self.assertEqual(PyUnicode_FromFormat(b'%%'), '%') self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s') self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]') @@ -1813,12 +1812,12 @@ class UnicodeTest(string_tests.CommonTes text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff') self.assertEqual(text, 'repr=abc\ufffd') - # not supported: copy the raw format string. these tests are just here - # to check for crashs and should not be considered as specifications - self.assertEqual(PyUnicode_FromFormat(b'%1%s', b'abc'), '%s') - self.assertEqual(PyUnicode_FromFormat(b'%1abc'), '%1abc') - self.assertEqual(PyUnicode_FromFormat(b'%+i', c_int(10)), '%+i') - self.assertEqual(PyUnicode_FromFormat(b'%.%s', b'abc'), '%.%s') + # invalid format string + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%') + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%1%s', b'abc') + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%1abc') + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%+i', c_int(10)) + self.assertRaises(ValueError, PyUnicode_FromFormat, b'%.%s', b'abc') # Test PyUnicode_AsWideChar() def test_aswidechar(self): diff -r e16ec3b468d1 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sat Oct 06 23:48:20 2012 +0200 +++ b/Objects/unicodeobject.c Sat Oct 06 23:48:40 2012 +0200 @@ -2374,14 +2374,6 @@ unicode_fromformat_arg(_PyUnicodeWriter precision = (precision*10) + (*f - '0'); f++; } - if (*f == '%') { - /* "%.3%s" => f points to "3" */ - f--; - } - } - if (*f == '\0') { - /* bogus format "%.123" => go backward, f points to "3" */ - f--; } /* Handle %ld, %lu, %lld and %llu. */ @@ -2407,6 +2399,11 @@ unicode_fromformat_arg(_PyUnicodeWriter ++f; } + if (f != p+1 && *f == '%') + goto invalid_format; + if (*f == '\0') + goto invalid_format; + if (f[1] == '\0') writer->overallocate = 0; @@ -2630,19 +2627,17 @@ unicode_fromformat_arg(_PyUnicodeWriter break; default: - /* if we stumble upon an unknown formatting code, copy the rest - of the format string to the output string. (we cannot just - skip the code, since there's no way to know what's in the - argument list) */ - len = strlen(p); - if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1) - return NULL; - f = p+len; - return f; + goto invalid_format; } f++; return f; + +invalid_format: + PyErr_Format(PyExc_ValueError, + "PyUnicode_FromFormatV() got an invalid format string near \"%.10s\"", + p); + return NULL; } PyObject *