diff -r 9927781e457f Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Mon Dec 15 14:02:43 2014 +0200 +++ b/Lib/test/test_unicode.py Tue Dec 16 13:52:24 2014 +0200 @@ -1700,6 +1700,9 @@ class UnicodeTest( if sys.maxunicode > 0xffff: check_format(u'\U0010ffff', b'%c', c_int(0x10ffff)) + else: + with self.assertRaises(OverflowError): + PyUnicode_FromFormat(b'%c', c_int(0x10000)) with self.assertRaises(OverflowError): PyUnicode_FromFormat(b'%c', c_int(0x110000)) # Issue #18183 @@ -1750,8 +1753,45 @@ class UnicodeTest( b'%zu', c_size_t(123)) # test long output + min_long = -(2 ** (8 * sizeof(c_long) - 1)) + max_long = -min_long - 1 + check_format(unicode(min_long), + b'%ld', c_long(min_long)) + check_format(unicode(max_long), + b'%ld', c_long(max_long)) + max_ulong = 2 ** (8 * sizeof(c_ulong)) - 1 + check_format(unicode(max_ulong), + b'%lu', c_ulong(max_ulong)) PyUnicode_FromFormat(b'%p', c_void_p(-1)) + # test padding (width and/or precision) + check_format(u'123'.rjust(10, u'0'), + b'%010i', c_int(123)) + check_format(u'123'.rjust(100), + b'%100i', c_int(123)) + check_format(u'123'.rjust(100, u'0'), + b'%.100i', c_int(123)) + check_format(u'123'.rjust(80, u'0').rjust(100), + b'%100.80i', c_int(123)) + + check_format(u'123'.rjust(10, u'0'), + b'%010u', c_uint(123)) + check_format(u'123'.rjust(100), + b'%100u', c_uint(123)) + check_format(u'123'.rjust(100, u'0'), + b'%.100u', c_uint(123)) + check_format(u'123'.rjust(80, u'0').rjust(100), + b'%100.80u', c_uint(123)) + + check_format(u'123'.rjust(10, u'0'), + b'%010x', c_int(0x123)) + check_format(u'123'.rjust(100), + b'%100x', c_int(0x123)) + check_format(u'123'.rjust(100, u'0'), + b'%.100x', c_int(0x123)) + check_format(u'123'.rjust(80, u'0').rjust(100), + b'%100.80x', c_int(0x123)) + # test %V check_format(u'repr=abc', b'repr=%V', u'abc', b'xyz') diff -r 9927781e457f Misc/NEWS --- a/Misc/NEWS Mon Dec 15 14:02:43 2014 +0200 +++ b/Misc/NEWS Tue Dec 16 13:52:24 2014 +0200 @@ -10,6 +10,9 @@ What's New in Python 2.7.10? Core and Builtins ----------------- +- Issue #23055: Fixed a buffer overflow in PyUnicode_FromFormatV. Analysis + and fix by Guido Vranken. + - Issue #23048: Fix jumping out of an infinite while loop in the pdb. Library diff -r 9927781e457f Objects/unicodeobject.c --- a/Objects/unicodeobject.c Mon Dec 15 14:02:43 2014 +0200 +++ b/Objects/unicodeobject.c Tue Dec 16 13:52:24 2014 +0200 @@ -735,15 +735,10 @@ PyUnicode_FromFormatV(const char *format * objects once during step 3 and put the result in an array) */ for (f = format; *f; f++) { if (*f == '%') { - if (*(f+1)=='%') - continue; - if (*(f+1)=='S' || *(f+1)=='R') - ++callcount; - while (isdigit((unsigned)*f)) - width = (width*10) + *f++ - '0'; - while (*++f && *f != '%' && !isalpha((unsigned)*f)) - ; - if (*f == 's') + f++; + while (*f && *f != '%' && !isalpha((unsigned)*f)) + f++; + if (*f == 's' || *f=='S' || *f=='R') ++callcount; } } @@ -760,12 +755,16 @@ PyUnicode_FromFormatV(const char *format /* step 3: figure out how large a buffer we need */ for (f = format; *f; f++) { if (*f == '%') { - const char* p = f; + const char* p = f++; width = 0; while (isdigit((unsigned)*f)) width = (width*10) + *f++ - '0'; - while (*++f && *f != '%' && !isalpha((unsigned)*f)) - ; + precision = 0; + if (*f == '.') { + f++; + while (isdigit((unsigned)*f)) + precision = (precision*10) + *f++ - '0'; + } /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since * they don't affect the amount of space we reserve. @@ -800,6 +799,8 @@ PyUnicode_FromFormatV(const char *format break; case 'd': case 'u': case 'i': case 'x': (void) va_arg(count, int); + if (width < precision) + width = precision; /* 20 bytes is enough to hold a 64-bit integer. Decimal takes the most space. This isn't enough for octal.