Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 88435)
+++ Objects/unicodeobject.c	(working copy)
@@ -676,12 +676,12 @@
         int zeropad, int width, int precision, char c)
 {
     *fmt++ = '%';
-    if (width) {
+    if (width >= 0) {
         if (zeropad)
             *fmt++ = '0';
         fmt += sprintf(fmt, "%d", width);
     }
-    if (precision)
+    if (precision >= 0)
         fmt += sprintf(fmt, ".%d", precision);
     if (longflag)
         *fmt++ = 'l';
@@ -719,6 +719,53 @@
    plus 1 for the sign.  53/22 is an upper bound for log10(256). */
 #define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
 
+/* Apply a printf-style width and precision to a unicode object.
+
+   The text is first truncated to at most `precision` characters and then
+   left-padded with spaces up to `width` characters.  A negative precision
+   means "do not truncate"; a negative width means "do not pad".
+
+   Return a new reference, or NULL with an exception set on failure. */
+static PyObject *
+unicode_format_align(PyObject *unicode, int width, int precision)
+{
+    PyObject *result;
+    Py_UNICODE *u;
+    Py_ssize_t size, len, pad, i;
+
+    assert(PyUnicode_Check(unicode));
+
+    size = PyUnicode_GET_SIZE(unicode);
+    /* Number of characters of the original text that are kept. */
+    len = (precision < 0 || precision > size) ? size : precision;
+    pad = width - len;
+
+    if (pad <= 0) {
+        /* Nothing to pad (this includes a missing width and
+           width <= precision): never allocate a buffer that is smaller
+           than the text copied into it. */
+        if (len == size) {
+            Py_INCREF(unicode);
+            return unicode;
+        }
+        return PySequence_GetSlice(unicode, 0, len);
+    }
+
+    result = PyUnicode_FromUnicode(NULL, width);
+    if (!result)
+        return NULL;
+
+    /* Add left-pad spaces */
+    u = PyUnicode_AS_UNICODE(result);
+    for (i = 0; i < pad; i++)
+        *u++ = (Py_UNICODE)' ';
+
+    Py_UNICODE_COPY(u, PyUnicode_AS_UNICODE(unicode), len);
+
+    return result;
+}
+
+
 PyObject *
 PyUnicode_FromFormatV(const char *format, va_list vargs)
 {
@@ -733,6 +780,7 @@
     const char* f;
     Py_UNICODE *s;
     PyObject *string;
+    PyObject *formatted;
     /* used by sprintf */
     char buffer[ITEM_BUFFER_LEN+1];
     /* use abuffer instead of buffer, if we need more space
@@ -752,13 +800,11 @@
         if (*f == '%') {
             if (*(f+1)=='%')
                 continue;
-            if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A')
-                ++callcount;
             while (Py_ISDIGIT((unsigned)*f))
                 width = (width*10) + *f++ - '0';
             while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
                 ;
-            if (*f == 's')
+            if (*f == 's' || *f == 'S' || *f == 'R' || *f == 'A')
                 ++callcount;
         }
         else if (128 <= (unsigned char)*f) {
@@ -785,10 +831,11 @@
 #ifdef HAVE_LONG_LONG
             int longlongflag = 0;
 #endif
-            const char* p = f;
+            const char* p = f++;
             width = 0;
             while (Py_ISDIGIT((unsigned)*f))
                 width = (width*10) + *f++ - '0';
+            f--;
             while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f))
                 ;
 
@@ -845,7 +892,7 @@
                 PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace");
                 if (!str)
                     goto fail;
-                n += PyUnicode_GET_SIZE(str);
+                n += width > PyUnicode_GET_SIZE(str) ? width : PyUnicode_GET_SIZE(str);
                 /* Remember the str and switch to the next slot */
                 *callresult++ = str;
                 break;
@@ -854,7 +901,7 @@
             {
                 PyObject *obj = va_arg(count, PyObject *);
                 assert(obj && PyUnicode_Check(obj));
-                n += PyUnicode_GET_SIZE(obj);
+                n += width > PyUnicode_GET_SIZE(obj) ? width : PyUnicode_GET_SIZE(obj);
                 break;
             }
             case 'V':
@@ -864,9 +911,9 @@
                 assert(obj || str);
                 assert(!obj || PyUnicode_Check(obj));
                 if (obj)
-                    n += PyUnicode_GET_SIZE(obj);
+                    n += width > PyUnicode_GET_SIZE(obj) ? width : PyUnicode_GET_SIZE(obj);
                 else
-                    n += strlen(str);
+                    n += width > strlen(str) ? width : strlen(str);
                 break;
             }
             case 'S':
@@ -877,7 +924,7 @@
                 str = PyObject_Str(obj);
                 if (!str)
                     goto fail;
-                n += PyUnicode_GET_SIZE(str);
+                n += width > PyUnicode_GET_SIZE(str) ? width : PyUnicode_GET_SIZE(str);
                 /* Remember the str and switch to the next slot */
                 *callresult++ = str;
                 break;
@@ -890,7 +937,7 @@
                 repr = PyObject_Repr(obj);
                 if (!repr)
                     goto fail;
-                n += PyUnicode_GET_SIZE(repr);
+                n += width > PyUnicode_GET_SIZE(repr) ? width : PyUnicode_GET_SIZE(repr);
                 /* Remember the repr and switch to the next slot */
                 *callresult++ = repr;
                 break;
@@ -903,7 +950,7 @@
                 ascii = PyObject_ASCII(obj);
                 if (!ascii)
                     goto fail;
-                n += PyUnicode_GET_SIZE(ascii);
+                n += width > PyUnicode_GET_SIZE(ascii) ? width : PyUnicode_GET_SIZE(ascii);
                 /* Remember the repr and switch to the next slot */
                 *callresult++ = ascii;
                 break;
@@ -961,14 +1008,20 @@
             int size_tflag = 0;
             zeropad = (*f == '0');
             /* parse the width.precision part */
-            width = 0;
-            while (Py_ISDIGIT((unsigned)*f))
+            width = -1;
+            while (Py_ISDIGIT((unsigned)*f)) {
+                if (width == -1)
+                    width = 0;
                 width = (width*10) + *f++ - '0';
-            precision = 0;
+            }
+            precision = -1;
             if (*f == '.') {
                 f++;
-                while (Py_ISDIGIT((unsigned)*f))
+                while (Py_ISDIGIT((unsigned)*f)) {
+                    if (precision == -1)
+                        precision = 0;
                     precision = (precision*10) + *f++ - '0';
+                }
             }
             /* Handle %ld, %lu, %lld and %llu. */
             if (*f == 'l') {
@@ -1039,11 +1092,16 @@
             {
                 /* unused, since we already have the result */
                 (void) va_arg(vargs, char *);
-                Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),
-                                PyUnicode_GET_SIZE(*callresult));
-                s += PyUnicode_GET_SIZE(*callresult);
+                formatted = unicode_format_align(*callresult, width,
+                                                 precision);
+                Py_DECREF(*callresult);
+                if (!formatted)
+                    goto fail;
+                Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(formatted),
+                                PyUnicode_GET_SIZE(formatted));
+                s += PyUnicode_GET_SIZE(formatted);
                 /* We're done with the unicode()/repr() => forget it */
-                Py_DECREF(*callresult);
+                Py_DECREF(formatted);
                 /* switch to next unicode()/repr() result */
                 ++callresult;
                 break;
@@ -1051,9 +1109,13 @@
             case 'U':
             {
                 PyObject *obj = va_arg(vargs, PyObject *);
-                Py_ssize_t size = PyUnicode_GET_SIZE(obj);
-                Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
-                s += size;
+                formatted = unicode_format_align(obj, width, precision);
+                if (!formatted)
+                    goto fail;
+                Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(formatted),
+                                PyUnicode_GET_SIZE(formatted));
+                s += PyUnicode_GET_SIZE(formatted);
+                Py_DECREF(formatted);
                 break;
             }
             case 'V':
@@ -1061,11 +1123,25 @@
                 PyObject *obj = va_arg(vargs, PyObject *);
                 const char *str = va_arg(vargs, const char *);
                 if (obj) {
-                    Py_ssize_t size = PyUnicode_GET_SIZE(obj);
-                    Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
-                    s += size;
+                    formatted = unicode_format_align(obj, width, precision);
+                    if (!formatted)
+                        goto fail;
+                    Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(formatted),
+                                    PyUnicode_GET_SIZE(formatted));
+                    s += PyUnicode_GET_SIZE(formatted);
+                    Py_DECREF(formatted);
                 } else {
-                    appendstring(str);
+                    /* str can be arbitrarily long, so pad and truncate it
+                       by hand rather than through a fixed-size sprintf()
+                       buffer. */
+                    Py_ssize_t len = (Py_ssize_t)strlen(str);
+                    Py_ssize_t pad;
+                    if (precision >= 0 && precision < len)
+                        len = precision;
+                    for (pad = width - len; pad > 0; pad--)
+                        *s++ = ' ';
+                    while (len-- > 0)
+                        *s++ = *str++;
                 }
                 break;
             }
@@ -1078,12 +1154,16 @@
                 Py_ssize_t upos;
                 /* unused, since we already have the result */
                 (void) va_arg(vargs, PyObject *);
-                ucopy = PyUnicode_AS_UNICODE(*callresult);
-                usize = PyUnicode_GET_SIZE(*callresult);
+                formatted = unicode_format_align(*callresult, width, precision);
+                Py_DECREF(*callresult);
+                if (!formatted)
+                    goto fail;
+                ucopy = PyUnicode_AS_UNICODE(formatted);
+                usize = PyUnicode_GET_SIZE(formatted);
                 for (upos = 0; upos<usize;)
                     *s++ = ucopy[upos++];
                 /* We're done with the unicode()/repr() => forget it */
-                Py_DECREF(*callresult);
+                Py_DECREF(formatted);
                 /* switch to next unicode()/repr() result */
                 ++callresult;
                 break;
Index: Lib/test/test_unicode.py
===================================================================
--- Lib/test/test_unicode.py	(revision 88435)
+++ Lib/test/test_unicode.py	(working copy)
@@ -1456,6 +1456,51 @@
         text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
         self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
 
+        # the following tests are from #7330
+        # test width modifier and precision modifier with %S
+        text = PyUnicode_FromFormat(b'repr=%5S', 'xx')
+        self.assertEqual(text, "repr=   xx")
+        text = PyUnicode_FromFormat(b'repr=%.2S', 'xxx')
+        self.assertEqual(text, "repr=xx")
+        text = PyUnicode_FromFormat(b'repr=%5.2S', 'xxx')
+        self.assertEqual(text, "repr=   xx")
+
+        # test width modifier and precision modifier with %R
+        text = PyUnicode_FromFormat(b'repr=%5R', 'xx')
+        self.assertEqual(text, "repr= 'xx'")
+        text = PyUnicode_FromFormat(b'repr=%.2R', 'xxx')
+        self.assertEqual(text, "repr='x")
+        text = PyUnicode_FromFormat(b'repr=%5.2R', 'xxx')
+        self.assertEqual(text, "repr=   'x")
+
+        # test width modifier and precision modifier with %A
+        text = PyUnicode_FromFormat(b'repr=%5A', 'xx')
+        self.assertEqual(text, "repr= 'xx'")
+        text = PyUnicode_FromFormat(b'repr=%.2A', 'xxx')
+        self.assertEqual(text, "repr='x")
+        text = PyUnicode_FromFormat(b'repr=%5.2A', 'xxx')
+        self.assertEqual(text, "repr=   'x")
+
+        # test width modifier and precision modifier with %s
+        text = PyUnicode_FromFormat(b'repr=%5s', b'xx')
+        self.assertEqual(text, "repr=   xx")
+        text = PyUnicode_FromFormat(b'repr=%.2s', b'xxx')
+        self.assertEqual(text, "repr=xx")
+        text = PyUnicode_FromFormat(b'repr=%5.2s', b'xxx')
+        self.assertEqual(text, "repr=   xx")
+
+        text = PyUnicode_FromFormat(b'repr=%.s', b'xxx')
+        self.assertEqual(text, "repr=xxx")
+
+        text = PyUnicode_FromFormat(b'repr=%s', b'xxx')
+        self.assertEqual(text, "repr=xxx")
+
+        # precision equal to the length, with no width or a smaller width
+        text = PyUnicode_FromFormat(b'repr=%.3S', 'xxx')
+        self.assertEqual(text, "repr=xxx")
+        text = PyUnicode_FromFormat(b'repr=%2.3S', 'xxx')
+        self.assertEqual(text, "repr=xxx")
+
     # Test PyUnicode_AsWideChar()
     def test_aswidechar(self):
         from _testcapi import unicode_aswidechar