Make str.format() with a string argument size its result correctly when the
argument contains non-ASCII characters.

format_string_internal() computed the maxchar of its result from the fill
character only.  It now also takes the characters of value[0:len] into
account, using the new helper _PyUnicode_FindMaxChar().  The added tests
cover non-ASCII arguments with and without a precision.

diff -r 6762b943ee59 Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py	Tue Apr 17 21:42:07 2012 -0400
+++ b/Lib/test/test_unicode.py	Mon Apr 23 16:25:13 2012 +0200
@@ -924,6 +924,14 @@ class UnicodeTest(string_tests.CommonTes
         self.assertRaises(ValueError, format, '', '#')
         self.assertRaises(ValueError, format, '', '#20')
 
+        # Non-ASCII
+        self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"),
+                         'ABC\u0410\u0411\u0412')
+        self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"),
+                         'ABC')
+        self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"),
+                         '')
+
     def test_format_map(self):
         self.assertEqual(''.format_map({}), '')
         self.assertEqual('a'.format_map({}), 'a')
diff -r 6762b943ee59 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Tue Apr 17 21:42:07 2012 -0400
+++ b/Objects/unicodeobject.c	Mon Apr 23 16:25:13 2012 +0200
@@ -1957,6 +1957,43 @@ PyUnicode_FromKindAndData(int kind, cons
     }
 }
 
+/* Return a maximum character value covering unicode[start:end], e.g. to pass
+   as maxchar when allocating a string that will receive that range.
+   unicode must be ready and 0 <= start <= end <= length must hold; both are
+   only checked with assert().  The full string yields
+   PyUnicode_MAX_CHAR_VALUE(unicode) and an empty range yields 127; any other
+   range is scanned by the find_max_char helper for the string's kind. */
+Py_UCS4
+_PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
+{
+    enum PyUnicode_Kind kind;
+    void *startptr, *endptr;
+
+    assert(PyUnicode_IS_READY(unicode));
+    assert(0 <= start);
+    assert(end <= PyUnicode_GET_LENGTH(unicode));
+    assert(start <= end);
+
+    if (start == 0 && end == PyUnicode_GET_LENGTH(unicode))
+        return PyUnicode_MAX_CHAR_VALUE(unicode);
+
+    if (start == end)
+        return 127;
+
+    kind = PyUnicode_KIND(unicode);
+    startptr = PyUnicode_DATA(unicode);
+    endptr = (char*)startptr + end * kind;
+    if (start)
+        startptr = (char*)startptr + start * kind;
+    switch(kind)
+    {
+    case PyUnicode_1BYTE_KIND: return ucs1lib_find_max_char(startptr, endptr);
+    case PyUnicode_2BYTE_KIND: return ucs2lib_find_max_char(startptr, endptr);
+    default:
+    case PyUnicode_4BYTE_KIND: return ucs4lib_find_max_char(startptr, endptr);
+    }
+}
+
 /* Ensure that a string uses the most efficient storage, if it is not the
    case: create a new string with of the right kind. Write NULL into *p_unicode
    on error. */
diff -r 6762b943ee59 Python/formatter_unicode.c
--- a/Python/formatter_unicode.c	Tue Apr 17 21:42:07 2012 -0400
+++ b/Python/formatter_unicode.c	Mon Apr 23 16:25:13 2012 +0200
@@ -713,10 +713,10 @@ format_string_internal(PyObject *value,
     Py_ssize_t lpad;
     Py_ssize_t rpad;
     Py_ssize_t total;
-    Py_ssize_t pos;
+    Py_ssize_t i, pos;
     Py_ssize_t len = PyUnicode_GET_LENGTH(value);
     PyObject *result = NULL;
-    Py_UCS4 maxchar = 127;
+    Py_UCS4 ch, maxchar = 127;
 
     /* sign is not allowed on strings */
     if (format->sign != '\0') {
@@ -752,8 +752,12 @@ format_string_internal(PyObject *value,
     if (lpad != 0 || rpad != 0)
         maxchar = Py_MAX(maxchar, format->fill_char);
 
+    /* the result must also be wide enough for the characters of value[0:len] */
+    ch = _PyUnicode_FindMaxChar(value, 0, len);
+    maxchar = Py_MAX(maxchar, ch);
+
     /* allocate the resulting string */
     result = PyUnicode_New(total, maxchar);
     if (result == NULL)
         goto done;
 