diff -r 60b18d7fbe24 Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Sat Apr 06 16:48:55 2013 +0200 +++ b/Lib/test/test_unicode.py Sat Apr 06 15:04:44 2013 -0400 @@ -929,16 +929,24 @@ class UnicodeTest(string_tests.CommonTes # Non-ASCII self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"), 'ABC\u0410\u0411\u0412') self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"), 'ABC') self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"), '') + self.assertEqual("{[{}]}".format({"{}" : "a"}), "a") + self.assertEqual("{[{]}".format({"{" : "a"}), "a") + self.assertEqual("{[}]}".format({"}" : "a"}), "a") + self.assertEqual("{[[]}".format({"[" : "a"}), "a") + self.assertRaises(ValueError, "{a{}b}".format, 42) + self.assertRaises(ValueError, "{a{b}".format, 42) + self.assertRaises(ValueError, "{[}".format, 42) + def test_format_map(self): self.assertEqual(''.format_map({}), '') self.assertEqual('a'.format_map({}), 'a') self.assertEqual('ab'.format_map({}), 'ab') self.assertEqual('a{{'.format_map({}), 'a{') self.assertEqual('a}}'.format_map({}), 'a}') self.assertEqual('{{b'.format_map({}), '{b') self.assertEqual('}}b'.format_map({}), '}b') diff -r 60b18d7fbe24 Objects/stringlib/unicode_format.h --- a/Objects/stringlib/unicode_format.h Sat Apr 06 16:48:55 2013 +0200 +++ b/Objects/stringlib/unicode_format.h Sat Apr 06 15:04:44 2013 -0400 @@ -538,78 +538,116 @@ render_field(PyObject *fieldobj, SubStri done: Py_XDECREF(format_spec_object); Py_XDECREF(result); return ok; } static int parse_field(SubString *str, SubString *field_name, SubString *format_spec, - Py_UCS4 *conversion) + int *format_spec_needs_expanding, Py_UCS4 *conversion) { /* Note this function works if the field name is zero length, which is good. Zero length field names are handled later, in field_name_split. */ Py_UCS4 c = 0; + int in_bracket = 0; /* initialize these, as they may be empty */ *conversion = '\0'; SubString_init(format_spec, NULL, 0, 0); /* Search for the field name. it's terminated by the end of the string, or a ':' or '!' */ field_name->str = str->str; field_name->start = str->start; while (str->start < str->end) { switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { + case '{': + if (!in_bracket) { + PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name"); + return 0; + } + continue; + case '}': + if (!in_bracket) + break; + continue; + case '[': + in_bracket = 1; + continue; + case ']': + in_bracket = 0; + continue; case ':': case '!': break; default: continue; } break; } + field_name->end = str->start - 1; if (c == '!' || c == ':') { + Py_ssize_t count; /* we have a format specifier and/or a conversion */ /* don't include the last character */ - field_name->end = str->start-1; - - /* the format specifier is the rest of the string */ - format_spec->str = str->str; - format_spec->start = str->start; - format_spec->end = str->end; /* see if there's a conversion specifier */ if (c == '!') { /* there must be another character present */ - if (format_spec->start >= format_spec->end) { + if (str->start >= str->end) { PyErr_SetString(PyExc_ValueError, - "end of format while looking for conversion " + "end of string while looking for conversion " "specifier"); return 0; } - *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++); + *conversion = PyUnicode_READ_CHAR(str->str, str->start++); - /* if there is another character, it must be a colon */ - if (format_spec->start < format_spec->end) { - c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++); + if (str->start < str->end) { + c = PyUnicode_READ_CHAR(str->str, str->start++); + if (c == '}') + return 1; if (c != ':') { PyErr_SetString(PyExc_ValueError, - "expected ':' after format specifier"); + "expected ':' after conversion specifier"); return 0; } } } + format_spec->str = str->str; + format_spec->start = str->start; + count = 1; + while (str->start < str->end) { + switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { + case '{': + *format_spec_needs_expanding = 1; + count++; + break; + case '}': + count--; + if (count == 0) { + format_spec->end = str->start - 1; + return 1; + } + break; + default: + break; + } + } + + PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec"); + return 0; } - else - /* end of string, there's no format_spec or conversion */ - field_name->end = str->start; + else if (c != '}') { + PyErr_SetString(PyExc_ValueError, "expected '}' before end of string"); + return 0; + } return 1; } /************************************************************************/ /******* Output string allocation and escape-to-markup processing ******/ /************************************************************************/ @@ -636,17 +674,16 @@ static int MarkupIterator_next(MarkupIterator *self, SubString *literal, int *field_present, SubString *field_name, SubString *format_spec, Py_UCS4 *conversion, int *format_spec_needs_expanding) { int at_end; Py_UCS4 c = 0; Py_ssize_t start; - int count; Py_ssize_t len; int markup_follows = 0; /* initialize all of the output variables */ SubString_init(literal, NULL, 0, 0); SubString_init(field_name, NULL, 0, 0); SubString_init(format_spec, NULL, 0, 0); *conversion = '\0'; @@ -708,54 +745,22 @@ MarkupIterator_next(MarkupIterator *self /* record the literal text */ literal->str = self->str.str; literal->start = start; literal->end = start + len; if (!markup_follows) return 2; - /* this is markup, find the end of the string by counting nested - braces. note that this prohibits escaped braces, so that - format_specs cannot have braces in them. */ + /* this is markup; parse the field */ *field_present = 1; - count = 1; - - start = self->str.start; - - /* we know we can't have a zero length string, so don't worry - about that case */ - while (self->str.start < self->str.end) { - switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { - case '{': - /* the format spec needs to be recursively expanded. - this is an optimization, and not strictly needed */ - *format_spec_needs_expanding = 1; - count++; - break; - case '}': - count--; - if (count <= 0) { - /* we're done. parse and get out */ - SubString s; - - SubString_init(&s, self->str.str, start, self->str.start - 1); - if (parse_field(&s, field_name, format_spec, conversion) == 0) - return 0; - - /* success */ - return 2; - } - break; - } - } - - /* end of string while searching for matching '}' */ - PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); - return 0; + if (!parse_field(&self->str, field_name, format_spec, + format_spec_needs_expanding, conversion)) + return 0; + return 2; } /* do the !r or !s conversion on obj */ static PyObject * do_conversion(PyObject *obj, Py_UCS4 conversion) { /* XXX in pre-3.0, do we need to convert this to unicode, since it