diff -r 5e6964705419 -r 622d60957d78 Doc/library/string.rst --- a/Doc/library/string.rst Tue May 22 21:05:30 2012 +0300 +++ b/Doc/library/string.rst Wed May 23 22:22:35 2012 -0700 @@ -198,7 +198,7 @@ arg_name: [`identifier` | `integer`] attribute_name: `identifier` element_index: `integer` | `index_string` - index_string: <any source character except "]"> + + index_string: <any source character except "]", "{", or "}"> + conversion: "r" | "s" | "a" format_spec: <described in the next section> diff -r 5e6964705419 -r 622d60957d78 Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Tue May 22 21:05:30 2012 +0300 +++ b/Lib/test/test_unicode.py Wed May 23 22:22:35 2012 -0700 @@ -778,6 +778,10 @@ self.assertEqual("{0[foo-bar]}".format({'foo-bar':'baz'}), 'baz') self.assertEqual("{0[foo bar]}".format({'foo bar':'baz'}), 'baz') self.assertEqual("{0[ ]}".format({' ':3}), '3') + self.assertEqual("{0[:!]}".format({':!':3}), '3') + # weird field names, but format spec etc. still work + self.assertEqual("{0[!]!r}".format({'!':'3'}), repr('3')) + self.assertEqual("{0[:]::>3}".format({':':3}), '::3') self.assertEqual('{foo._x}'.format(foo=C(20)), '20') self.assertEqual('{1}{0}'.format(D(10), D(20)), '2010') @@ -879,16 +883,16 @@ self.assertRaises(ValueError, "}{".format) self.assertRaises(ValueError, "abc{0:{}".format) self.assertRaises(ValueError, "{0".format) - self.assertRaises(IndexError, "{0.}".format) + self.assertRaises(ValueError, "{0.}".format) self.assertRaises(ValueError, "{0.}".format, 0) - self.assertRaises(IndexError, "{0[}".format) + self.assertRaises(ValueError, "{0[}".format) self.assertRaises(ValueError, "{0[}".format, []) - self.assertRaises(KeyError, "{0]}".format) + self.assertRaises(ValueError, "{0]}".format) self.assertRaises(ValueError, "{0.[]}".format, 0) self.assertRaises(ValueError, "{0..foo}".format, 0) self.assertRaises(ValueError, "{0[0}".format, 0) self.assertRaises(ValueError, "{0[0:foo}".format, 0) - self.assertRaises(KeyError, "{c]}".format) + self.assertRaises(ValueError, "{c]}".format) self.assertRaises(ValueError, "{{ {{{0}}".format, 0) 
self.assertRaises(ValueError, "{0}}".format, 0) self.assertRaises(KeyError, "{foo}".format, bar=3) @@ -902,13 +906,18 @@ big = "23098475029384702983476098230754973209482573" self.assertRaises(ValueError, ("{" + big + "}").format) self.assertRaises(ValueError, ("{[" + big + "]}").format, [0]) + # non-number, non-identifier arg name + self.assertRaises(ValueError, "{ [0]}".format, [0]) + # non-identifier field name + self.assertRaises(ValueError, "{0.4}".format, 0) # issue 6089 self.assertRaises(ValueError, "{0[0]x}".format, [None]) self.assertRaises(ValueError, "{0[0](10)}".format, [None]) - # can't have a replacement on the field name portion - self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4) + # can't have { or } in the field name portion + self.assertRaises(ValueError, '{0[{1}]}'.format, 'abcdefg', 4) + self.assertRaises(ValueError, '{0[{][}]}'.format, {'{':{'}':4}}) # exceed maximum recursion depth self.assertRaises(ValueError, "{0:{1:{2}}}".format, 'abc', 's', '') diff -r 5e6964705419 -r 622d60957d78 Misc/ACKS --- a/Misc/ACKS Tue May 22 21:05:30 2012 +0300 +++ b/Misc/ACKS Wed May 23 22:22:35 2012 -0700 @@ -1124,6 +1124,7 @@ Klaus-Juergen Wolf Dan Wolfe Richard Wolff +Ben Wolfson Adam Woodbeck Gordon Worley Darren Worrall diff -r 5e6964705419 -r 622d60957d78 Objects/stringlib/unicode_format.h --- a/Objects/stringlib/unicode_format.h Tue May 22 21:05:30 2012 +0300 +++ b/Objects/stringlib/unicode_format.h Wed May 23 22:22:35 2012 -0700 @@ -554,6 +554,47 @@ } static int +verify_integer(Py_ssize_t start, Py_ssize_t end, PyObject* str) +{ + Py_UCS4 c = PyUnicode_READ_CHAR(str, start); /* caller guarantees start < end */ + if (start == end - 1 && c >= '0' && c <= '9') return 1; /* a single digit, including a lone "0" */ + if (c < '1' || c > '9') return 0; /* longer names must not start with '0' or a non-digit */ + while (++start < end) { /* pre-increment: the last character read is at end-1, never the delimiter at index end */ + c = PyUnicode_READ_CHAR(str, start); + if (c < '0' || c > '9') return 0; + } + return 1; +} + +static int +verify_identifier(Py_ssize_t start, Py_ssize_t end, PyObject* str) +{ + PyObject *ident; + int result; + ident = PyUnicode_Substring(str, start, 
end); + if (!ident) { + if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) + PyErr_Clear(); + return 0; + } + result = PyUnicode_IsIdentifier(ident); + Py_DECREF(ident); + return result; +} + +static int +check_arg_name(Py_ssize_t start, Py_ssize_t end, PyObject* str) +{ + int result; + if (start == end) result = 1; + else result = verify_integer(start, end, str) || verify_identifier(start, end, str); + if (!result) + PyErr_SetString(PyExc_ValueError, "Argument name in format string " + "must be an identifier or an integer"); + return result; +} + +static int parse_field(SubString *str, SubString *field_name, SubString *format_spec, Py_UCS4 *conversion) { @@ -573,6 +614,12 @@ field_name->start = str->start; while (str->start < str->end) { switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { + case '[': + /* we already know that only legal characters are in the + field_name, so just skip ahead to the matching ]. */ + while (str->start < str->end && PyUnicode_READ_CHAR(str->str, str->start) != ']') + str->start++; + continue; case ':': case '!': break; @@ -655,7 +702,10 @@ Py_ssize_t start; int count; Py_ssize_t len; + Py_ssize_t ident_start; int markup_follows = 0; + int arg_name_done = 0; + int field_name_done = 0; /* initialize all of the output variables */ SubString_init(literal, NULL, 0, 0); @@ -737,13 +787,82 @@ about that case */ while (self->str.start < self->str.end) { switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { + case '[': + if (!arg_name_done) { + arg_name_done = 1; + if (!check_arg_name(start, self->str.start-1, self->str.str)) + return 0; + } + while (!field_name_done && self->str.start < self->str.end) { /* once ':' or '!' has been seen, '[' belongs to the conversion/format spec (e.g. a fill character) and is not an index opener */ + switch (PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { + case '{': case '}': + PyErr_SetString(PyExc_ValueError, "Field index in format " + "string cannot contain '{' or '}'"); + return 0; + case ']': break; + default: continue; + } + break; + } + break; case '{': /* the format spec needs to be recursively expanded. 
this is an optimization, and not strictly needed */ *format_spec_needs_expanding = 1; count++; break; + case '.': + if (!arg_name_done) { + arg_name_done = 1; + if (!check_arg_name(start, self->str.start-1, self->str.str)) + return 0; + } + if (!field_name_done) { + ident_start = self->str.start; + switch(PyUnicode_READ_CHAR(self->str.str, self->str.start)) { + case '[': case '.': + case '!': case '}': + case ':': + PyErr_SetString(PyExc_ValueError, "Attribute name " + "in format string " + "must be nonempty"); + return 0; + } + while(self->str.start < self->str.end) { + switch(PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { + case '[': case '.': + case '!': case '}': + case ':': + if (!verify_identifier(ident_start, self->str.start-1, self->str.str)) { + PyErr_SetString(PyExc_ValueError, "Attribute name " + "in format string " + "must be an identifier"); + return 0; + } + self->str.start--; + break; + default: + continue; + } + break; + } + } + break; + case ':': + case '!': + if (!arg_name_done) { + arg_name_done = 1; + if (!check_arg_name(start, self->str.start-1, self->str.str)) + return 0; + } + field_name_done = 1; + break; case '}': + if (!arg_name_done) { + arg_name_done = 1; + if (!check_arg_name(start, self->str.start-1, self->str.str)) + return 0; + } count--; if (count <= 0) { /* we're done. parse and get out */