# Add a C implementation of json.encoder.encode_basestring.
#
# * Lib/json/encoder.py: import _json.encode_basestring when available and
#   fall back to the pure-Python py_encode_basestring otherwise.
# * Modules/_json.c: add escape_unicode() / py_encode_basestring(), and let
#   the C encoder call whichever C string escaper was passed in directly.
# * Lib/test/test_json: add tests for encode_basestring and drop duplicated
#   cases from the encode_basestring_ascii tests.

diff -r d428b4ee8b39 Lib/json/encoder.py
--- a/Lib/json/encoder.py	Fri Jan 09 16:00:30 2015 +0100
+++ b/Lib/json/encoder.py	Sun Jan 11 06:06:13 2015 +0900
@@ -7,6 +7,10 @@
 except ImportError:
     c_encode_basestring_ascii = None
 try:
+    from _json import encode_basestring as c_encode_basestring
+except ImportError:
+    c_encode_basestring = None
+try:
     from _json import make_encoder as c_make_encoder
 except ImportError:
     c_make_encoder = None
@@ -30,7 +34,7 @@
 INFINITY = float('inf')
 FLOAT_REPR = repr
 
-def encode_basestring(s):
+def py_encode_basestring(s):
     """Return a JSON representation of a Python string
 
     """
@@ -39,6 +43,9 @@
     return '"' + ESCAPE.sub(replace, s) + '"'
 
 
+encode_basestring = (c_encode_basestring or py_encode_basestring)
+
+
 def py_encode_basestring_ascii(s):
     """Return an ASCII-only JSON representation of a Python string
 
diff -r d428b4ee8b39 Lib/test/test_json/test_encode_basestring.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Lib/test/test_json/test_encode_basestring.py	Sun Jan 11 06:06:13 2015 +0900
@@ -0,0 +1,44 @@
+from collections import OrderedDict
+from test.test_json import PyTest, CTest
+
+
+CASES = [
+    # Left side is Copied from test_encode_basestring_ascii
+    ('/\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?', '"/\\\\\\"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
+    ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\u0123\u4567\u89ab\ucdef\uabcd\uef4a"'),
+    ('controls', '"controls"'),
+    ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
+    ('{"object with 1 member":["array with 1 element"]}', '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
+    (' s p a c e d ', '" s p a c e d "'),
+    ('\U0001d120', '"\U0001d120"'),
+    ('\u03b1\u03a9', '"\u03b1\u03a9"'),
+    ("`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'),
+    ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
+    ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\u0123\u4567\u89ab\ucdef\uabcd\uef4a"'),
+    # Edge cases
+    ('\u0000\u001f\u007f\u0080\uffff\U00010000\U0010ffff', '"\\u0000\\u001f\x7f\u0080\uffff\U00010000\U0010ffff"'),
+]
+
+class TestEncodeBasestring:
+    def test_encode_basestring(self):
+        fname = self.json.encoder.encode_basestring.__name__
+        for input_string, expect in CASES:
+            result = self.json.encoder.encode_basestring(input_string)
+            self.assertEqual(result, expect,
+                '{0!r} != {1!r} for {2}({3!r})'.format(
+                    result, expect, fname, input_string))
+
+    def test_ordered_dict(self):
+        # See issue 6105
+        items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)]
+        s = self.dumps(OrderedDict(items), ensure_ascii=False)
+        self.assertEqual(s, '{"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}')
+
+    def test_sorted_dict(self):
+        items = [('one', 1), ('two', 2), ('three', 3), ('four', 4), ('five', 5)]
+        s = self.dumps(dict(items), sort_keys=True, ensure_ascii=False)
+        self.assertEqual(s, '{"five": 5, "four": 4, "one": 1, "three": 3, "two": 2}')
+
+
+class TestPyEncodeBasestring(TestEncodeBasestring, PyTest): pass
+class TestCEncodeBasestring(TestEncodeBasestring, CTest): pass
diff -r d428b4ee8b39 Lib/test/test_json/test_encode_basestring_ascii.py
--- a/Lib/test/test_json/test_encode_basestring_ascii.py	Fri Jan 09 16:00:30 2015 +0100
+++ b/Lib/test/test_json/test_encode_basestring_ascii.py	Sun Jan 11 06:06:13 2015 +0900
@@ -11,9 +11,6 @@
     (' s p a c e d ', '" s p a c e d "'),
     ('\U0001d120', '"\\ud834\\udd20"'),
     ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
-    ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
-    ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
-    ('\u03b1\u03a9', '"\\u03b1\\u03a9"'),
     ("`1~!@#$%^&*()_+-={':[,]}|;.?", '"`1~!@#$%^&*()_+-={\':[,]}|;.?"'),
     ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'),
     ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
diff -r d428b4ee8b39 Modules/_json.c
--- a/Modules/_json.c	Fri Jan 09 16:00:30 2015 +0100
+++ b/Modules/_json.c	Sun Jan 11 06:06:13 2015 +0900
@@ -47,7 +47,7 @@
     PyObject *item_separator;
     PyObject *sort_keys;
     PyObject *skipkeys;
-    int fast_encode;
+    PyCFunction fast_encode;
     int allow_nan;
 } PyEncoderObject;
 
@@ -218,6 +218,97 @@
     return rval;
 }
 
+static PyObject *
+escape_unicode(PyObject *pystr)
+{
+    /* Take a PyUnicode pystr and return a new escaped PyUnicode */
+    Py_ssize_t i;
+    Py_ssize_t input_chars;
+    Py_ssize_t output_size;
+    Py_ssize_t chars;
+    PyObject *rval;
+    void *input;
+    int kind;
+    Py_UCS4 maxchar;
+
+    if (PyUnicode_READY(pystr) == -1)
+        return NULL;
+
+    maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
+    input_chars = PyUnicode_GET_LENGTH(pystr);
+    input = PyUnicode_DATA(pystr);
+    kind = PyUnicode_KIND(pystr);
+
+    /* Compute the output size */
+    for (i = 0, output_size = 2; i < input_chars; i++) {
+        Py_UCS4 c = PyUnicode_READ(kind, input, i);
+        switch (c) {
+        case '\\': case '"': case '\b': case '\f':
+        case '\n': case '\r': case '\t':
+            output_size += 2;
+            break;
+        default:
+            if (c <= 0x1f)
+                output_size += 6;
+            else
+                output_size++;
+        }
+    }
+
+    rval = PyUnicode_New(output_size, maxchar);
+    if (rval == NULL)
+        return NULL;
+
+    kind = PyUnicode_KIND(rval);
+
+#define ENCODE_OUTPUT do { \
+        chars = 0; \
+        output[chars++] = '"'; \
+        for (i = 0; i < input_chars; i++) { \
+            Py_UCS4 c = PyUnicode_READ(kind, input, i); \
+            switch (c) { \
+            case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
+            case '"':  output[chars++] = '\\'; output[chars++] = c; break; \
+            case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
+            case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
+            case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
+            case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
+            case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
+            default: \
+                if (c <= 0x1f) { \
+                    output[chars++] = '\\'; \
+                    output[chars++] = 'u'; \
+                    output[chars++] = '0'; \
+                    output[chars++] = '0'; \
+                    output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
+                    output[chars++] = Py_hexdigits[(c     ) & 0xf]; \
+                } else { \
+                    output[chars++] = c; \
+                } \
+            } \
+        } \
+        output[chars++] = '"'; \
+    } while (0)
+
+    if (kind == PyUnicode_1BYTE_KIND) {
+        Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
+        ENCODE_OUTPUT;
+    } else if (kind == PyUnicode_2BYTE_KIND) {
+        Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
+        ENCODE_OUTPUT;
+    } else {
+        Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
+        assert(kind == PyUnicode_4BYTE_KIND);
+        ENCODE_OUTPUT;
+    }
+#undef ENCODE_OUTPUT
+
+#ifdef Py_DEBUG
+    assert(_PyUnicode_CheckConsistency(rval, 1));
+#endif
+    return rval;
+}
+
 static void
 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
 {
@@ -530,6 +621,31 @@
     return rval;
 }
 
+
+PyDoc_STRVAR(pydoc_encode_basestring,
+    "encode_basestring(string) -> string\n"
+    "\n"
+    "Return a JSON representation of a Python string"
+);
+
+static PyObject *
+py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
+{
+    PyObject *rval;
+    /* Return a JSON representation of a Python string */
+    /* METH_O */
+    if (PyUnicode_Check(pystr)) {
+        rval = escape_unicode(pystr);
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+                     "first argument must be a string, not %.80s",
+                     Py_TYPE(pystr)->tp_name);
+        return NULL;
+    }
+    return rval;
+}
+
 static void
 scanner_dealloc(PyObject *self)
 {
@@ -1223,7 +1339,14 @@
     s->item_separator = item_separator;
     s->sort_keys = sort_keys;
     s->skipkeys = skipkeys;
-    s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
+    s->fast_encode = NULL;
+    if (PyCFunction_Check(s->encoder)) {
+        PyCFunction f = PyCFunction_GetFunction(s->encoder);
+        if (f == (PyCFunction)py_encode_basestring_ascii ||
+                f == (PyCFunction)py_encode_basestring) {
+            s->fast_encode = f;
+        }
+    }
     s->allow_nan = PyObject_IsTrue(allow_nan);
 
     Py_INCREF(s->markers);
@@ -1372,7 +1495,7 @@
 {
     /* Return the JSON representation of a string */
     if (s->fast_encode)
-        return py_encode_basestring_ascii(NULL, obj);
+        return s->fast_encode(NULL, obj);
     else
         return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
 }
@@ -1840,6 +1963,10 @@
         (PyCFunction)py_encode_basestring_ascii,
         METH_O,
         pydoc_encode_basestring_ascii},
+    {"encode_basestring",
+        (PyCFunction)py_encode_basestring,
+        METH_O,
+        pydoc_encode_basestring},
     {"scanstring",
         (PyCFunction)py_scanstring,
         METH_VARARGS,