diff -r 64bb01bce12c Doc/library/json.rst --- a/Doc/library/json.rst Sun Nov 30 20:39:04 2014 +0200 +++ b/Doc/library/json.rst Mon Dec 01 00:17:14 2014 +0200 @@ -250,7 +250,7 @@ Basic Usage will be passed to the constructor of the class. If the data being deserialized is not a valid JSON document, a - :exc:`ValueError` will be raised. + :exc:`JSONDecodeError` will be raised. .. function:: loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw) @@ -261,7 +261,7 @@ Basic Usage *encoding* which is ignored and deprecated. If the data being deserialized is not a valid JSON document, a - :exc:`ValueError` will be raised. + :exc:`JSONDecodeError` will be raised. Encoders and Decoders --------------------- @@ -334,13 +334,16 @@ Encoders and Decoders ``'\n'``, ``'\r'`` and ``'\0'``. If the data being deserialized is not a valid JSON document, a - :exc:`ValueError` will be raised. + :exc:`JSONDecodeError` will be raised. .. method:: decode(s) Return the Python representation of *s* (a :class:`str` instance containing a JSON document) + :exc:`JSONDecodeError` will be raised if the given JSON document is not + valid. + .. method:: raw_decode(s) Decode a JSON document from *s* (a :class:`str` beginning with a @@ -469,6 +472,36 @@ Encoders and Decoders mysocket.write(chunk) +Exceptions +---------- + +.. exception:: JSONDecodeError(msg, doc, pos) + + Subclass of :exc:`ValueError` with the following additional attributes: + + .. attribute:: msg + + The unformatted error message. + + .. attribute:: doc + + The JSON document being parsed. + + .. attribute:: pos + + The start index of *doc* where parsing failed. + + .. attribute:: lineno + + The line corresponding to *pos*. + + .. attribute:: colno + + The column corresponding to *pos*. + + .. 
versionadded:: 3.5 + + Standard Compliance and Interoperability ---------------------------------------- diff -r 64bb01bce12c Lib/json/__init__.py --- a/Lib/json/__init__.py Sun Nov 30 20:39:04 2014 +0200 +++ b/Lib/json/__init__.py Mon Dec 01 00:17:14 2014 +0200 @@ -98,12 +98,12 @@ Using json.tool from the shell to valida __version__ = '2.0.9' __all__ = [ 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONEncoder', + 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', ] __author__ = 'Bob Ippolito ' -from .decoder import JSONDecoder +from .decoder import JSONDecoder, JSONDecodeError from .encoder import JSONEncoder _default_encoder = JSONEncoder( @@ -311,7 +311,8 @@ def loads(s, encoding=None, cls=None, ob raise TypeError('the JSON object must be str, not {!r}'.format( s.__class__.__name__)) if s.startswith(u'\ufeff'): - raise ValueError("Unexpected UTF-8 BOM (decode using utf-8-sig)") + raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)", + s, 0) if (cls is None and object_hook is None and parse_int is None and parse_float is None and parse_constant is None and object_pairs_hook is None and not kw): diff -r 64bb01bce12c Lib/json/decoder.py --- a/Lib/json/decoder.py Sun Nov 30 20:39:04 2014 +0200 +++ b/Lib/json/decoder.py Mon Dec 01 00:17:14 2014 +0200 @@ -8,7 +8,7 @@ try: except ImportError: c_scanstring = None -__all__ = ['JSONDecoder'] +__all__ = ['JSONDecoder', 'JSONDecodeError'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL @@ -17,32 +17,30 @@ PosInf = float('inf') NegInf = float('-inf') -def linecol(doc, pos): - if isinstance(doc, bytes): - newline = b'\n' - else: - newline = '\n' - lineno = doc.count(newline, 0, pos) + 1 - if lineno == 1: - colno = pos + 1 - else: - colno = pos - doc.rindex(newline, 0, pos) - return lineno, colno +class JSONDecodeError(ValueError): + """Subclass of ValueError with the following additional properties: + msg: The unformatted error message + doc: The JSON document being parsed + pos: The start index of doc 
where parsing failed + lineno: The line corresponding to pos + colno: The column corresponding to pos -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _json - lineno, colno = linecol(doc, pos) - if end is None: - fmt = '{0}: line {1} column {2} (char {3})' - return fmt.format(msg, lineno, colno, pos) - #fmt = '%s: line %d column %d (char %d)' - #return fmt % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' - return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) - #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' - #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) + """ + # Note that this exception is used from _json + def __init__(self, msg, doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + colno = pos - doc.rfind('\n', 0, pos) + errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) + ValueError.__init__(self, errmsg) + self.msg = msg + self.doc = doc + self.pos = pos + self.lineno = lineno + self.colno = colno + + def __reduce__(self): + return self.__class__, (self.msg, self.doc, self.pos) _CONSTANTS = { @@ -66,7 +64,7 @@ def _decode_uXXXX(s, pos): except ValueError: pass msg = "Invalid \\uXXXX escape" - raise ValueError(errmsg(msg, s, pos)) + raise JSONDecodeError(msg, s, pos) def py_scanstring(s, end, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): @@ -84,8 +82,7 @@ def py_scanstring(s, end, strict=True, while 1: chunk = _m(s, end) if chunk is None: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError("Unterminated string starting at", s, begin) end = chunk.end() content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters @@ -99,22 +96,21 @@ def py_scanstring(s, end, strict=True, if strict: #msg = "Invalid control character %r at" % (terminator,) msg = "Invalid control character {0!r} 
at".format(terminator) - raise ValueError(errmsg(msg, s, end)) + raise JSONDecodeError(msg, s, end) else: _append(terminator) continue try: esc = s[end] except IndexError: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError("Unterminated string starting at", s, begin) # If not a unicode escape sequence, must be in the lookup table if esc != 'u': try: char = _b[esc] except KeyError: msg = "Invalid \\escape: {0!r}".format(esc) - raise ValueError(errmsg(msg, s, end)) + raise JSONDecodeError(msg, s, end) end += 1 else: uni = _decode_uXXXX(s, end) @@ -163,8 +159,8 @@ def JSONObject(s_and_end, strict, scan_o pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': - raise ValueError(errmsg( - "Expecting property name enclosed in double quotes", s, end)) + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", s, end) end += 1 while True: key, end = scanstring(s, end, strict) @@ -174,7 +170,7 @@ def JSONObject(s_and_end, strict, scan_o if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': - raise ValueError(errmsg("Expecting ':' delimiter", s, end)) + raise JSONDecodeError("Expecting ':' delimiter", s, end) end += 1 try: @@ -188,7 +184,7 @@ def JSONObject(s_and_end, strict, scan_o try: value, end = scan_once(s, end) except StopIteration as err: - raise ValueError(errmsg("Expecting value", s, err.value)) from None + raise JSONDecodeError("Expecting value", s, err.value) from None pairs_append((key, value)) try: nextchar = s[end] @@ -202,13 +198,13 @@ def JSONObject(s_and_end, strict, scan_o if nextchar == '}': break elif nextchar != ',': - raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) + raise JSONDecodeError("Expecting ',' delimiter", s, end - 1) end = _w(s, end).end() nextchar = s[end:end + 1] end += 1 if nextchar != '"': - raise ValueError(errmsg( - "Expecting property name enclosed in double quotes", s, end - 1)) + raise JSONDecodeError( + 
"Expecting property name enclosed in double quotes", s, end - 1) if object_pairs_hook is not None: result = object_pairs_hook(pairs) return result, end @@ -232,7 +228,7 @@ def JSONArray(s_and_end, scan_once, _w=W try: value, end = scan_once(s, end) except StopIteration as err: - raise ValueError(errmsg("Expecting value", s, err.value)) from None + raise JSONDecodeError("Expecting value", s, err.value) from None _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -242,7 +238,7 @@ def JSONArray(s_and_end, scan_once, _w=W if nextchar == ']': break elif nextchar != ',': - raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) + raise JSONDecodeError("Expecting ',' delimiter", s, end - 1) try: if s[end] in _ws: end += 1 @@ -343,7 +339,7 @@ class JSONDecoder(object): obj, end = self.raw_decode(s, idx=_w(s, 0).end()) end = _w(s, end).end() if end != len(s): - raise ValueError(errmsg("Extra data", s, end, len(s))) + raise JSONDecodeError("Extra data", s, end) return obj def raw_decode(self, s, idx=0): @@ -358,5 +354,5 @@ class JSONDecoder(object): try: obj, end = self.scan_once(s, idx) except StopIteration as err: - raise ValueError(errmsg("Expecting value", s, err.value)) from None + raise JSONDecodeError("Expecting value", s, err.value) from None return obj, end diff -r 64bb01bce12c Lib/test/test_json/__init__.py --- a/Lib/test/test_json/__init__.py Sun Nov 30 20:39:04 2014 +0200 +++ b/Lib/test/test_json/__init__.py Mon Dec 01 00:17:14 2014 +0200 @@ -9,12 +9,15 @@ from test import support # import json with and without accelerations cjson = support.import_fresh_module('json', fresh=['_json']) pyjson = support.import_fresh_module('json', blocked=['_json']) +# JSONDecodeError is cached inside the _json module +cjson.JSONDecodeError = cjson.decoder.JSONDecodeError = json.JSONDecodeError # create two base classes that will be used by the other tests class PyTest(unittest.TestCase): json = pyjson loads = staticmethod(pyjson.loads) dumps = 
staticmethod(pyjson.dumps) + JSONDecodeError = staticmethod(pyjson.JSONDecodeError) @unittest.skipUnless(cjson, 'requires _json') class CTest(unittest.TestCase): @@ -22,6 +25,7 @@ class CTest(unittest.TestCase): json = cjson loads = staticmethod(cjson.loads) dumps = staticmethod(cjson.dumps) + JSONDecodeError = staticmethod(cjson.JSONDecodeError) # test PyTest and CTest checking if the functions come from the right module class TestPyTest(PyTest): diff -r 64bb01bce12c Lib/test/test_json/test_decode.py --- a/Lib/test/test_json/test_decode.py Sun Nov 30 20:39:04 2014 +0200 +++ b/Lib/test/test_json/test_decode.py Mon Dec 01 00:17:14 2014 +0200 @@ -63,12 +63,12 @@ class TestDecode: def test_extra_data(self): s = '[1, 2, 3]5' msg = 'Extra data' - self.assertRaisesRegex(ValueError, msg, self.loads, s) + self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s) def test_invalid_escape(self): s = '["abc\\y"]' msg = 'escape' - self.assertRaisesRegex(ValueError, msg, self.loads, s) + self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s) def test_invalid_input_type(self): msg = 'the JSON object must be str' @@ -80,10 +80,10 @@ class TestDecode: def test_string_with_utf8_bom(self): # see #18958 bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8') - with self.assertRaises(ValueError) as cm: + with self.assertRaises(self.JSONDecodeError) as cm: self.loads(bom_json) self.assertIn('BOM', str(cm.exception)) - with self.assertRaises(ValueError) as cm: + with self.assertRaises(self.JSONDecodeError) as cm: self.json.load(StringIO(bom_json)) self.assertIn('BOM', str(cm.exception)) # make sure that the BOM is not detected in the middle of a string diff -r 64bb01bce12c Lib/test/test_json/test_fail.py --- a/Lib/test/test_json/test_fail.py Sun Nov 30 20:39:04 2014 +0200 +++ b/Lib/test/test_json/test_fail.py Mon Dec 01 00:17:14 2014 +0200 @@ -87,7 +87,7 @@ class TestFail: continue try: self.loads(doc) - except ValueError: + except self.JSONDecodeError: pass else: 
self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc)) @@ -124,10 +124,16 @@ class TestFail: ('"spam', 'Unterminated string starting at', 0), ] for data, msg, idx in test_cases: - self.assertRaisesRegex(ValueError, - r'^{0}: line 1 column {1} \(char {2}\)'.format( - re.escape(msg), idx + 1, idx), - self.loads, data) + with self.assertRaises(self.JSONDecodeError) as cm: + self.loads(data) + err = cm.exception + self.assertEqual(err.msg, msg) + self.assertEqual(err.pos, idx) + self.assertEqual(err.lineno, 1) + self.assertEqual(err.colno, idx + 1) + errmsg = ('%s: line 1 column %d (char %d)' % + (msg, idx + 1, idx)) + self.assertRegex(str(err), '^' + re.escape(errmsg)) def test_unexpected_data(self): test_cases = [ @@ -154,10 +160,16 @@ class TestFail: ('{"spam":42,}', 'Expecting property name enclosed in double quotes', 11), ] for data, msg, idx in test_cases: - self.assertRaisesRegex(ValueError, - r'^{0}: line 1 column {1} \(char {2}\)'.format( - re.escape(msg), idx + 1, idx), - self.loads, data) + with self.assertRaises(self.JSONDecodeError) as cm: + self.loads(data) + err = cm.exception + self.assertEqual(err.msg, msg) + self.assertEqual(err.pos, idx) + self.assertEqual(err.lineno, 1) + self.assertEqual(err.colno, idx + 1) + errmsg = ('%s: line 1 column %d (char %d)' % + (msg, idx + 1, idx)) + self.assertRegex(str(err), '^' + re.escape(errmsg)) def test_extra_data(self): test_cases = [ @@ -171,11 +183,15 @@ class TestFail: ('"spam",42', 'Extra data', 6), ] for data, msg, idx in test_cases: - self.assertRaisesRegex(ValueError, - r'^{0}: line 1 column {1} - line 1 column {2}' - r' \(char {3} - {4}\)'.format( - re.escape(msg), idx + 1, len(data) + 1, idx, len(data)), - self.loads, data) + with self.assertRaises(self.JSONDecodeError) as cm: + self.loads(data) + err = cm.exception + self.assertEqual(err.msg, msg) + self.assertEqual(err.pos, idx) + self.assertEqual(err.lineno, 1) + self.assertEqual(err.colno, idx + 1) + errmsg = '%s: line 1 column %d 
(char %d)' % (msg, idx + 1, idx) + self.assertRegex(str(err), '^' + re.escape(errmsg)) def test_linecol(self): test_cases = [ @@ -185,10 +201,16 @@ class TestFail: ('\n \n\n !', 4, 6, 10), ] for data, line, col, idx in test_cases: - self.assertRaisesRegex(ValueError, - r'^Expecting value: line {0} column {1}' - r' \(char {2}\)$'.format(line, col, idx), - self.loads, data) + with self.assertRaises(self.JSONDecodeError) as cm: + self.loads(data) + err = cm.exception + self.assertEqual(err.msg, 'Expecting value') + self.assertEqual(err.pos, idx) + self.assertEqual(err.lineno, line) + self.assertEqual(err.colno, col) + errmsg = ('Expecting value: line %s column %d' + ' (char %d)' % (line, col, idx)) + self.assertRegex(str(err), '^%s$' % re.escape(errmsg)) class TestPyFail(TestFail, PyTest): pass class TestCFail(TestFail, CTest): pass diff -r 64bb01bce12c Lib/test/test_json/test_scanstring.py --- a/Lib/test/test_json/test_scanstring.py Sun Nov 30 20:39:04 2014 +0200 +++ b/Lib/test/test_json/test_scanstring.py Mon Dec 01 00:17:14 2014 +0200 @@ -129,7 +129,7 @@ class TestScanstring: '"\\ud834\\u0X20"', ] for s in bad_escapes: - with self.assertRaises(ValueError, msg=s): + with self.assertRaises(self.JSONDecodeError, msg=s): scanstring(s, 1, True) def test_overflow(self): diff -r 64bb01bce12c Modules/_json.c --- a/Modules/_json.c Sun Nov 30 20:39:04 2014 +0200 +++ b/Modules/_json.c Mon Dec 01 00:17:14 2014 +0200 @@ -221,23 +221,22 @@ ascii_escape_unicode(PyObject *pystr) static void raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) { - /* Use the Python function json.decoder.errmsg to raise a nice - looking ValueError exception */ - static PyObject *errmsg_fn = NULL; - PyObject *pymsg; - if (errmsg_fn == NULL) { + /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */ + static PyObject *JSONDecodeError = NULL; + PyObject *exc; + if (JSONDecodeError == NULL) { PyObject *decoder = PyImport_ImportModule("json.decoder"); if (decoder == NULL) return; 
- errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); + JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError"); Py_DECREF(decoder); - if (errmsg_fn == NULL) + if (JSONDecodeError == NULL) return; } - pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end); - if (pymsg) { - PyErr_SetObject(PyExc_ValueError, pymsg); - Py_DECREF(pymsg); + exc = PyObject_CallFunction(JSONDecodeError, "(zOn)", msg, s, end); + if (exc) { + PyErr_SetObject(JSONDecodeError, exc); + Py_DECREF(exc); } }