Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(48647)

Side by Side Diff: Lib/json/decoder.py

Issue 19361: Specialize exceptions thrown by JSON parser
Patch Set: Created 5 years ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Doc/library/json.rst ('k') | Lib/json/__init__.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 """Implementation of JSONDecoder 1 """Implementation of JSONDecoder
2 """ 2 """
3 import re 3 import re
4 4
5 from json import scanner 5 from json import scanner
6 try: 6 try:
7 from _json import scanstring as c_scanstring 7 from _json import scanstring as c_scanstring
8 except ImportError: 8 except ImportError:
9 c_scanstring = None 9 c_scanstring = None
10 10
11 __all__ = ['JSONDecoder'] 11 __all__ = ['JSONDecoder']
12 12
13 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL 13 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
14 14
15 NaN = float('nan') 15 NaN = float('nan')
16 PosInf = float('inf') 16 PosInf = float('inf')
17 NegInf = float('-inf') 17 NegInf = float('-inf')
18
19
20 class JSONDecodeError(ValueError):
21 """Subclass of ValueError with the following additional properties:
22
23 msg: The unformatted error message
24 doc: The JSON document being parsed
25 pos: The start index of doc where parsing failed
26 end: The end index of doc where parsing failed (may be None)
27 lineno: The line corresponding to pos
28 colno: The column corresponding to pos
29 endlineno: The line corresponding to end (may be None)
30 endcolno: The column corresponding to end (may be None)
31
32 """
33 # Note that this exception is used from _json
34 def __init__(self, msg, doc, pos, end=None):
35 ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
36 self.msg = msg
37 self.doc = doc
38 self.pos = pos
39 self.end = end
40 self.lineno, self.colno = linecol(doc, pos)
41 if end is not None:
42 self.endlineno, self.endcolno = linecol(doc, end)
43 else:
44 self.endlineno, self.endcolno = None, None
45
46 def __reduce__(self):
47 return self.__class__, (self.msg, self.doc, self.pos, self.end)
18 48
19 49
20 def linecol(doc, pos): 50 def linecol(doc, pos):
21 if isinstance(doc, bytes): 51 if isinstance(doc, bytes):
22 newline = b'\n' 52 newline = b'\n'
23 else: 53 else:
24 newline = '\n' 54 newline = '\n'
25 lineno = doc.count(newline, 0, pos) + 1 55 lineno = doc.count(newline, 0, pos) + 1
26 if lineno == 1: 56 if lineno == 1:
27 colno = pos + 1 57 colno = pos + 1
28 else: 58 else:
29 colno = pos - doc.rindex(newline, 0, pos) 59 colno = pos - doc.rindex(newline, 0, pos)
30 return lineno, colno 60 return lineno, colno
31 61
32 62
33 def errmsg(msg, doc, pos, end=None): 63 def errmsg(msg, doc, pos, end=None):
34 # Note that this function is called from _json
35 lineno, colno = linecol(doc, pos) 64 lineno, colno = linecol(doc, pos)
36 if end is None: 65 if end is None:
37 fmt = '{0}: line {1} column {2} (char {3})' 66 fmt = '{0}: line {1} column {2} (char {3})'
38 return fmt.format(msg, lineno, colno, pos) 67 return fmt.format(msg, lineno, colno, pos)
39 #fmt = '%s: line %d column %d (char %d)' 68 #fmt = '%s: line %d column %d (char %d)'
40 #return fmt % (msg, lineno, colno, pos) 69 #return fmt % (msg, lineno, colno, pos)
41 endlineno, endcolno = linecol(doc, end) 70 endlineno, endcolno = linecol(doc, end)
42 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' 71 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
43 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) 72 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
44 #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' 73 #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
(...skipping 14 matching lines...) Expand all
59 } 88 }
60 89
61 def _decode_uXXXX(s, pos): 90 def _decode_uXXXX(s, pos):
62 esc = s[pos + 1:pos + 5] 91 esc = s[pos + 1:pos + 5]
63 if len(esc) == 4 and esc[1] not in 'xX': 92 if len(esc) == 4 and esc[1] not in 'xX':
64 try: 93 try:
65 return int(esc, 16) 94 return int(esc, 16)
66 except ValueError: 95 except ValueError:
67 pass 96 pass
68 msg = "Invalid \\uXXXX escape" 97 msg = "Invalid \\uXXXX escape"
69 raise ValueError(errmsg(msg, s, pos)) 98 raise JSONDecodeError(msg, s, pos)
70 99
71 def py_scanstring(s, end, strict=True, 100 def py_scanstring(s, end, strict=True,
72 _b=BACKSLASH, _m=STRINGCHUNK.match): 101 _b=BACKSLASH, _m=STRINGCHUNK.match):
73 """Scan the string s for a JSON string. End is the index of the 102 """Scan the string s for a JSON string. End is the index of the
74 character in s after the quote that started the JSON string. 103 character in s after the quote that started the JSON string.
75 Unescapes all valid JSON string escape sequences and raises ValueError 104 Unescapes all valid JSON string escape sequences and raises ValueError
76 on attempt to decode an invalid string. If strict is False then literal 105 on attempt to decode an invalid string. If strict is False then literal
77 control characters are allowed in the string. 106 control characters are allowed in the string.
78 107
79 Returns a tuple of the decoded string and the index of the character in s 108 Returns a tuple of the decoded string and the index of the character in s
80 after the end quote.""" 109 after the end quote."""
81 chunks = [] 110 chunks = []
82 _append = chunks.append 111 _append = chunks.append
83 begin = end - 1 112 begin = end - 1
84 while 1: 113 while 1:
85 chunk = _m(s, end) 114 chunk = _m(s, end)
86 if chunk is None: 115 if chunk is None:
87 raise ValueError( 116 raise JSONDecodeError("Unterminated string starting at", s, begin)
88 errmsg("Unterminated string starting at", s, begin))
89 end = chunk.end() 117 end = chunk.end()
90 content, terminator = chunk.groups() 118 content, terminator = chunk.groups()
91 # Content is contains zero or more unescaped string characters 119 # Content is contains zero or more unescaped string characters
92 if content: 120 if content:
93 _append(content) 121 _append(content)
94 # Terminator is the end of string, a literal control character, 122 # Terminator is the end of string, a literal control character,
95 # or a backslash denoting that an escape sequence follows 123 # or a backslash denoting that an escape sequence follows
96 if terminator == '"': 124 if terminator == '"':
97 break 125 break
98 elif terminator != '\\': 126 elif terminator != '\\':
99 if strict: 127 if strict:
100 #msg = "Invalid control character %r at" % (terminator,) 128 #msg = "Invalid control character %r at" % (terminator,)
101 msg = "Invalid control character {0!r} at".format(terminator) 129 msg = "Invalid control character {0!r} at".format(terminator)
102 raise ValueError(errmsg(msg, s, end)) 130 raise JSONDecodeError(msg, s, end)
103 else: 131 else:
104 _append(terminator) 132 _append(terminator)
105 continue 133 continue
106 try: 134 try:
107 esc = s[end] 135 esc = s[end]
108 except IndexError: 136 except IndexError:
109 raise ValueError( 137 raise JSONDecodeError("Unterminated string starting at", s, begin)
110 errmsg("Unterminated string starting at", s, begin))
111 # If not a unicode escape sequence, must be in the lookup table 138 # If not a unicode escape sequence, must be in the lookup table
112 if esc != 'u': 139 if esc != 'u':
113 try: 140 try:
114 char = _b[esc] 141 char = _b[esc]
115 except KeyError: 142 except KeyError:
116 msg = "Invalid \\escape: {0!r}".format(esc) 143 msg = "Invalid \\escape: {0!r}".format(esc)
117 raise ValueError(errmsg(msg, s, end)) 144 raise JSONDecodeError(msg, s, end)
118 end += 1 145 end += 1
119 else: 146 else:
120 uni = _decode_uXXXX(s, end) 147 uni = _decode_uXXXX(s, end)
121 end += 5 148 end += 5
122 if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': 149 if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
123 uni2 = _decode_uXXXX(s, end + 1) 150 uni2 = _decode_uXXXX(s, end + 1)
124 if 0xdc00 <= uni2 <= 0xdfff: 151 if 0xdc00 <= uni2 <= 0xdfff:
125 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) 152 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
126 end += 6 153 end += 6
127 char = chr(uni) 154 char = chr(uni)
(...skipping 28 matching lines...) Expand all
156 # Trivial empty object 183 # Trivial empty object
157 if nextchar == '}': 184 if nextchar == '}':
158 if object_pairs_hook is not None: 185 if object_pairs_hook is not None:
159 result = object_pairs_hook(pairs) 186 result = object_pairs_hook(pairs)
160 return result, end + 1 187 return result, end + 1
161 pairs = {} 188 pairs = {}
162 if object_hook is not None: 189 if object_hook is not None:
163 pairs = object_hook(pairs) 190 pairs = object_hook(pairs)
164 return pairs, end + 1 191 return pairs, end + 1
165 elif nextchar != '"': 192 elif nextchar != '"':
166 raise ValueError(errmsg( 193 raise JSONDecodeError(
167 "Expecting property name enclosed in double quotes", s, end)) 194 "Expecting property name enclosed in double quotes", s, end)
168 end += 1 195 end += 1
169 while True: 196 while True:
170 key, end = scanstring(s, end, strict) 197 key, end = scanstring(s, end, strict)
171 key = memo_get(key, key) 198 key = memo_get(key, key)
172 # To skip some function call overhead we optimize the fast paths where 199 # To skip some function call overhead we optimize the fast paths where
173 # the JSON key separator is ": " or just ":". 200 # the JSON key separator is ": " or just ":".
174 if s[end:end + 1] != ':': 201 if s[end:end + 1] != ':':
175 end = _w(s, end).end() 202 end = _w(s, end).end()
176 if s[end:end + 1] != ':': 203 if s[end:end + 1] != ':':
177 raise ValueError(errmsg("Expecting ':' delimiter", s, end)) 204 raise JSONDecodeError("Expecting ':' delimiter", s, end)
178 end += 1 205 end += 1
179 206
180 try: 207 try:
181 if s[end] in _ws: 208 if s[end] in _ws:
182 end += 1 209 end += 1
183 if s[end] in _ws: 210 if s[end] in _ws:
184 end = _w(s, end + 1).end() 211 end = _w(s, end + 1).end()
185 except IndexError: 212 except IndexError:
186 pass 213 pass
187 214
188 try: 215 try:
189 value, end = scan_once(s, end) 216 value, end = scan_once(s, end)
190 except StopIteration as err: 217 except StopIteration as err:
191 raise ValueError(errmsg("Expecting value", s, err.value)) from None 218 raise JSONDecodeError("Expecting value", s, err.value) from None
192 pairs_append((key, value)) 219 pairs_append((key, value))
193 try: 220 try:
194 nextchar = s[end] 221 nextchar = s[end]
195 if nextchar in _ws: 222 if nextchar in _ws:
196 end = _w(s, end + 1).end() 223 end = _w(s, end + 1).end()
197 nextchar = s[end] 224 nextchar = s[end]
198 except IndexError: 225 except IndexError:
199 nextchar = '' 226 nextchar = ''
200 end += 1 227 end += 1
201 228
202 if nextchar == '}': 229 if nextchar == '}':
203 break 230 break
204 elif nextchar != ',': 231 elif nextchar != ',':
205 raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) 232 raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
206 end = _w(s, end).end() 233 end = _w(s, end).end()
207 nextchar = s[end:end + 1] 234 nextchar = s[end:end + 1]
208 end += 1 235 end += 1
209 if nextchar != '"': 236 if nextchar != '"':
210 raise ValueError(errmsg( 237 raise JSONDecodeError(
211 "Expecting property name enclosed in double quotes", s, end - 1) ) 238 "Expecting property name enclosed in double quotes", s, end - 1)
212 if object_pairs_hook is not None: 239 if object_pairs_hook is not None:
213 result = object_pairs_hook(pairs) 240 result = object_pairs_hook(pairs)
214 return result, end 241 return result, end
215 pairs = dict(pairs) 242 pairs = dict(pairs)
216 if object_hook is not None: 243 if object_hook is not None:
217 pairs = object_hook(pairs) 244 pairs = object_hook(pairs)
218 return pairs, end 245 return pairs, end
219 246
220 def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): 247 def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
221 s, end = s_and_end 248 s, end = s_and_end
222 values = [] 249 values = []
223 nextchar = s[end:end + 1] 250 nextchar = s[end:end + 1]
224 if nextchar in _ws: 251 if nextchar in _ws:
225 end = _w(s, end + 1).end() 252 end = _w(s, end + 1).end()
226 nextchar = s[end:end + 1] 253 nextchar = s[end:end + 1]
227 # Look-ahead for trivial empty array 254 # Look-ahead for trivial empty array
228 if nextchar == ']': 255 if nextchar == ']':
229 return values, end + 1 256 return values, end + 1
230 _append = values.append 257 _append = values.append
231 while True: 258 while True:
232 try: 259 try:
233 value, end = scan_once(s, end) 260 value, end = scan_once(s, end)
234 except StopIteration as err: 261 except StopIteration as err:
235 raise ValueError(errmsg("Expecting value", s, err.value)) from None 262 raise JSONDecodeError("Expecting value", s, err.value) from None
236 _append(value) 263 _append(value)
237 nextchar = s[end:end + 1] 264 nextchar = s[end:end + 1]
238 if nextchar in _ws: 265 if nextchar in _ws:
239 end = _w(s, end + 1).end() 266 end = _w(s, end + 1).end()
240 nextchar = s[end:end + 1] 267 nextchar = s[end:end + 1]
241 end += 1 268 end += 1
242 if nextchar == ']': 269 if nextchar == ']':
243 break 270 break
244 elif nextchar != ',': 271 elif nextchar != ',':
245 raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) 272 raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
246 try: 273 try:
247 if s[end] in _ws: 274 if s[end] in _ws:
248 end += 1 275 end += 1
249 if s[end] in _ws: 276 if s[end] in _ws:
250 end = _w(s, end + 1).end() 277 end = _w(s, end + 1).end()
251 except IndexError: 278 except IndexError:
252 pass 279 pass
253 280
254 return values, end 281 return values, end
255 282
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
336 363
337 364
338 def decode(self, s, _w=WHITESPACE.match): 365 def decode(self, s, _w=WHITESPACE.match):
339 """Return the Python representation of ``s`` (a ``str`` instance 366 """Return the Python representation of ``s`` (a ``str`` instance
340 containing a JSON document). 367 containing a JSON document).
341 368
342 """ 369 """
343 obj, end = self.raw_decode(s, idx=_w(s, 0).end()) 370 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
344 end = _w(s, end).end() 371 end = _w(s, end).end()
345 if end != len(s): 372 if end != len(s):
346 raise ValueError(errmsg("Extra data", s, end, len(s))) 373 raise JSONDecodeError("Extra data", s, end, len(s))
347 return obj 374 return obj
348 375
349 def raw_decode(self, s, idx=0): 376 def raw_decode(self, s, idx=0):
350 """Decode a JSON document from ``s`` (a ``str`` beginning with 377 """Decode a JSON document from ``s`` (a ``str`` beginning with
351 a JSON document) and return a 2-tuple of the Python 378 a JSON document) and return a 2-tuple of the Python
352 representation and the index in ``s`` where the document ended. 379 representation and the index in ``s`` where the document ended.
353 380
354 This can be used to decode a JSON document from a string that may 381 This can be used to decode a JSON document from a string that may
355 have extraneous data at the end. 382 have extraneous data at the end.
356 383
357 """ 384 """
358 try: 385 try:
359 obj, end = self.scan_once(s, idx) 386 obj, end = self.scan_once(s, idx)
360 except StopIteration as err: 387 except StopIteration as err:
361 raise ValueError(errmsg("Expecting value", s, err.value)) from None 388 raise JSONDecodeError("Expecting value", s, err.value) from None
362 return obj, end 389 return obj, end
OLD | NEW
« no previous file with comments | « Doc/library/json.rst ('k') | Lib/json/__init__.py » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+