Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(119233)

Side by Side Diff: Lib/json/decoder.py

Issue 19361: Specialize exceptions thrown by JSON parser
Patch Set: Created 4 years, 11 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Doc/library/json.rst ('k') | Lib/json/__init__.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLDNEW
1 """Implementation of JSONDecoder 1 """Implementation of JSONDecoder
2 """ 2 """
3 import re 3 import re
4 4
5 from json import scanner 5 from json import scanner
6 try: 6 try:
7 from _json import scanstring as c_scanstring 7 from _json import scanstring as c_scanstring
8 except ImportError: 8 except ImportError:
9 c_scanstring = None 9 c_scanstring = None
10 10
11 __all__ = ['JSONDecoder'] 11 __all__ = ['JSONDecoder', 'JSONDecodeError']
12 12
13 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL 13 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
14 14
15 NaN = float('nan') 15 NaN = float('nan')
16 PosInf = float('inf') 16 PosInf = float('inf')
17 NegInf = float('-inf') 17 NegInf = float('-inf')
18 18
19 19
20 def linecol(doc, pos): 20 class JSONDecodeError(ValueError):
21 if isinstance(doc, bytes): 21 """Subclass of ValueError with the following additional properties:
22 newline = b'\n'
23 else:
24 newline = '\n'
25 lineno = doc.count(newline, 0, pos) + 1
26 if lineno == 1:
27 colno = pos + 1
28 else:
29 colno = pos - doc.rindex(newline, 0, pos)
30 return lineno, colno
31 22
23 msg: The unformatted error message
24 doc: The JSON document being parsed
25 pos: The start index of doc where parsing failed
26 lineno: The line corresponding to pos
27 colno: The column corresponding to pos
32 28
33 def errmsg(msg, doc, pos, end=None): 29 """
34 # Note that this function is called from _json 30 # Note that this exception is used from _json
35 lineno, colno = linecol(doc, pos) 31 def __init__(self, msg, doc, pos):
36 if end is None: 32 lineno = doc.count('\n', 0, pos) + 1
37 fmt = '{0}: line {1} column {2} (char {3})' 33 colno = pos - doc.rfind('\n', 0, pos)
38 return fmt.format(msg, lineno, colno, pos) 34 errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
39 #fmt = '%s: line %d column %d (char %d)' 35 ValueError.__init__(self, errmsg)
40 #return fmt % (msg, lineno, colno, pos) 36 self.msg = msg
41 endlineno, endcolno = linecol(doc, end) 37 self.doc = doc
42 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' 38 self.pos = pos
43 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) 39 self.lineno = lineno
44 #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' 40 self.colno = colno
45 #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) 41
42 def __reduce__(self):
43 return self.__class__, (self.msg, self.doc, self.pos)
46 44
47 45
48 _CONSTANTS = { 46 _CONSTANTS = {
49 '-Infinity': NegInf, 47 '-Infinity': NegInf,
50 'Infinity': PosInf, 48 'Infinity': PosInf,
51 'NaN': NaN, 49 'NaN': NaN,
52 } 50 }
53 51
54 52
55 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) 53 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
56 BACKSLASH = { 54 BACKSLASH = {
57 '"': '"', '\\': '\\', '/': '/', 55 '"': '"', '\\': '\\', '/': '/',
58 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t', 56 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
59 } 57 }
60 58
61 def _decode_uXXXX(s, pos): 59 def _decode_uXXXX(s, pos):
62 esc = s[pos + 1:pos + 5] 60 esc = s[pos + 1:pos + 5]
63 if len(esc) == 4 and esc[1] not in 'xX': 61 if len(esc) == 4 and esc[1] not in 'xX':
64 try: 62 try:
65 return int(esc, 16) 63 return int(esc, 16)
66 except ValueError: 64 except ValueError:
67 pass 65 pass
68 msg = "Invalid \\uXXXX escape" 66 msg = "Invalid \\uXXXX escape"
69 raise ValueError(errmsg(msg, s, pos)) 67 raise JSONDecodeError(msg, s, pos)
70 68
71 def py_scanstring(s, end, strict=True, 69 def py_scanstring(s, end, strict=True,
72 _b=BACKSLASH, _m=STRINGCHUNK.match): 70 _b=BACKSLASH, _m=STRINGCHUNK.match):
73 """Scan the string s for a JSON string. End is the index of the 71 """Scan the string s for a JSON string. End is the index of the
74 character in s after the quote that started the JSON string. 72 character in s after the quote that started the JSON string.
75 Unescapes all valid JSON string escape sequences and raises ValueError 73 Unescapes all valid JSON string escape sequences and raises ValueError
76 on attempt to decode an invalid string. If strict is False then literal 74 on attempt to decode an invalid string. If strict is False then literal
77 control characters are allowed in the string. 75 control characters are allowed in the string.
78 76
79 Returns a tuple of the decoded string and the index of the character in s 77 Returns a tuple of the decoded string and the index of the character in s
80 after the end quote.""" 78 after the end quote."""
81 chunks = [] 79 chunks = []
82 _append = chunks.append 80 _append = chunks.append
83 begin = end - 1 81 begin = end - 1
84 while 1: 82 while 1:
85 chunk = _m(s, end) 83 chunk = _m(s, end)
86 if chunk is None: 84 if chunk is None:
87 raise ValueError( 85 raise JSONDecodeError("Unterminated string starting at", s, begin)
88 errmsg("Unterminated string starting at", s, begin))
89 end = chunk.end() 86 end = chunk.end()
90 content, terminator = chunk.groups() 87 content, terminator = chunk.groups()
91 # Content contains zero or more unescaped string characters 88 # Content contains zero or more unescaped string characters
92 if content: 89 if content:
93 _append(content) 90 _append(content)
94 # Terminator is the end of string, a literal control character, 91 # Terminator is the end of string, a literal control character,
95 # or a backslash denoting that an escape sequence follows 92 # or a backslash denoting that an escape sequence follows
96 if terminator == '"': 93 if terminator == '"':
97 break 94 break
98 elif terminator != '\\': 95 elif terminator != '\\':
99 if strict: 96 if strict:
100 #msg = "Invalid control character %r at" % (terminator,) 97 #msg = "Invalid control character %r at" % (terminator,)
101 msg = "Invalid control character {0!r} at".format(terminator) 98 msg = "Invalid control character {0!r} at".format(terminator)
102 raise ValueError(errmsg(msg, s, end)) 99 raise JSONDecodeError(msg, s, end)
103 else: 100 else:
104 _append(terminator) 101 _append(terminator)
105 continue 102 continue
106 try: 103 try:
107 esc = s[end] 104 esc = s[end]
108 except IndexError: 105 except IndexError:
109 raise ValueError( 106 raise JSONDecodeError("Unterminated string starting at", s, begin)
110 errmsg("Unterminated string starting at", s, begin))
111 # If not a unicode escape sequence, must be in the lookup table 107 # If not a unicode escape sequence, must be in the lookup table
112 if esc != 'u': 108 if esc != 'u':
113 try: 109 try:
114 char = _b[esc] 110 char = _b[esc]
115 except KeyError: 111 except KeyError:
116 msg = "Invalid \\escape: {0!r}".format(esc) 112 msg = "Invalid \\escape: {0!r}".format(esc)
117 raise ValueError(errmsg(msg, s, end)) 113 raise JSONDecodeError(msg, s, end)
118 end += 1 114 end += 1
119 else: 115 else:
120 uni = _decode_uXXXX(s, end) 116 uni = _decode_uXXXX(s, end)
121 end += 5 117 end += 5
122 if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u': 118 if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
123 uni2 = _decode_uXXXX(s, end + 1) 119 uni2 = _decode_uXXXX(s, end + 1)
124 if 0xdc00 <= uni2 <= 0xdfff: 120 if 0xdc00 <= uni2 <= 0xdfff:
125 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) 121 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
126 end += 6 122 end += 6
127 char = chr(uni) 123 char = chr(uni)
(...skipping 28 matching lines...) Expand all
156 # Trivial empty object 152 # Trivial empty object
157 if nextchar == '}': 153 if nextchar == '}':
158 if object_pairs_hook is not None: 154 if object_pairs_hook is not None:
159 result = object_pairs_hook(pairs) 155 result = object_pairs_hook(pairs)
160 return result, end + 1 156 return result, end + 1
161 pairs = {} 157 pairs = {}
162 if object_hook is not None: 158 if object_hook is not None:
163 pairs = object_hook(pairs) 159 pairs = object_hook(pairs)
164 return pairs, end + 1 160 return pairs, end + 1
165 elif nextchar != '"': 161 elif nextchar != '"':
166 raise ValueError(errmsg( 162 raise JSONDecodeError(
167 "Expecting property name enclosed in double quotes", s, end)) 163 "Expecting property name enclosed in double quotes", s, end)
168 end += 1 164 end += 1
169 while True: 165 while True:
170 key, end = scanstring(s, end, strict) 166 key, end = scanstring(s, end, strict)
171 key = memo_get(key, key) 167 key = memo_get(key, key)
172 # To skip some function call overhead we optimize the fast paths where 168 # To skip some function call overhead we optimize the fast paths where
173 # the JSON key separator is ": " or just ":". 169 # the JSON key separator is ": " or just ":".
174 if s[end:end + 1] != ':': 170 if s[end:end + 1] != ':':
175 end = _w(s, end).end() 171 end = _w(s, end).end()
176 if s[end:end + 1] != ':': 172 if s[end:end + 1] != ':':
177 raise ValueError(errmsg("Expecting ':' delimiter", s, end)) 173 raise JSONDecodeError("Expecting ':' delimiter", s, end)
178 end += 1 174 end += 1
179 175
180 try: 176 try:
181 if s[end] in _ws: 177 if s[end] in _ws:
182 end += 1 178 end += 1
183 if s[end] in _ws: 179 if s[end] in _ws:
184 end = _w(s, end + 1).end() 180 end = _w(s, end + 1).end()
185 except IndexError: 181 except IndexError:
186 pass 182 pass
187 183
188 try: 184 try:
189 value, end = scan_once(s, end) 185 value, end = scan_once(s, end)
190 except StopIteration as err: 186 except StopIteration as err:
191 raise ValueError(errmsg("Expecting value", s, err.value)) from None 187 raise JSONDecodeError("Expecting value", s, err.value) from None
192 pairs_append((key, value)) 188 pairs_append((key, value))
193 try: 189 try:
194 nextchar = s[end] 190 nextchar = s[end]
195 if nextchar in _ws: 191 if nextchar in _ws:
196 end = _w(s, end + 1).end() 192 end = _w(s, end + 1).end()
197 nextchar = s[end] 193 nextchar = s[end]
198 except IndexError: 194 except IndexError:
199 nextchar = '' 195 nextchar = ''
200 end += 1 196 end += 1
201 197
202 if nextchar == '}': 198 if nextchar == '}':
203 break 199 break
204 elif nextchar != ',': 200 elif nextchar != ',':
205 raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) 201 raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
206 end = _w(s, end).end() 202 end = _w(s, end).end()
207 nextchar = s[end:end + 1] 203 nextchar = s[end:end + 1]
208 end += 1 204 end += 1
209 if nextchar != '"': 205 if nextchar != '"':
210 raise ValueError(errmsg( 206 raise JSONDecodeError(
211 "Expecting property name enclosed in double quotes", s, end - 1)) 207 "Expecting property name enclosed in double quotes", s, end - 1)
212 if object_pairs_hook is not None: 208 if object_pairs_hook is not None:
213 result = object_pairs_hook(pairs) 209 result = object_pairs_hook(pairs)
214 return result, end 210 return result, end
215 pairs = dict(pairs) 211 pairs = dict(pairs)
216 if object_hook is not None: 212 if object_hook is not None:
217 pairs = object_hook(pairs) 213 pairs = object_hook(pairs)
218 return pairs, end 214 return pairs, end
219 215
220 def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): 216 def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
221 s, end = s_and_end 217 s, end = s_and_end
222 values = [] 218 values = []
223 nextchar = s[end:end + 1] 219 nextchar = s[end:end + 1]
224 if nextchar in _ws: 220 if nextchar in _ws:
225 end = _w(s, end + 1).end() 221 end = _w(s, end + 1).end()
226 nextchar = s[end:end + 1] 222 nextchar = s[end:end + 1]
227 # Look-ahead for trivial empty array 223 # Look-ahead for trivial empty array
228 if nextchar == ']': 224 if nextchar == ']':
229 return values, end + 1 225 return values, end + 1
230 _append = values.append 226 _append = values.append
231 while True: 227 while True:
232 try: 228 try:
233 value, end = scan_once(s, end) 229 value, end = scan_once(s, end)
234 except StopIteration as err: 230 except StopIteration as err:
235 raise ValueError(errmsg("Expecting value", s, err.value)) from None 231 raise JSONDecodeError("Expecting value", s, err.value) from None
236 _append(value) 232 _append(value)
237 nextchar = s[end:end + 1] 233 nextchar = s[end:end + 1]
238 if nextchar in _ws: 234 if nextchar in _ws:
239 end = _w(s, end + 1).end() 235 end = _w(s, end + 1).end()
240 nextchar = s[end:end + 1] 236 nextchar = s[end:end + 1]
241 end += 1 237 end += 1
242 if nextchar == ']': 238 if nextchar == ']':
243 break 239 break
244 elif nextchar != ',': 240 elif nextchar != ',':
245 raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) 241 raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
246 try: 242 try:
247 if s[end] in _ws: 243 if s[end] in _ws:
248 end += 1 244 end += 1
249 if s[end] in _ws: 245 if s[end] in _ws:
250 end = _w(s, end + 1).end() 246 end = _w(s, end + 1).end()
251 except IndexError: 247 except IndexError:
252 pass 248 pass
253 249
254 return values, end 250 return values, end
255 251
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
336 332
337 333
338 def decode(self, s, _w=WHITESPACE.match): 334 def decode(self, s, _w=WHITESPACE.match):
339 """Return the Python representation of ``s`` (a ``str`` instance 335 """Return the Python representation of ``s`` (a ``str`` instance
340 containing a JSON document). 336 containing a JSON document).
341 337
342 """ 338 """
343 obj, end = self.raw_decode(s, idx=_w(s, 0).end()) 339 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
344 end = _w(s, end).end() 340 end = _w(s, end).end()
345 if end != len(s): 341 if end != len(s):
346 raise ValueError(errmsg("Extra data", s, end, len(s))) 342 raise JSONDecodeError("Extra data", s, end)
347 return obj 343 return obj
348 344
349 def raw_decode(self, s, idx=0): 345 def raw_decode(self, s, idx=0):
350 """Decode a JSON document from ``s`` (a ``str`` beginning with 346 """Decode a JSON document from ``s`` (a ``str`` beginning with
351 a JSON document) and return a 2-tuple of the Python 347 a JSON document) and return a 2-tuple of the Python
352 representation and the index in ``s`` where the document ended. 348 representation and the index in ``s`` where the document ended.
353 349
354 This can be used to decode a JSON document from a string that may 350 This can be used to decode a JSON document from a string that may
355 have extraneous data at the end. 351 have extraneous data at the end.
356 352
357 """ 353 """
358 try: 354 try:
359 obj, end = self.scan_once(s, idx) 355 obj, end = self.scan_once(s, idx)
360 except StopIteration as err: 356 except StopIteration as err:
361 raise ValueError(errmsg("Expecting value", s, err.value)) from None 357 raise JSONDecodeError("Expecting value", s, err.value) from None
362 return obj, end 358 return obj, end
OLDNEW
« no previous file with comments | « Doc/library/json.rst ('k') | Lib/json/__init__.py » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+