Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(162353)

Delta Between Two Patch Sets: Lib/sre_parse.py

Issue 12759: "(?P=)" input for Tools/scripts/redemo.py raises unhandled exception
Left Patch Set: Created 8 years ago
Right Patch Set: Created 7 years ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | Lib/test/test_re.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
LEFT | RIGHT
1 # 1 #
2 # Secret Labs' Regular Expression Engine 2 # Secret Labs' Regular Expression Engine
3 # 3 #
4 # convert re-style regular expression to sre pattern 4 # convert re-style regular expression to sre pattern
5 # 5 #
6 # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. 6 # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
7 # 7 #
8 # See the sre.py file for information on usage and redistribution. 8 # See the sre.py file for information on usage and redistribution.
9 # 9 #
10 10
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after
170 elif op in UNITCODES: 170 elif op in UNITCODES:
171 lo = lo + 1 171 lo = lo + 1
172 hi = hi + 1 172 hi = hi + 1
173 elif op == SUCCESS: 173 elif op == SUCCESS:
174 break 174 break
175 self.width = int(min(lo, sys.maxsize)), int(min(hi, sys.maxsize)) 175 self.width = int(min(lo, sys.maxsize)), int(min(hi, sys.maxsize))
176 return self.width 176 return self.width
177 177
178 class Tokenizer: 178 class Tokenizer:
179 def __init__(self, string): 179 def __init__(self, string):
180 self.istext = isinstance(string, str)
180 self.string = string 181 self.string = string
181 self.index = 0 182 self.index = 0
182 self.__next() 183 self.__next()
183 def __next(self): 184 def __next(self):
184 if self.index >= len(self.string): 185 if self.index >= len(self.string):
185 self.next = None 186 self.next = None
186 return 187 return
187 char = self.string[self.index:self.index+1] 188 char = self.string[self.index:self.index+1]
188 # Special case for the str8, since indexing returns a integer 189 # Special case for the str8, since indexing returns a integer
189 # XXX This is only needed for test_bug_926075 in test_re.py 190 # XXX This is only needed for test_bug_926075 in test_re.py
190 if char and isinstance(char, bytes): 191 if char and not self.istext:
191 char = chr(char[0]) 192 char = chr(char[0])
192 if char == "\\": 193 if char == "\\":
193 try: 194 try:
194 c = self.string[self.index + 1] 195 c = self.string[self.index + 1]
195 except IndexError: 196 except IndexError:
196 raise error("bogus escape (end of line)") 197 raise error("bogus escape (end of line)")
197 if isinstance(self.string, bytes): 198 if not self.istext:
198 c = chr(c) 199 c = chr(c)
199 char = char + c 200 char = char + c
200 self.index = self.index + len(char) 201 self.index = self.index + len(char)
201 self.next = char 202 self.next = char
202 def match(self, char, skip=1): 203 def match(self, char, skip=1):
203 if char == self.next: 204 if char == self.next:
204 if skip: 205 if skip:
205 self.__next() 206 self.__next()
206 return 1 207 return 1
207 return 0 208 return 0
208 def get(self): 209 def get(self):
209 this = self.next 210 this = self.next
210 self.__next() 211 self.__next()
211 return this 212 return this
213 def getwhile(self, n, charset):
214 result = ''
215 for _ in range(n):
216 c = self.next
217 if c not in charset:
218 break
219 result += c
220 self.__next()
221 return result
212 def tell(self): 222 def tell(self):
213 return self.index, self.next 223 return self.index, self.next
214 def seek(self, index): 224 def seek(self, index):
215 self.index, self.next = index 225 self.index, self.next = index
216 226
217 def isident(char): 227 def isident(char):
218 return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" 228 return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
219 229
220 def isdigit(char): 230 def isdigit(char):
221 return "0" <= char <= "9" 231 return "0" <= char <= "9"
222 232
223 def isname(name): 233 def isname(name):
224 # check that group name is a valid string 234 # check that group name is a valid string
225 if name == "":
226 return False
227 if not isident(name[0]): 235 if not isident(name[0]):
228 return False 236 return False
229 for char in name[1:]: 237 for char in name[1:]:
230 if not isident(char) and not isdigit(char): 238 if not isident(char) and not isdigit(char):
231 return False 239 return False
232 return True 240 return True
233 241
234 def _class_escape(source, escape): 242 def _class_escape(source, escape):
235 # handle escape code inside character class 243 # handle escape code inside character class
236 code = ESCAPES.get(escape) 244 code = ESCAPES.get(escape)
237 if code: 245 if code:
238 return code 246 return code
239 code = CATEGORIES.get(escape) 247 code = CATEGORIES.get(escape)
240 if code: 248 if code:
241 return code 249 return code
242 try: 250 try:
243 c = escape[1:2] 251 c = escape[1:2]
244 if c == "x": 252 if c == "x":
245 # hexadecimal escape (exactly two digits) 253 # hexadecimal escape (exactly two digits)
246 while source.next in HEXDIGITS and len(escape) < 4: 254 escape += source.getwhile(2, HEXDIGITS)
247 escape = escape + source.get() 255 if len(escape) != 4:
248 escape = escape[2:] 256 raise ValueError
249 if len(escape) != 2: 257 return LITERAL, int(escape[2:], 16) & 0xff
250 raise error("bogus escape: %s" % repr("\\" + escape)) 258 elif c == "u" and source.istext:
251 return LITERAL, int(escape, 16) & 0xff 259 # unicode escape (exactly four digits)
260 escape += source.getwhile(4, HEXDIGITS)
261 if len(escape) != 6:
262 raise ValueError
263 return LITERAL, int(escape[2:], 16)
264 elif c == "U" and source.istext:
265 # unicode escape (exactly eight digits)
266 escape += source.getwhile(8, HEXDIGITS)
267 if len(escape) != 10:
268 raise ValueError
269 c = int(escape[2:], 16)
270 chr(c) # raise ValueError for invalid code
271 return LITERAL, c
252 elif c in OCTDIGITS: 272 elif c in OCTDIGITS:
253 # octal escape (up to three digits) 273 # octal escape (up to three digits)
254 while source.next in OCTDIGITS and len(escape) < 4: 274 escape += source.getwhile(2, OCTDIGITS)
255 escape = escape + source.get() 275 return LITERAL, int(escape[1:], 8) & 0xff
256 escape = escape[1:]
257 return LITERAL, int(escape, 8) & 0xff
258 elif c in DIGITS: 276 elif c in DIGITS:
259 raise error("bogus escape: %s" % repr(escape)) 277 raise ValueError
260 if len(escape) == 2: 278 if len(escape) == 2:
261 return LITERAL, ord(escape[1]) 279 return LITERAL, ord(escape[1])
262 except ValueError: 280 except ValueError:
263 pass 281 pass
264 raise error("bogus escape: %s" % repr(escape)) 282 raise error("bogus escape: %s" % repr(escape))
265 283
266 def _escape(source, escape, state): 284 def _escape(source, escape, state):
267 # handle escape code in expression 285 # handle escape code in expression
268 code = CATEGORIES.get(escape) 286 code = CATEGORIES.get(escape)
269 if code: 287 if code:
270 return code 288 return code
271 code = ESCAPES.get(escape) 289 code = ESCAPES.get(escape)
272 if code: 290 if code:
273 return code 291 return code
274 try: 292 try:
275 c = escape[1:2] 293 c = escape[1:2]
276 if c == "x": 294 if c == "x":
277 # hexadecimal escape 295 # hexadecimal escape
278 while source.next in HEXDIGITS and len(escape) < 4: 296 escape += source.getwhile(2, HEXDIGITS)
279 escape = escape + source.get()
280 if len(escape) != 4: 297 if len(escape) != 4:
281 raise ValueError 298 raise ValueError
282 return LITERAL, int(escape[2:], 16) & 0xff 299 return LITERAL, int(escape[2:], 16) & 0xff
300 elif c == "u" and source.istext:
301 # unicode escape (exactly four digits)
302 escape += source.getwhile(4, HEXDIGITS)
303 if len(escape) != 6:
304 raise ValueError
305 return LITERAL, int(escape[2:], 16)
306 elif c == "U" and source.istext:
307 # unicode escape (exactly eight digits)
308 escape += source.getwhile(8, HEXDIGITS)
309 if len(escape) != 10:
310 raise ValueError
311 c = int(escape[2:], 16)
312 chr(c) # raise ValueError for invalid code
313 return LITERAL, c
283 elif c == "0": 314 elif c == "0":
284 # octal escape 315 # octal escape
285 while source.next in OCTDIGITS and len(escape) < 4: 316 escape += source.getwhile(2, OCTDIGITS)
286 escape = escape + source.get()
287 return LITERAL, int(escape[1:], 8) & 0xff 317 return LITERAL, int(escape[1:], 8) & 0xff
288 elif c in DIGITS: 318 elif c in DIGITS:
289 # octal escape *or* decimal group reference (sigh) 319 # octal escape *or* decimal group reference (sigh)
290 if source.next in DIGITS: 320 if source.next in DIGITS:
291 escape = escape + source.get() 321 escape = escape + source.get()
292 if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and 322 if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
293 source.next in OCTDIGITS): 323 source.next in OCTDIGITS):
294 # got three octal digits; this is an octal escape 324 # got three octal digits; this is an octal escape
295 escape = escape + source.get() 325 escape = escape + source.get()
296 return LITERAL, int(escape[1:], 8) & 0xff 326 return LITERAL, int(escape[1:], 8) & 0xff
(...skipping 246 matching lines...) Expand 10 before | Expand all | Expand 10 after
543 # named group: skip forward to end of name 573 # named group: skip forward to end of name
544 name = "" 574 name = ""
545 while 1: 575 while 1:
546 char = sourceget() 576 char = sourceget()
547 if char is None: 577 if char is None:
548 raise error("unterminated name") 578 raise error("unterminated name")
549 if char == ">": 579 if char == ">":
550 break 580 break
551 name = name + char 581 name = name + char
552 group = 1 582 group = 1
583 if not name:
584 raise error("bad group name")
553 if not isname(name): 585 if not isname(name):
554 raise error("bad character in group name") 586 raise error("bad character in group name")
555 elif sourcematch("="): 587 elif sourcematch("="):
556 # named backreference 588 # named backreference
557 name = "" 589 name = ""
558 while 1: 590 while 1:
559 char = sourceget() 591 char = sourceget()
560 if char is None: 592 if char is None:
561 raise error("unterminated name") 593 raise error("unterminated name")
562 if char == ")": 594 if char == ")":
563 break 595 break
564 name = name + char 596 name = name + char
597 if not name:
598 raise error("bad group name")
565 if not isname(name): 599 if not isname(name):
566 raise error("bad character in group name") 600 raise error("bad character in group name")
567 gid = state.groupdict.get(name) 601 gid = state.groupdict.get(name)
568 if gid is None: 602 if gid is None:
569 raise error("unknown group name") 603 raise error("unknown group name")
570 subpatternappend((GROUPREF, gid)) 604 subpatternappend((GROUPREF, gid))
571 continue 605 continue
572 else: 606 else:
573 char = sourceget() 607 char = sourceget()
574 if char is None: 608 if char is None:
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
607 # conditional backreference group 641 # conditional backreference group
608 condname = "" 642 condname = ""
609 while 1: 643 while 1:
610 char = sourceget() 644 char = sourceget()
611 if char is None: 645 if char is None:
612 raise error("unterminated name") 646 raise error("unterminated name")
613 if char == ")": 647 if char == ")":
614 break 648 break
615 condname = condname + char 649 condname = condname + char
616 group = 2 650 group = 2
651 if not condname:
652 raise error("bad group name")
617 if isname(condname): 653 if isname(condname):
618 condgroup = state.groupdict.get(condname) 654 condgroup = state.groupdict.get(condname)
619 if condgroup is None: 655 if condgroup is None:
620 raise error("unknown group name") 656 raise error("unknown group name")
621 else: 657 else:
622 try: 658 try:
623 condgroup = int(condname) 659 condgroup = int(condname)
624 except ValueError: 660 except ValueError:
625 raise error("bad character in group name") 661 raise error("bad character in group name")
626 else: 662 else:
(...skipping 182 matching lines...) Expand 10 before | Expand all | Expand 10 after
809 groups, literals = template 845 groups, literals = template
810 literals = literals[:] 846 literals = literals[:]
811 try: 847 try:
812 for index, group in groups: 848 for index, group in groups:
813 literals[index] = s = g(group) 849 literals[index] = s = g(group)
814 if s is None: 850 if s is None:
815 raise error("unmatched group") 851 raise error("unmatched group")
816 except IndexError: 852 except IndexError:
817 raise error("invalid group reference") 853 raise error("invalid group reference")
818 return sep.join(literals) 854 return sep.join(literals)
LEFT | RIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+