Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(68280)

Side by Side Diff: Lib/sre_parse.py

Issue 22578: Add additional attributes to re.error
Patch Set: Created 5 years ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« Doc/library/re.rst ('K') | « Lib/sre_constants.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # 1 #
2 # Secret Labs' Regular Expression Engine 2 # Secret Labs' Regular Expression Engine
3 # 3 #
4 # convert re-style regular expression to sre pattern 4 # convert re-style regular expression to sre pattern
5 # 5 #
6 # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. 6 # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
7 # 7 #
8 # See the sre.py file for information on usage and redistribution. 8 # See the sre.py file for information on usage and redistribution.
9 # 9 #
10 10
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after
200 return 200 return
201 char = self.string[self.index:self.index+1] 201 char = self.string[self.index:self.index+1]
202 # Special case for the str8, since indexing returns a integer 202 # Special case for the str8, since indexing returns a integer
203 # XXX This is only needed for test_bug_926075 in test_re.py 203 # XXX This is only needed for test_bug_926075 in test_re.py
204 if char and not self.istext: 204 if char and not self.istext:
205 char = chr(char[0]) 205 char = chr(char[0])
206 if char == "\\": 206 if char == "\\":
207 try: 207 try:
208 c = self.string[self.index + 1] 208 c = self.string[self.index + 1]
209 except IndexError: 209 except IndexError:
210 raise error("bogus escape (end of line)") 210 self.next = None
211 raise self.error("bogus escape (end of line)", 0)
211 if not self.istext: 212 if not self.istext:
212 c = chr(c) 213 c = chr(c)
213 char = char + c 214 char = char + c
214 self.index = self.index + len(char) 215 self.index = self.index + len(char)
215 self.next = char 216 self.next = char
216 def match(self, char, skip=1): 217 def match(self, char, skip=1):
217 if char == self.next: 218 if char == self.next:
218 if skip: 219 if skip:
219 self.__next() 220 self.__next()
220 return 1 221 return 1
221 return 0 222 return 0
222 def get(self): 223 def get(self):
223 this = self.next 224 this = self.next
224 self.__next() 225 self.__next()
225 return this 226 return this
226 def getwhile(self, n, charset): 227 def getwhile(self, n, charset):
227 result = '' 228 result = ''
228 for _ in range(n): 229 for _ in range(n):
229 c = self.next 230 c = self.next
230 if c not in charset: 231 if c not in charset:
231 break 232 break
232 result += c 233 result += c
233 self.__next() 234 self.__next()
234 return result 235 return result
235 def tell(self): 236 def tell(self):
236 return self.index, self.next 237 return self.index - len(self.next or '')
237 def seek(self, index): 238 def seek(self, index):
238 self.index, self.next = index 239 self.index = index
240 self.__next()
241
242 def error(self, msg, offset):
243 return error(msg, self.string, self.tell() - offset)
239 244
240 # The following three functions are not used in this module anymore, but we keep 245 # The following three functions are not used in this module anymore, but we keep
241 # them here (with DeprecationWarnings) for backwards compatibility. 246 # them here (with DeprecationWarnings) for backwards compatibility.
242 247
243 def isident(char): 248 def isident(char):
244 import warnings 249 import warnings
245 warnings.warn('sre_parse.isident() will be removed in 3.5', 250 warnings.warn('sre_parse.isident() will be removed in 3.5',
246 DeprecationWarning, stacklevel=2) 251 DeprecationWarning, stacklevel=2)
ezio.melotti 2014/10/08 14:58:20 Unrelated, but is there an issue for this?
storchaka 2014/10/08 19:36:19 Not yet.
247 return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" 252 return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
248 253
249 def isdigit(char): 254 def isdigit(char):
250 import warnings 255 import warnings
251 warnings.warn('sre_parse.isdigit() will be removed in 3.5', 256 warnings.warn('sre_parse.isdigit() will be removed in 3.5',
252 DeprecationWarning, stacklevel=2) 257 DeprecationWarning, stacklevel=2)
253 return "0" <= char <= "9" 258 return "0" <= char <= "9"
254 259
255 def isname(name): 260 def isname(name):
256 import warnings 261 import warnings
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
292 if len(escape) != 10: 297 if len(escape) != 10:
293 raise ValueError 298 raise ValueError
294 c = int(escape[2:], 16) 299 c = int(escape[2:], 16)
295 chr(c) # raise ValueError for invalid code 300 chr(c) # raise ValueError for invalid code
296 return LITERAL, c 301 return LITERAL, c
297 elif c in OCTDIGITS: 302 elif c in OCTDIGITS:
298 # octal escape (up to three digits) 303 # octal escape (up to three digits)
299 escape += source.getwhile(2, OCTDIGITS) 304 escape += source.getwhile(2, OCTDIGITS)
300 c = int(escape[1:], 8) 305 c = int(escape[1:], 8)
301 if c > 0o377: 306 if c > 0o377:
302 raise error('octal escape value %r outside of ' 307 raise source.error('octal escape value %r outside of '
303 'range 0-0o377' % escape) 308 'range 0-0o377' % escape, len(escape))
304 return LITERAL, c 309 return LITERAL, c
305 elif c in DIGITS: 310 elif c in DIGITS:
306 raise ValueError 311 raise ValueError
307 if len(escape) == 2: 312 if len(escape) == 2:
308 return LITERAL, ord(escape[1]) 313 return LITERAL, ord(escape[1])
309 except ValueError: 314 except ValueError:
310 pass 315 pass
311 raise error("bogus escape: %s" % repr(escape)) 316 raise source.error("bogus escape: %s" % repr(escape), len(escape))
312 317
313 def _escape(source, escape, state): 318 def _escape(source, escape, state):
314 # handle escape code in expression 319 # handle escape code in expression
315 code = CATEGORIES.get(escape) 320 code = CATEGORIES.get(escape)
316 if code: 321 if code:
317 return code 322 return code
318 code = ESCAPES.get(escape) 323 code = ESCAPES.get(escape)
319 if code: 324 if code:
320 return code 325 return code
321 try: 326 try:
(...skipping 25 matching lines...) Expand all
347 elif c in DIGITS: 352 elif c in DIGITS:
348 # octal escape *or* decimal group reference (sigh) 353 # octal escape *or* decimal group reference (sigh)
349 if source.next in DIGITS: 354 if source.next in DIGITS:
350 escape = escape + source.get() 355 escape = escape + source.get()
351 if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and 356 if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
352 source.next in OCTDIGITS): 357 source.next in OCTDIGITS):
353 # got three octal digits; this is an octal escape 358 # got three octal digits; this is an octal escape
354 escape = escape + source.get() 359 escape = escape + source.get()
355 c = int(escape[1:], 8) 360 c = int(escape[1:], 8)
356 if c > 0o377: 361 if c > 0o377:
357 raise error('octal escape value %r outside of ' 362 raise source.error('octal escape value %r outside of '
358 'range 0-0o377' % escape) 363 'range 0-0o377' % escape,
364 len(escape))
359 return LITERAL, c 365 return LITERAL, c
360 # not an octal escape, so this is a group reference 366 # not an octal escape, so this is a group reference
361 group = int(escape[1:]) 367 group = int(escape[1:])
362 if group < state.groups: 368 if group < state.groups:
363 if not state.checkgroup(group): 369 if not state.checkgroup(group):
364 raise error("cannot refer to open group") 370 raise source.error("cannot refer to open group",
371 len(escape))
365 return GROUPREF, group 372 return GROUPREF, group
366 raise ValueError 373 raise ValueError
367 if len(escape) == 2: 374 if len(escape) == 2:
368 return LITERAL, ord(escape[1]) 375 return LITERAL, ord(escape[1])
369 except ValueError: 376 except ValueError:
370 pass 377 pass
371 raise error("bogus escape: %s" % repr(escape)) 378 raise source.error("bogus escape: %s" % repr(escape), len(escape))
372 379
373 def _parse_sub(source, state, nested=1): 380 def _parse_sub(source, state, nested=1):
374 # parse an alternation: a|b|c 381 # parse an alternation: a|b|c
375 382
376 items = [] 383 items = []
377 itemsappend = items.append 384 itemsappend = items.append
378 sourcematch = source.match 385 sourcematch = source.match
379 while 1: 386 while 1:
380 itemsappend(_parse(source, state)) 387 itemsappend(_parse(source, state))
381 if sourcematch("|"): 388 if sourcematch("|"):
382 continue 389 continue
383 if not nested: 390 if not nested:
384 break 391 break
385 if not source.next or sourcematch(")", 0): 392 if not source.next or sourcematch(")", 0):
386 break 393 break
387 else: 394 else:
388 raise error("pattern not properly closed") 395 raise source.error("pattern not properly closed", 0)
389 396
390 if len(items) == 1: 397 if len(items) == 1:
391 return items[0] 398 return items[0]
392 399
393 subpattern = SubPattern(state) 400 subpattern = SubPattern(state)
394 subpatternappend = subpattern.append 401 subpatternappend = subpattern.append
395 402
396 # check if all items share a common prefix 403 # check if all items share a common prefix
397 while 1: 404 while 1:
398 prefix = None 405 prefix = None
(...skipping 28 matching lines...) Expand all
427 return subpattern 434 return subpattern
428 435
429 subpattern.append((BRANCH, (None, items))) 436 subpattern.append((BRANCH, (None, items)))
430 return subpattern 437 return subpattern
431 438
432 def _parse_sub_cond(source, state, condgroup): 439 def _parse_sub_cond(source, state, condgroup):
433 item_yes = _parse(source, state) 440 item_yes = _parse(source, state)
434 if source.match("|"): 441 if source.match("|"):
435 item_no = _parse(source, state) 442 item_no = _parse(source, state)
436 if source.match("|"): 443 if source.match("|"):
437 raise error("conditional backref with more than two branches") 444 raise source.error("conditional backref with more than two branches" ,
445 1)
438 else: 446 else:
439 item_no = None 447 item_no = None
440 if source.next and not source.match(")", 0): 448 if source.next and not source.match(")", 0):
441 raise error("pattern not properly closed") 449 raise source.error("pattern not properly closed", 0)
442 subpattern = SubPattern(state) 450 subpattern = SubPattern(state)
443 subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) 451 subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
444 return subpattern 452 return subpattern
445 453
446 _PATTERNENDERS = set("|)") 454 _PATTERNENDERS = set("|)")
447 _ASSERTCHARS = set("=!<") 455 _ASSERTCHARS = set("=!<")
448 _LOOKBEHINDASSERTCHARS = set("=!") 456 _LOOKBEHINDASSERTCHARS = set("=!")
449 _REPEATCODES = set([MIN_REPEAT, MAX_REPEAT]) 457 _REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
450 458
451 def _parse(source, state): 459 def _parse(source, state):
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
496 start = set[:] 504 start = set[:]
497 while 1: 505 while 1:
498 this = sourceget() 506 this = sourceget()
499 if this == "]" and set != start: 507 if this == "]" and set != start:
500 break 508 break
501 elif this and this[0] == "\\": 509 elif this and this[0] == "\\":
502 code1 = _class_escape(source, this) 510 code1 = _class_escape(source, this)
503 elif this: 511 elif this:
504 code1 = LITERAL, ord(this) 512 code1 = LITERAL, ord(this)
505 else: 513 else:
506 raise error("unexpected end of regular expression") 514 raise source.error("unexpected end of regular expression", 0 )
507 if sourcematch("-"): 515 if sourcematch("-"):
508 # potential range 516 # potential range
509 this = sourceget() 517 this = sourceget()
510 if this == "]": 518 if this == "]":
511 if code1[0] is IN: 519 if code1[0] is IN:
512 code1 = code1[1][0] 520 code1 = code1[1][0]
513 setappend(code1) 521 setappend(code1)
514 setappend((LITERAL, ord("-"))) 522 setappend((LITERAL, ord("-")))
515 break 523 break
516 elif this: 524 elif this:
517 if this[0] == "\\": 525 if this[0] == "\\":
518 code2 = _class_escape(source, this) 526 code2 = _class_escape(source, this)
519 else: 527 else:
520 code2 = LITERAL, ord(this) 528 code2 = LITERAL, ord(this)
521 if code1[0] != LITERAL or code2[0] != LITERAL: 529 if code1[0] != LITERAL or code2[0] != LITERAL:
522 raise error("bad character range") 530 raise source.error("bad character range", len(this))
523 lo = code1[1] 531 lo = code1[1]
524 hi = code2[1] 532 hi = code2[1]
525 if hi < lo: 533 if hi < lo:
526 raise error("bad character range") 534 raise source.error("bad character range", len(this))
527 setappend((RANGE, (lo, hi))) 535 setappend((RANGE, (lo, hi)))
528 else: 536 else:
529 raise error("unexpected end of regular expression") 537 raise source.error("unexpected end of regular expression ", 0)
530 else: 538 else:
531 if code1[0] is IN: 539 if code1[0] is IN:
532 code1 = code1[1][0] 540 code1 = code1[1][0]
533 setappend(code1) 541 setappend(code1)
534 542
535 # XXX: <fl> should move set optimization to compiler! 543 # XXX: <fl> should move set optimization to compiler!
536 if _len(set)==1 and set[0][0] is LITERAL: 544 if _len(set)==1 and set[0][0] is LITERAL:
537 subpatternappend(set[0]) # optimization 545 subpatternappend(set[0]) # optimization
538 elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL: 546 elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
539 subpatternappend((NOT_LITERAL, set[1][1])) # optimization 547 subpatternappend((NOT_LITERAL, set[1][1])) # optimization
540 else: 548 else:
541 # XXX: <fl> should add charmap optimization here 549 # XXX: <fl> should add charmap optimization here
542 subpatternappend((IN, set)) 550 subpatternappend((IN, set))
543 551
544 elif this and this[0] in REPEAT_CHARS: 552 elif this and this[0] in REPEAT_CHARS:
545 # repeat previous item 553 # repeat previous item
554 here = source.tell()
546 if this == "?": 555 if this == "?":
547 min, max = 0, 1 556 min, max = 0, 1
548 elif this == "*": 557 elif this == "*":
549 min, max = 0, MAXREPEAT 558 min, max = 0, MAXREPEAT
550 559
551 elif this == "+": 560 elif this == "+":
552 min, max = 1, MAXREPEAT 561 min, max = 1, MAXREPEAT
553 elif this == "{": 562 elif this == "{":
554 if source.next == "}": 563 if source.next == "}":
555 subpatternappend((LITERAL, ord(this))) 564 subpatternappend((LITERAL, ord(this)))
556 continue 565 continue
557 here = source.tell()
558 min, max = 0, MAXREPEAT 566 min, max = 0, MAXREPEAT
559 lo = hi = "" 567 lo = hi = ""
560 while source.next in DIGITS: 568 while source.next in DIGITS:
561 lo = lo + source.get() 569 lo = lo + source.get()
562 if sourcematch(","): 570 if sourcematch(","):
563 while source.next in DIGITS: 571 while source.next in DIGITS:
564 hi = hi + sourceget() 572 hi = hi + sourceget()
565 else: 573 else:
566 hi = lo 574 hi = lo
567 if not sourcematch("}"): 575 if not sourcematch("}"):
568 subpatternappend((LITERAL, ord(this))) 576 subpatternappend((LITERAL, ord(this)))
569 source.seek(here) 577 source.seek(here)
570 continue 578 continue
571 if lo: 579 if lo:
572 min = int(lo) 580 min = int(lo)
573 if min >= MAXREPEAT: 581 if min >= MAXREPEAT:
574 raise OverflowError("the repetition number is too large" ) 582 raise OverflowError("the repetition number is too large" )
575 if hi: 583 if hi:
576 max = int(hi) 584 max = int(hi)
577 if max >= MAXREPEAT: 585 if max >= MAXREPEAT:
578 raise OverflowError("the repetition number is too large" ) 586 raise OverflowError("the repetition number is too large" )
579 if max < min: 587 if max < min:
580 raise error("bad repeat interval") 588 raise source.error("bad repeat interval",
589 source.tell() - here)
581 else: 590 else:
582 raise error("not supported") 591 raise source.error("not supported", len(this))
583 # figure out which item to repeat 592 # figure out which item to repeat
584 if subpattern: 593 if subpattern:
585 item = subpattern[-1:] 594 item = subpattern[-1:]
586 else: 595 else:
587 item = None 596 item = None
588 if not item or (_len(item) == 1 and item[0][0] == AT): 597 if not item or (_len(item) == 1 and item[0][0] == AT):
589 raise error("nothing to repeat") 598 raise source.error("nothing to repeat",
599 source.tell() - here + len(this))
590 if item[0][0] in REPEATCODES: 600 if item[0][0] in REPEATCODES:
591 raise error("multiple repeat") 601 raise source.error("multiple repeat",
602 source.tell() - here + len(this))
592 if sourcematch("?"): 603 if sourcematch("?"):
593 subpattern[-1] = (MIN_REPEAT, (min, max, item)) 604 subpattern[-1] = (MIN_REPEAT, (min, max, item))
594 else: 605 else:
595 subpattern[-1] = (MAX_REPEAT, (min, max, item)) 606 subpattern[-1] = (MAX_REPEAT, (min, max, item))
596 607
597 elif this == ".": 608 elif this == ".":
598 subpatternappend((ANY, None)) 609 subpatternappend((ANY, None))
599 610
600 elif this == "(": 611 elif this == "(":
601 group = 1 612 group = 1
602 name = None 613 name = None
603 condgroup = None 614 condgroup = None
604 if sourcematch("?"): 615 if sourcematch("?"):
605 group = 0 616 group = 0
606 # options 617 # options
607 if sourcematch("P"): 618 if sourcematch("P"):
608 # python extensions 619 # python extensions
609 if sourcematch("<"): 620 if sourcematch("<"):
610 # named group: skip forward to end of name 621 # named group: skip forward to end of name
611 name = "" 622 name = ""
612 while 1: 623 while 1:
613 char = sourceget() 624 char = sourceget()
614 if char is None: 625 if char is None:
615 raise error("unterminated name") 626 raise source.error("unterminated name", 0)
616 if char == ">": 627 if char == ">":
617 break 628 break
618 name = name + char 629 name = name + char
619 group = 1 630 group = 1
620 if not name: 631 if not name:
621 raise error("missing group name") 632 raise source.error("missing group name", 1)
622 if not name.isidentifier(): 633 if not name.isidentifier():
623 raise error("bad character in group name %r" % name) 634 raise source.error("bad character in group name "
635 "%r" % name,
636 len(name) + 1)
624 elif sourcematch("="): 637 elif sourcematch("="):
625 # named backreference 638 # named backreference
626 name = "" 639 name = ""
627 while 1: 640 while 1:
628 char = sourceget() 641 char = sourceget()
629 if char is None: 642 if char is None:
630 raise error("unterminated name") 643 raise source.error("unterminated name", 0)
631 if char == ")": 644 if char == ")":
632 break 645 break
633 name = name + char 646 name = name + char
634 if not name: 647 if not name:
635 raise error("missing group name") 648 raise source.error("missing group name", 1)
636 if not name.isidentifier(): 649 if not name.isidentifier():
637 raise error("bad character in backref group name " 650 raise source.error("bad character in backref "
638 "%r" % name) 651 "group name %r" % name,
652 len(name) + 1)
639 gid = state.groupdict.get(name) 653 gid = state.groupdict.get(name)
640 if gid is None: 654 if gid is None:
641 msg = "unknown group name: {0!r}".format(name) 655 msg = "unknown group name: {0!r}".format(name)
642 raise error(msg) 656 raise source.error(msg, len(name) + 1)
643 subpatternappend((GROUPREF, gid)) 657 subpatternappend((GROUPREF, gid))
644 continue 658 continue
645 else: 659 else:
646 char = sourceget() 660 char = sourceget()
647 if char is None: 661 if char is None:
648 raise error("unexpected end of pattern") 662 raise source.error("unexpected end of pattern", 0)
649 raise error("unknown specifier: ?P%s" % char) 663 raise source.error("unknown specifier: ?P%s" % char,
664 len(char))
650 elif sourcematch(":"): 665 elif sourcematch(":"):
651 # non-capturing group 666 # non-capturing group
652 group = 2 667 group = 2
653 elif sourcematch("#"): 668 elif sourcematch("#"):
654 # comment 669 # comment
655 while 1: 670 while 1:
656 if source.next is None or source.next == ")": 671 if source.next is None or source.next == ")":
657 break 672 break
658 sourceget() 673 sourceget()
659 if not sourcematch(")"): 674 if not sourcematch(")"):
660 raise error("unbalanced parenthesis") 675 raise source.error("unbalanced parenthesis", 0)
661 continue 676 continue
662 elif source.next in ASSERTCHARS: 677 elif source.next in ASSERTCHARS:
663 # lookahead assertions 678 # lookahead assertions
664 char = sourceget() 679 char = sourceget()
665 dir = 1 680 dir = 1
666 if char == "<": 681 if char == "<":
667 if source.next not in LOOKBEHINDASSERTCHARS: 682 if source.next not in LOOKBEHINDASSERTCHARS:
668 raise error("syntax error") 683 raise source.error("syntax error", 0)
669 dir = -1 # lookbehind 684 dir = -1 # lookbehind
670 char = sourceget() 685 char = sourceget()
671 p = _parse_sub(source, state) 686 p = _parse_sub(source, state)
672 if not sourcematch(")"): 687 if not sourcematch(")"):
673 raise error("unbalanced parenthesis") 688 raise source.error("unbalanced parenthesis", 0)
674 if char == "=": 689 if char == "=":
675 subpatternappend((ASSERT, (dir, p))) 690 subpatternappend((ASSERT, (dir, p)))
676 else: 691 else:
677 subpatternappend((ASSERT_NOT, (dir, p))) 692 subpatternappend((ASSERT_NOT, (dir, p)))
678 continue 693 continue
679 elif sourcematch("("): 694 elif sourcematch("("):
680 # conditional backreference group 695 # conditional backreference group
681 condname = "" 696 condname = ""
682 while 1: 697 while 1:
683 char = sourceget() 698 char = sourceget()
684 if char is None: 699 if char is None:
685 raise error("unterminated name") 700 raise source.error("unterminated name", 0)
686 if char == ")": 701 if char == ")":
687 break 702 break
688 condname = condname + char 703 condname = condname + char
689 group = 2 704 group = 2
690 if not condname: 705 if not condname:
691 raise error("missing group name") 706 raise source.error("missing group name", 1)
692 if condname.isidentifier(): 707 if condname.isidentifier():
693 condgroup = state.groupdict.get(condname) 708 condgroup = state.groupdict.get(condname)
694 if condgroup is None: 709 if condgroup is None:
695 msg = "unknown group name: {0!r}".format(condname) 710 msg = "unknown group name: {0!r}".format(condname)
696 raise error(msg) 711 raise source.error(msg, len(condname) + 1)
697 else: 712 else:
698 try: 713 try:
699 condgroup = int(condname) 714 condgroup = int(condname)
700 if condgroup < 0: 715 if condgroup < 0:
701 raise ValueError 716 raise ValueError
702 except ValueError: 717 except ValueError:
703 raise error("bad character in group name") 718 raise source.error("bad character in group name",
719 len(condname) + 1)
704 if not condgroup: 720 if not condgroup:
705 raise error("bad group number") 721 raise source.error("bad group number",
722 len(condname) + 1)
706 if condgroup >= MAXGROUPS: 723 if condgroup >= MAXGROUPS:
707 raise error("the group number is too large") 724 raise source.error("the group number is too large",
725 len(condname) + 1)
708 else: 726 else:
709 # flags 727 # flags
710 if not source.next in FLAGS: 728 if not source.next in FLAGS:
711 raise error("unexpected end of pattern") 729 raise source.error("unexpected end of pattern", 0)
712 while source.next in FLAGS: 730 while source.next in FLAGS:
713 state.flags = state.flags | FLAGS[sourceget()] 731 state.flags = state.flags | FLAGS[sourceget()]
714 if group: 732 if group:
715 # parse group contents 733 # parse group contents
716 if group == 2: 734 if group == 2:
717 # anonymous group 735 # anonymous group
718 group = None 736 group = None
719 else: 737 else:
720 group = state.opengroup(name) 738 try:
739 group = state.opengroup(name)
740 except error as err:
741 raise source.error(err.msg, len(name) + 1)
721 if condgroup: 742 if condgroup:
722 p = _parse_sub_cond(source, state, condgroup) 743 p = _parse_sub_cond(source, state, condgroup)
723 else: 744 else:
724 p = _parse_sub(source, state) 745 p = _parse_sub(source, state)
725 if not sourcematch(")"): 746 if not sourcematch(")"):
726 raise error("unbalanced parenthesis") 747 raise source.error("unbalanced parenthesis", 0)
727 if group is not None: 748 if group is not None:
728 state.closegroup(group) 749 state.closegroup(group)
729 subpatternappend((SUBPATTERN, (group, p))) 750 subpatternappend((SUBPATTERN, (group, p)))
730 else: 751 else:
731 while 1: 752 while 1:
732 char = sourceget() 753 char = sourceget()
733 if char is None: 754 if char is None:
734 raise error("unexpected end of pattern") 755 raise source.error("unexpected end of pattern", 0)
735 if char == ")": 756 if char == ")":
736 break 757 break
737 raise error("unknown extension") 758 raise source.error("unknown extension", len(char))
738 759
739 elif this == "^": 760 elif this == "^":
740 subpatternappend((AT, AT_BEGINNING)) 761 subpatternappend((AT, AT_BEGINNING))
741 762
742 elif this == "$": 763 elif this == "$":
743 subpattern.append((AT, AT_END)) 764 subpattern.append((AT, AT_END))
744 765
745 elif this and this[0] == "\\": 766 elif this and this[0] == "\\":
746 code = _escape(source, this, state) 767 code = _escape(source, this, state)
747 subpatternappend(code) 768 subpatternappend(code)
748 769
749 else: 770 else:
750 raise error("parser error") 771 raise source.error("parser error", len(this))
751 772
752 return subpattern 773 return subpattern
753 774
754 def fix_flags(src, flags): 775 def fix_flags(src, flags):
755 # Check and fix flags according to the type of pattern (str or bytes) 776 # Check and fix flags according to the type of pattern (str or bytes)
756 if isinstance(src, str): 777 if isinstance(src, str):
757 if not flags & SRE_FLAG_ASCII: 778 if not flags & SRE_FLAG_ASCII:
758 flags |= SRE_FLAG_UNICODE 779 flags |= SRE_FLAG_UNICODE
759 elif flags & SRE_FLAG_UNICODE: 780 elif flags & SRE_FLAG_UNICODE:
760 raise ValueError("ASCII and UNICODE flags are incompatible") 781 raise ValueError("ASCII and UNICODE flags are incompatible")
(...skipping 10 matching lines...) Expand all
771 if pattern is None: 792 if pattern is None:
772 pattern = Pattern() 793 pattern = Pattern()
773 pattern.flags = flags 794 pattern.flags = flags
774 pattern.str = str 795 pattern.str = str
775 796
776 p = _parse_sub(source, pattern, 0) 797 p = _parse_sub(source, pattern, 0)
777 p.pattern.flags = fix_flags(str, p.pattern.flags) 798 p.pattern.flags = fix_flags(str, p.pattern.flags)
778 799
779 tail = source.get() 800 tail = source.get()
780 if tail == ")": 801 if tail == ")":
781 raise error("unbalanced parenthesis") 802 raise source.error("unbalanced parenthesis", 1)
782 elif tail: 803 elif tail:
783 raise error("bogus characters at end of regular expression") 804 raise source.error("bogus characters at end of regular expression",
805 len(tail))
784 806
785 if flags & SRE_FLAG_DEBUG: 807 if flags & SRE_FLAG_DEBUG:
786 p.dump() 808 p.dump()
787 809
788 if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE: 810 if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
789 # the VERBOSE flag was switched on inside the pattern. to be 811 # the VERBOSE flag was switched on inside the pattern. to be
790 # on the safe side, we'll parse the whole thing again... 812 # on the safe side, we'll parse the whole thing again...
791 return parse(str, p.pattern.flags) 813 return parse(str, p.pattern.flags)
792 814
793 return p 815 return p
(...skipping 19 matching lines...) Expand all
813 break # end of replacement string 835 break # end of replacement string
814 if this[0] == "\\": 836 if this[0] == "\\":
815 # group 837 # group
816 c = this[1] 838 c = this[1]
817 if c == "g": 839 if c == "g":
818 name = "" 840 name = ""
819 if s.match("<"): 841 if s.match("<"):
820 while True: 842 while True:
821 char = sget() 843 char = sget()
822 if char is None: 844 if char is None:
823 raise error("unterminated group name") 845 raise s.error("unterminated group name", 0)
824 if char == ">": 846 if char == ">":
825 break 847 break
826 name += char 848 name += char
827 if not name: 849 if not name:
828 raise error("missing group name") 850 raise s.error("missing group name", 1)
829 try: 851 try:
830 index = int(name) 852 index = int(name)
831 if index < 0: 853 if index < 0:
832 raise error("negative group number") 854 raise s.error("negative group number", len(name) + 1)
833 if index >= MAXGROUPS: 855 if index >= MAXGROUPS:
834 raise error("the group number is too large") 856 raise s.error("the group number is too large",
857 len(name) + 1)
835 except ValueError: 858 except ValueError:
836 if not name.isidentifier(): 859 if not name.isidentifier():
837 raise error("bad character in group name") 860 raise s.error("bad character in group name",
861 len(name) + 1)
838 try: 862 try:
839 index = pattern.groupindex[name] 863 index = pattern.groupindex[name]
840 except KeyError: 864 except KeyError:
841 msg = "unknown group name: {0!r}".format(name) 865 msg = "unknown group name: {0!r}".format(name)
842 raise IndexError(msg) 866 raise IndexError(msg)
843 addgroup(index) 867 addgroup(index)
844 elif c == "0": 868 elif c == "0":
845 if s.next in OCTDIGITS: 869 if s.next in OCTDIGITS:
846 this += sget() 870 this += sget()
847 if s.next in OCTDIGITS: 871 if s.next in OCTDIGITS:
848 this += sget() 872 this += sget()
849 lappend(chr(int(this[1:], 8) & 0xff)) 873 lappend(chr(int(this[1:], 8) & 0xff))
850 elif c in DIGITS: 874 elif c in DIGITS:
851 isoctal = False 875 isoctal = False
852 if s.next in DIGITS: 876 if s.next in DIGITS:
853 this += sget() 877 this += sget()
854 if (c in OCTDIGITS and this[2] in OCTDIGITS and 878 if (c in OCTDIGITS and this[2] in OCTDIGITS and
855 s.next in OCTDIGITS): 879 s.next in OCTDIGITS):
856 this += sget() 880 this += sget()
857 isoctal = True 881 isoctal = True
858 c = int(this[1:], 8) 882 c = int(this[1:], 8)
859 if c > 0o377: 883 if c > 0o377:
860 raise error('octal escape value %r outside of ' 884 raise s.error('octal escape value %r outside of '
861 'range 0-0o377' % this) 885 'range 0-0o377' % this, len(this))
862 lappend(chr(c)) 886 lappend(chr(c))
863 if not isoctal: 887 if not isoctal:
864 addgroup(int(this[1:])) 888 addgroup(int(this[1:]))
865 else: 889 else:
866 try: 890 try:
867 this = chr(ESCAPES[this][1]) 891 this = chr(ESCAPES[this][1])
868 except KeyError: 892 except KeyError:
869 pass 893 pass
870 lappend(this) 894 lappend(this)
871 else: 895 else:
(...skipping 12 matching lines...) Expand all
884 groups, literals = template 908 groups, literals = template
885 literals = literals[:] 909 literals = literals[:]
886 try: 910 try:
887 for index, group in groups: 911 for index, group in groups:
888 literals[index] = s = g(group) 912 literals[index] = s = g(group)
889 if s is None: 913 if s is None:
890 raise error("unmatched group") 914 raise error("unmatched group")
891 except IndexError: 915 except IndexError:
892 raise error("invalid group reference") 916 raise error("invalid group reference")
893 return sep.join(literals) 917 return sep.join(literals)
OLDNEW
« Doc/library/re.rst ('K') | « Lib/sre_constants.py ('k') | no next file » | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+