diff -r 2b27ffe611f3 Lib/sre_parse.py --- a/Lib/sre_parse.py Mon Nov 10 18:28:53 2014 +0200 +++ b/Lib/sre_parse.py Mon Nov 10 18:47:47 2014 +0200 @@ -225,7 +225,7 @@ class Tokenizer: try: char += self.decoded_string[index] except IndexError: - raise error("bogus escape (end of line)", + raise error("bad escape", self.string, len(self.string) - 1) from None self.index = index + 1 self.next = char @@ -253,7 +253,9 @@ class Tokenizer: c = self.next self.__next() if c is None: - raise self.error("unterminated name") + if not result: + return None + raise self.error("missing %s" % terminator) if c == terminator: break result += c @@ -338,7 +340,7 @@ def _class_escape(source, escape): return LITERAL, ord(escape[1]) except ValueError: pass - raise source.error("bogus escape: %r" % escape, len(escape)) + raise source.error("bad escape", len(escape)) def _escape(source, escape, state): # handle escape code in expression @@ -392,7 +394,7 @@ def _escape(source, escape, state): group = int(escape[1:]) if group < state.groups: if not state.checkgroup(group): - raise source.error("cannot refer to open group", + raise source.error("can't refer to an open group", len(escape)) return GROUPREF, group raise ValueError @@ -400,7 +402,7 @@ def _escape(source, escape, state): return LITERAL, ord(escape[1]) except ValueError: pass - raise source.error("bogus escape: %r" % escape, len(escape)) + raise source.error("bad escape", len(escape)) def _parse_sub(source, state, nested=True): # parse an alternation: a|b|c @@ -413,7 +415,7 @@ def _parse_sub(source, state, nested=Tru if not sourcematch("|"): break if nested and source.next is not None and source.next != ")": - raise source.error("pattern not properly closed") + raise source.error("missing )") if len(items) == 1: return items[0] @@ -458,11 +460,11 @@ def _parse_sub_cond(source, state, condg if source.match("|"): item_no = _parse(source, state) if source.next == "|": - raise source.error("conditional backref with more than two 
branches") + raise source.error("missing )") else: item_no = None if source.next is not None and source.next != ")": - raise source.error("pattern not properly closed") + raise source.error("missing )") subpattern = SubPattern(state) subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) return subpattern @@ -519,7 +521,7 @@ def _parse(source, state): while True: this = sourceget() if this is None: - raise source.error("unexpected end of regular expression") + raise source.error("bad set") if this == "]" and set != start: break elif this[0] == "\\": @@ -530,7 +532,7 @@ def _parse(source, state): # potential range this = sourceget() if this is None: - raise source.error("unexpected end of regular expression") + raise source.error("bad set") if this == "]": if code1[0] is IN: code1 = code1[1][0] @@ -569,7 +571,6 @@ def _parse(source, state): min, max = 0, 1 elif this == "*": min, max = 0, MAXREPEAT - elif this == "+": min, max = 1, MAXREPEAT elif this == "{": @@ -592,15 +593,16 @@ def _parse(source, state): if lo: min = int(lo) if min >= MAXREPEAT: - raise OverflowError("the repetition number is too large") + raise source.error("repeat count too big") if hi: max = int(hi) if max >= MAXREPEAT: - raise OverflowError("the repetition number is too large") + raise source.error("repeat count too big") if max < min: - raise source.error("bad repeat interval", + raise source.error("min repeat greater than max repeat", source.tell() - here) else: + # Never reached. 
raise source.error("not supported", len(this)) # figure out which item to repeat if subpattern: @@ -611,7 +613,7 @@ def _parse(source, state): raise source.error("nothing to repeat", source.tell() - here + len(this)) if item[0][0] in _REPEATCODES: - raise source.error("multiple repeat", + raise source.error("nothing to repeat", source.tell() - here + len(this)) if sourcematch("?"): subpattern[-1] = (MIN_REPEAT, (min, max, item)) @@ -630,7 +632,7 @@ def _parse(source, state): # options char = sourceget() if char is None: - raise self.error("unexpected end of pattern") + raise source.error("unknown extension") if char == "P": # python extensions if sourcematch("<"): @@ -638,31 +640,29 @@ def _parse(source, state): name = source.getuntil(">") group = 1 if not name: - raise source.error("missing group name", 1) + raise source.error("bad group name", 1) if not name.isidentifier(): - raise source.error("bad character in group name " - "%r" % name, + raise source.error("bad group name", len(name) + 1) elif sourcematch("="): # named backreference name = source.getuntil(")") if not name: - raise source.error("missing group name", 1) + raise source.error("bad group name", 1) if not name.isidentifier(): - raise source.error("bad character in backref " - "group name %r" % name, + raise source.error("bad group name", len(name) + 1) gid = state.groupdict.get(name) if gid is None: - msg = "unknown group name: {0!r}".format(name) + msg = "unknown group" raise source.error(msg, len(name) + 1) subpatternappend((GROUPREF, gid)) continue else: char = sourceget() if char is None: - raise source.error("unexpected end of pattern") - raise source.error("unknown specifier: ?P%s" % char, + raise source.error("unknown extension") + raise source.error("unknown extension", len(char)) elif char == ":": # non-capturing group @@ -671,7 +671,7 @@ def _parse(source, state): # comment while True: if source.next is None: - raise source.error("unbalanced parenthesis") + raise source.error("missing )") 
if sourceget() == ")": break continue @@ -681,11 +681,11 @@ def _parse(source, state): if char == "<": char = sourceget() if char is None or char not in "=!": - raise source.error("syntax error") + raise source.error("missing )") dir = -1 # lookbehind p = _parse_sub(source, state) if not sourcematch(")"): - raise source.error("unbalanced parenthesis") + raise source.error("missing )") if char == "=": subpatternappend((ASSERT, (dir, p))) else: @@ -696,11 +696,11 @@ def _parse(source, state): condname = source.getuntil(")") group = 2 if not condname: - raise source.error("missing group name", 1) + raise source.error("bad group name", 1) if condname.isidentifier(): condgroup = state.groupdict.get(condname) if condgroup is None: - msg = "unknown group name: {0!r}".format(condname) + msg = "unknown group" raise source.error(msg, len(condname) + 1) else: try: @@ -708,13 +708,13 @@ def _parse(source, state): if condgroup < 0: raise ValueError except ValueError: - raise source.error("bad character in group name", + raise source.error("bad group name", len(condname) + 1) if not condgroup: - raise source.error("bad group number", + raise source.error("unknown group", len(condname) + 1) if condgroup >= MAXGROUPS: - raise source.error("the group number is too large", + raise source.error("unknown group", len(condname) + 1) elif char in FLAGS: # flags @@ -723,7 +723,7 @@ def _parse(source, state): state.flags |= FLAGS[sourceget()] verbose = state.flags & SRE_FLAG_VERBOSE else: - raise source.error("unexpected end of pattern") + raise source.error("unknown extension") if group: # parse group contents if group == 2: @@ -739,7 +739,7 @@ def _parse(source, state): else: p = _parse_sub(source, state) if not sourcematch(")"): - raise source.error("unbalanced parenthesis") + raise source.error("missing )") if group is not None: state.closegroup(group, p) subpatternappend((SUBPATTERN, (group, p))) @@ -747,7 +747,7 @@ def _parse(source, state): while True: char = sourceget() if char is 
None: - raise source.error("unexpected end of pattern") + raise source.error("unknown extension") if char == ")": break raise source.error("unknown extension", len(char)) @@ -759,6 +759,7 @@ def _parse(source, state): subpattern.append((AT, AT_END)) else: + # Never reached. raise source.error("parser error", len(this)) return subpattern @@ -789,11 +790,7 @@ def parse(str, flags=0, pattern=None): p.pattern.flags = fix_flags(str, p.pattern.flags) if source.next is not None: - if source.next == ")": - raise source.error("unbalanced parenthesis") - else: - raise source.error("bogus characters at end of regular expression", - len(tail)) + raise source.error("trailing characters in pattern") if flags & SRE_FLAG_DEBUG: p.dump() @@ -832,22 +829,22 @@ def parse_template(source, pattern): if s.match("<"): name = s.getuntil(">") if not name: - raise s.error("missing group name", 1) + raise s.error("bad group name", 1) try: index = int(name) if index < 0: - raise s.error("negative group number", len(name) + 1) + raise s.error("bad group name", len(name) + 1) if index >= MAXGROUPS: - raise s.error("the group number is too large", + raise s.error("invalid group", len(name) + 1) except ValueError: if not name.isidentifier(): - raise s.error("bad character in group name", + raise s.error("bad group name", len(name) + 1) try: index = pattern.groupindex[name] except KeyError: - msg = "unknown group name: {0!r}".format(name) + msg = "unknown group" raise IndexError(msg) addgroup(index) elif c == "0":