diff -r b84acd655cb2 regex_3/Python/_regex_core.py
--- a/regex_3/Python/_regex_core.py Wed Dec 24 20:17:08 2014 +0000
+++ b/regex_3/Python/_regex_core.py Wed Feb 25 00:01:10 2015 +0200
@@ -402,7 +402,9 @@ def apply_quantifier(source, info, count
element = Character(characters[-1], case_flags=case_flags)
else:
# The quantifier applies to the last item in the sequence.
- if applied or not sequence:
+ if applied:
+ raise error("multiple repeat", source.string, saved_pos)
+ if not sequence:
raise error("nothing to repeat", source.string, saved_pos)
element = sequence.pop()
@@ -1047,15 +1049,15 @@ def parse_name(source, allow_numeric=Fal
name = source.get_while(set(")>"), include=False)
if not name:
- raise error("bad group name", source.string, source.pos)
+ raise error("missing group name", source.string, source.pos)
if name.isdigit():
min_group = 0 if allow_group_0 else 1
if not allow_numeric or int(name) < min_group:
- raise error("bad group name", source.string, source.pos)
+ raise error("bad character in group name", source.string, source.pos)
else:
if not name.isidentifier():
- raise error("bad group name", source.string, source.pos)
+ raise error("bad character in group name", source.string, source.pos)
return name
@@ -1079,10 +1081,10 @@ def parse_escape(source, info, in_set):
source.ignore_space = saved_ignore
if not ch:
# A backslash at the end of the pattern.
- raise error("bad escape", source.string, source.pos)
+ raise error("bad escape (end of pattern)", source.string, source.pos)
if ch in HEX_ESCAPES:
# A hexadecimal escape sequence.
- return parse_hex_escape(source, info, HEX_ESCAPES[ch], in_set)
+ return parse_hex_escape(source, info, HEX_ESCAPES[ch], in_set, ch)
elif ch == "g" and not in_set:
# A group reference.
saved_pos = source.pos
@@ -1183,15 +1185,18 @@ def parse_octal_escape(source, info, dig
value = int("".join(digits), 8)
return make_character(info, value, in_set)
except ValueError:
- raise error("bad octal escape", source.string, source.pos)
-
-def parse_hex_escape(source, info, expected_len, in_set):
+ if digits[0] in OCT_DIGITS:
+ raise error("incomplete escape \\%s" % ''.join(digits), source.string, source.pos)
+ else:
+ raise error("bad escape \\%s" % digits[0], source.string, source.pos)
+
+def parse_hex_escape(source, info, expected_len, in_set, type):
"Parses a hex escape sequence."
digits = []
for i in range(expected_len):
ch = source.get()
if ch not in HEX_DIGITS:
- raise error("bad hex escape", source.string, source.pos)
+ raise error("incomplete escape \\%s%s" % (type, ''.join(digits)), source.string, source.pos)
digits.append(ch)
value = int("".join(digits), 16)
@@ -1441,7 +1446,7 @@ def parse_set_item(source, info):
ch = source.get()
if not ch:
- raise error("bad set", source.string, source.pos)
+ raise error("unterminated character set", source.string, source.pos)
return Character(ord(ch))
@@ -1573,7 +1578,7 @@ def _compile_replacement(source, pattern
if ch in HEX_ESCAPES and (ch == "x" or is_unicode):
# A hexadecimal escape sequence.
- return False, [parse_repl_hex_escape(source, HEX_ESCAPES[ch])]
+ return False, [parse_repl_hex_escape(source, HEX_ESCAPES[ch], ch)]
if ch == "g":
# A group preference.
@@ -1629,18 +1634,18 @@ def _compile_replacement(source, pattern
if not ch:
# A trailing backslash.
- raise error("bad escape", source.string, source.pos)
+ raise error("bad escape (end of pattern)", source.string, source.pos)
# An escaped non-backslash is a backslash followed by the literal.
return False, [ord("\\"), ord(ch)]
-def parse_repl_hex_escape(source, expected_len):
+def parse_repl_hex_escape(source, expected_len, type):
"Parses a hex escape sequence in a replacement string."
digits = []
for i in range(expected_len):
ch = source.get()
if ch not in HEX_DIGITS:
- raise error("bad hex escape", source.string, source.pos)
+ raise error("incomplete escape \\%s%s" % (type, ''.join(digits)), source.string, source.pos)
digits.append(ch)
return int("".join(digits), 16)
@@ -1671,7 +1676,7 @@ def compile_repl_group(source, pattern):
if name.isdigit():
index = int(name)
if not 0 <= index <= pattern.groups:
- raise error("invalid group", source.string, source.pos)
+ raise error("invalid group reference", source.string, source.pos)
return index
@@ -2280,7 +2285,7 @@ class CallGroup(RegexBase):
raise error("unknown group", pattern, self.position)
if not 0 <= self.group <= self.info.group_count:
- raise error("unknown group", pattern, self.position)
+ raise error("invalid group reference", pattern, self.position)
if self.group > 0 and self.info.open_group_count[self.group] > 1:
raise error("ambiguous group reference", pattern, self.position)
@@ -2397,7 +2402,7 @@ class Conditional(RegexBase):
raise error("unknown group", pattern, self.position)
if not 1 <= self.group <= self.info.group_count:
- raise error("unknown group", pattern, self.position)
+ raise error("invalid group reference", pattern, self.position)
self.yes_item.fix_groups(pattern, reverse, fuzzy)
self.no_item.fix_groups(pattern, reverse, fuzzy)
@@ -3028,7 +3033,7 @@ class RefGroup(RegexBase):
raise error("unknown group", pattern, self.position)
if not 1 <= self.group <= self.info.group_count:
- raise error("unknown group", pattern, self.position)
+ raise error("invalid group reference", pattern, self.position)
self._key = self.__class__, self.group, self.case_flags
@@ -3989,7 +3994,7 @@ class Scanner:
source.ignore_space = bool(info.flags & VERBOSE)
parsed = _parse_pattern(source, info)
if not source.at_end():
- raise error("trailing characters", source.string, source.pos)
+ raise error("unbalanced parenthesis", source.string, source.pos)
# We want to forbid capture groups within each phrase.
patterns.append(parsed.remove_captures())
diff -r b84acd655cb2 regex_3/Python/regex.py
--- a/regex_3/Python/regex.py Wed Dec 24 20:17:08 2014 +0000
+++ b/regex_3/Python/regex.py Wed Feb 25 00:01:10 2015 +0200
@@ -499,7 +499,7 @@ def _compile(pattern, flags=0, kwargs={}
caught_exception.pos)
if not source.at_end():
- raise error("trailing characters in pattern", pattern, source.pos)
+ raise error("unbalanced parenthesis", pattern, source.pos)
# Check the global flags for conflicts.
version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
diff -r b84acd655cb2 regex_3/Python/test_regex.py
--- a/regex_3/Python/test_regex.py Wed Dec 24 20:17:08 2014 +0000
+++ b/regex_3/Python/test_regex.py Wed Feb 25 00:01:10 2015 +0200
@@ -20,23 +20,25 @@ class RegexTests(unittest.TestCase):
FLAGS_WITH_COMPILED_PAT = "cannot process flags argument with a compiled pattern"
INVALID_GROUP_REF = "invalid group reference"
MISSING_GT = "missing >"
- BAD_GROUP_NAME = "bad group name"
+ BAD_GROUP_NAME = "bad character in group name"
+ MISSING_GROUP_NAME = "missing group name"
MISSING_LT = "missing <"
UNKNOWN_GROUP_I = "unknown group"
UNKNOWN_GROUP = "unknown group"
- BAD_ESCAPE = "bad escape"
- BAD_OCTAL_ESCAPE = "bad octal escape"
- BAD_SET = "bad set"
+ BAD_ESCAPE = r"bad escape \(end of pattern\)"
+ BAD_OCTAL_ESCAPE = r"bad escape \\"
+ BAD_SET = "unterminated character set"
STR_PAT_ON_BYTES = "can't use a string pattern on a bytes-like object"
BYTES_PAT_ON_STR = "can't use a bytes pattern on a string-like object"
STR_PAT_BYTES_TEMPL = "expected str instance, not bytes"
BYTES_PAT_STR_TEMPL = "expected bytes-like object, not str"
BYTES_PAT_UNI_FLAG = "cannot use UNICODE flag with a bytes pattern"
MIXED_FLAGS = "ASCII, LOCALE and UNICODE flags are mutually incompatible"
- MISSING_RPAREN = "missing \\)" # Need to escape parenthesis for unittest.
- TRAILING_CHARS = "trailing characters in pattern"
+ MISSING_RPAREN = "missing \\)"
+ TRAILING_CHARS = "unbalanced parenthesis"
BAD_CHAR_RANGE = "bad character range"
NOTHING_TO_REPEAT = "nothing to repeat"
+ MULTIPLE_REPEAT = "multiple repeat"
OPEN_GROUP = "cannot refer to an open group"
DUPLICATE_GROUP = "duplicate group"
CANT_TURN_OFF = "bad inline flags: cannot turn flags off"
@@ -227,7 +229,7 @@ class RegexTests(unittest.TestCase):
def test_symbolic_refs(self):
self.assertRaisesRegex(regex.error, self.MISSING_GT, lambda:
regex.sub('(?Px)', r'\gx)', r'\g<', 'xx'))
self.assertRaisesRegex(regex.error, self.MISSING_LT, lambda:
regex.sub('(?Px)', r'\g', 'xx'))
@@ -712,7 +714,7 @@ class RegexTests(unittest.TestCase):
self.assertEqual(bool(regex.match(r"\x%02xz" % i, chr(i) + "z")),
True)
- self.assertRaisesRegex(regex.error, self.UNKNOWN_GROUP, lambda:
+ self.assertRaisesRegex(regex.error, self.INVALID_GROUP_REF, lambda:
regex.match(r"\911", ""))
def test_sre_character_class_literals(self):
@@ -1709,7 +1711,7 @@ class RegexTests(unittest.TestCase):
(r'(?a)\g', 'aa', '1', ascii('a')),
# Test octal escapes.
- ('\\1', 'a', '', regex.error, self.UNKNOWN_GROUP), # Backreference.
+ ('\\1', 'a', '', regex.error, self.INVALID_GROUP_REF), # Backreference.
('[\\1]', '\1', '0', "'\\x01'"), # Character.
('\\09', chr(0) + '9', '0', ascii(chr(0) + '9')),
('\\141', 'a', '0', ascii('a')),
@@ -1943,7 +1945,7 @@ class RegexTests(unittest.TestCase):
# Character properties.
(r"\g", "g", '0', ascii('g')),
- (r"\g<1>", "g", '', regex.error, self.UNKNOWN_GROUP),
+ (r"\g<1>", "g", '', regex.error, self.INVALID_GROUP_REF),
(r"(.)\g<1>", "gg", '0', ascii('gg')),
(r"(.)\g<1>", "gg", '', ascii(('gg', 'g'))),
(r"\N", "N", '0', ascii('N')),
@@ -2033,7 +2035,7 @@ class RegexTests(unittest.TestCase):
('(a)b(c)', 'abc', '0,1,2', ascii(('abc', 'a', 'c'))),
('a+b+c', 'aabbabc', '0', ascii('abc')),
('a{1,}b{1,}c', 'aabbabc', '0', ascii('abc')),
- ('a**', '-', '', regex.error, self.NOTHING_TO_REPEAT),
+ ('a**', '-', '', regex.error, self.MULTIPLE_REPEAT),
('a.+?c', 'abcabc', '0', ascii('abc')),
('(a+|b)*', 'ab', '0,1', ascii(('ab', 'b'))),
('(a+|b){0,}', 'ab', '0,1', ascii(('ab', 'b'))),
@@ -2087,9 +2089,9 @@ class RegexTests(unittest.TestCase):
# ('((((((((((a))))))))))\\41', 'aa', '', ascii(None)),
# ('((((((((((a))))))))))\\41', 'a!', '0', ascii('a!')),
('((((((((((a))))))))))\\41', '', '', regex.error,
- self.UNKNOWN_GROUP),
+ self.INVALID_GROUP_REF),
('(?i)((((((((((a))))))))))\\41', '', '', regex.error,
- self.UNKNOWN_GROUP),
+ self.INVALID_GROUP_REF),
('(((((((((a)))))))))', 'a', '0', ascii('a')),
('multiple words of text', 'uh-uh', '', ascii(None)),
@@ -2181,7 +2183,7 @@ class RegexTests(unittest.TestCase):
('(?i)a+b+c', 'AABBABC', '0', ascii('ABC')),
('(?i)a{1,}b{1,}c', 'AABBABC', '0', ascii('ABC')),
- ('(?i)a**', '-', '', regex.error, self.NOTHING_TO_REPEAT),
+ ('(?i)a**', '-', '', regex.error, self.MULTIPLE_REPEAT),
('(?i)a.+?c', 'ABCABC', '0', ascii('ABC')),
('(?i)a.*?c', 'ABCABC', '0', ascii('ABC')),
('(?i)a.{0,5}?c', 'ABCABC', '0', ascii('ABC')),
@@ -2315,7 +2317,7 @@ 123""", '0', ascii('abc')),
('[\\D]+', '1234abc5678', '0', ascii('abc')),
('[\\da-fA-F]+', '123abc', '0', ascii('123abc')),
# Not an error under PCRE/PRE:
- # ('[\\d-x]', '-', '', regex.error, self.SYNTAX_ERROR),
+ # ('[\\d-x]', '-', '', regex.error, self.BAD_CHAR_RANGE),
(r'([\s]*)([\S]*)([\s]*)', ' testing!1972', '3,2,1', ascii(('',
'testing!1972', ' '))),
(r'(\s*)(\S*)(\s*)', ' testing!1972', '3,2,1', ascii(('',
@@ -2405,6 +2407,7 @@ 123""", '0', ascii('abc')),
group_list.append(group)
if excval is not None:
+ with self.subTest(pattern=pattern, string=string):
self.assertRaisesRegex(expected, excval,
regex.search, pattern, string)
else:
@@ -3223,7 +3226,7 @@ 123""", '0', ascii('abc')),
# Hg issue 95.
self.assertRaisesRegex(regex.error,
- '^nothing to repeat at position 3$', lambda: regex.compile(r'.???'))
+ self.MULTIPLE_REPEAT, lambda: regex.compile(r'.???'))
# Hg issue 97.
self.assertEquals(regex.escape('foo!?'), 'foo\\!\\?')