# Summary of this patch (text before the first "diff" header is not part of
# any hunk).
#
# Lib/sre_parse.py
#   * Three code paths that parse a three-digit octal escape now compute the
#     value first, emit UserWarning('octal escape value > 0o377') when it
#     exceeds 0o377, and then still return/append the value masked with 0xff,
#     so the resulting character is unchanged.
#   * The '\0'-prefixed escape path drops its '& 0xff' mask: that branch reads
#     at most two further octal digits (getwhile(2, OCTDIGITS)), so the value
#     cannot exceed 0o77.
# Lib/test/test_re.py
#   * Existing assertions that use \400, \777 and \567 are wrapped in
#     assertWarnsRegex(UserWarning, ...); two new assertions cover "[\567]"
#     for str and bytes patterns.
#
# NOTE(review): stacklevel=8 / stacklevel=4 are hard-coded; presumably chosen
# so the warning is attributed to the caller of the re.* function -- confirm
# against the actual call depth of each path.
# NOTE(review): this copy was reflowed from a whitespace-collapsed paste.
# Line boundaries and blank context lines agree with the @@ hunk counts, but
# leading indentation was reconstructed from Python nesting -- TODO confirm
# against the target files before applying (or apply with
# --ignore-whitespace).
diff -r 11cf18ec1900 Lib/sre_parse.py
--- a/Lib/sre_parse.py	Sat Sep 06 22:49:07 2014 +0300
+++ b/Lib/sre_parse.py	Mon Sep 08 14:02:50 2014 +0300
@@ -283,7 +283,12 @@
         elif c in OCTDIGITS:
             # octal escape (up to three digits)
             escape += source.getwhile(2, OCTDIGITS)
-            return LITERAL, int(escape[1:], 8) & 0xff
+            c = int(escape[1:], 8)
+            if c > 0o377:
+                import warnings
+                warnings.warn('octal escape value > 0o377',
+                              UserWarning, stacklevel=8)
+            return LITERAL, c & 0xff
         elif c in DIGITS:
             raise ValueError
         if len(escape) == 2:
@@ -325,7 +330,7 @@
         elif c == "0":
             # octal escape
             escape += source.getwhile(2, OCTDIGITS)
-            return LITERAL, int(escape[1:], 8) & 0xff
+            return LITERAL, int(escape[1:], 8)
         elif c in DIGITS:
             # octal escape *or* decimal group reference (sigh)
             if source.next in DIGITS:
@@ -334,7 +339,12 @@
                     source.next in OCTDIGITS):
                     # got three octal digits; this is an octal escape
                     escape = escape + source.get()
-                    return LITERAL, int(escape[1:], 8) & 0xff
+                    c = int(escape[1:], 8)
+                    if c > 0o377:
+                        import warnings
+                        warnings.warn('octal escape value > 0o377',
+                                      UserWarning, stacklevel=8)
+                    return LITERAL, c & 0xff
             # not an octal escape, so this is a group reference
             group = int(escape[1:])
             if group < state.groups:
@@ -825,7 +835,12 @@
                         s.next in OCTDIGITS):
                         this += sget()
                         isoctal = True
-                        lappend(chr(int(this[1:], 8) & 0xff))
+                        c = int(this[1:], 8)
+                        if c > 0o377:
+                            import warnings
+                            warnings.warn('octal escape value > 0o377',
+                                          UserWarning, stacklevel=4)
+                        lappend(chr(c & 0xff))
                 if not isoctal:
                     addgroup(int(this[1:]))
             else:
diff -r 11cf18ec1900 Lib/test/test_re.py
--- a/Lib/test/test_re.py	Sat Sep 06 22:49:07 2014 +0300
+++ b/Lib/test/test_re.py	Mon Sep 08 14:02:50 2014 +0300
@@ -154,8 +154,10 @@
         self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
         self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
 
-        self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
-        self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
+        with self.assertWarnsRegex(UserWarning, 'octal escape value > 0o377'):
+            self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
+        with self.assertWarnsRegex(UserWarning, 'octal escape value > 0o377'):
+            self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
 
         self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
         self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
@@ -691,7 +693,8 @@
         self.assertIsNotNone(re.match(r"\08", "\0008"))
         self.assertIsNotNone(re.match(r"\01", "\001"))
         self.assertIsNotNone(re.match(r"\018", "\0018"))
-        self.assertIsNotNone(re.match(r"\567", chr(0o167)))
+        with self.assertWarnsRegex(UserWarning, 'octal escape value > 0o377'):
+            self.assertIsNotNone(re.match(r"\567", chr(0o167)))
         self.assertRaises(re.error, re.match, r"\911", "")
         self.assertRaises(re.error, re.match, r"\x1", "")
         self.assertRaises(re.error, re.match, r"\x1z", "")
@@ -719,6 +722,8 @@
             self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i)))
             self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
             self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
+        with self.assertWarnsRegex(UserWarning, 'octal escape value > 0o377'):
+            self.assertIsNotNone(re.match(r"[\567]", chr(0o167)))
         self.assertIsNotNone(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
         self.assertRaises(re.error, re.match, r"[\911]", "")
         self.assertRaises(re.error, re.match, r"[\x1z]", "")
@@ -740,7 +745,8 @@
         self.assertIsNotNone(re.match(br"\08", b"\0008"))
         self.assertIsNotNone(re.match(br"\01", b"\001"))
         self.assertIsNotNone(re.match(br"\018", b"\0018"))
-        self.assertIsNotNone(re.match(br"\567", bytes([0o167])))
+        with self.assertWarnsRegex(UserWarning, 'octal escape value > 0o377'):
+            self.assertIsNotNone(re.match(br"\567", bytes([0o167])))
         self.assertRaises(re.error, re.match, br"\911", b"")
         self.assertRaises(re.error, re.match, br"\x1", b"")
         self.assertRaises(re.error, re.match, br"\x1z", b"")
@@ -755,6 +761,8 @@
             self.assertIsNotNone(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
             self.assertIsNotNone(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
             self.assertIsNotNone(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
+        with self.assertWarnsRegex(UserWarning, 'octal escape value > 0o377'):
+            self.assertIsNotNone(re.match(br"[\567]", bytes([0o167])))
         self.assertIsNotNone(re.match(br"[\u]", b'u'))
         self.assertIsNotNone(re.match(br"[\U]", b'U'))
         self.assertRaises(re.error, re.match, br"[\911]", "")