=== modified file 'Doc/library/re.rst' --- Doc/library/re.rst 2008-05-24 16:05:21 +0000 +++ Doc/library/re.rst 2008-05-24 21:32:48 +0000 @@ -269,6 +269,16 @@ encounter another closing parenthesis which would not be balanced and cause the regular expression engine to generate an error. +``(?P#...)`` + A parentheses-balanced comment. Like the standard comment, text + between the parentheses is ignored, but in addition to this, if + there are balanced parentheses within the commented expression, + these too will be ignored until the balancing closing parenthesis + is encountered. Also, an escaped closing parenthesis is ignored + as part of the sequence of balancing parentheses. For example, + ``(?P# 6\) There is no rule SIX (6))`` would be a well-formed + regular expression that was a complete comment. + ``(?=...)`` Matches if ``...`` matches next, but doesn't consume any of the string. This is called a lookahead assertion. For example, ``Isaac (?=Asimov)`` will match === modified file 'Lib/sre_parse.py' --- Lib/sre_parse.py 2008-04-27 12:54:15 +0000 +++ Lib/sre_parse.py 2008-05-24 21:32:48 +0000 @@ -566,6 +566,27 @@ raise error, "unknown group name" subpatternappend((GROUPREF, gid)) continue + elif sourcematch("#"): + # Python-Specific Comment -- allows for nested + # paren + depth = 1 + while 1: + if sourcematch("\\"): + # Ignore escaped characters + if not source.next: + break + elif source.next == "(": + depth += 1 + elif source.next == ")": + depth -= 1 + if not depth: + break + if source.next is None: + break + sourceget() + if not sourcematch(")"): + raise error, "unbalanced parenthesis" + continue else: char = sourceget() if char is None: === modified file 'Lib/test/test_re.py' --- Lib/test/test_re.py 2008-05-22 18:59:22 +0000 +++ Lib/test/test_re.py 2008-05-24 21:32:48 +0000 @@ -683,6 +683,33 @@ self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') self.assertEqual(pattern.sub('#', '\n'), '#\n#') + def test_nested_parenthesis_in_comments(self): + 
"""Verify that 'Hello' matches + 'Hell(?P# not the (really) bad place)o' but not + 'Hell(?# not the (really) bad place)o' (which is invalid).""" + self.assertRaises(re.error, re.compile, + 'Hell(?# not the (really) bad place)o') + + goodHello = 'Hello' + badHello = 'Hell bad place)o' + + patPyComment = re.compile('Hell(?P# not the (really) bad place)o') + self.assertEqual(patPyComment.match(goodHello).group(0), goodHello) + self.assertEqual(patPyComment.match(badHello), None) + + goodWorld = 'Hello World!' + badWorld = 'Hello ((Planet)))World!' + + self.assertRaises(re.error, re.compile, + r'Hello (?# 3\) ((Planet)))World!') + #patNumComment = re.compile(r'Hello (?# 3\) ((Planet)))World!') + #self.assertEqual(patNumComment.match(goodWorld), None) + #self.assertEqual(patNumComment.match(badWorld).group(0), badWorld) + + patNumPyComment = re.compile(r'Hello (?P# 3\) ((Planet)))World!') + self.assertEqual(patNumPyComment.match(goodWorld).group(0), goodWorld) + self.assertEqual(patNumPyComment.match(badWorld), None) + def run_re_tests(): from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR