=== modified file 'Doc/library/re.rst'
--- Doc/library/re.rst	2008-05-09 06:36:07 +0000
+++ Doc/library/re.rst	2008-05-24 21:32:48 +0000
@@ -229,6 +229,14 @@
    *cannot* be retrieved after performing a match or referenced later in the
    pattern.
 
+``(?P...)``
+   Regular expressions of this form indicate a Python-specific
+   extension to the general regular expression syntax.  The ``(?P...)``
+   form is reserved for the Python programming language by an agreement
+   between Larry Wall and Guido van Rossum, under which Perl will never
+   implement any new extension to the regular expression syntax that is
+   of the form ``(?P...)``.
+
 ``(?P<name>...)``
    Similar to regular parentheses, but the substring matched by the group is
    accessible via the symbolic group name *name*.  Group names must be valid Python
@@ -248,6 +256,29 @@
 ``(?#...)``
    A comment; the contents of the parentheses are simply ignored.
 
+   .. note::
+
+      The first closing parenthesis encountered in the commented
+      expression will be interpreted as the comment's closing
+      parenthesis.  For example, ``He(?# 2 (TWO) ls)llo`` not only
+      fails to match the string ``Hello``, it is not even a valid
+      regular expression: the comment ``(?# 2 (TWO)`` terminates at
+      the first closing parenthesis, so the closing parenthesis that
+      follows ``ls`` is not balanced and causes the regular
+      expression engine to generate an error.
+
+``(?P#...)``
+   A parentheses-balanced comment.  Like the standard comment, text
+   between the parentheses is ignored, but in addition to this, if
+   there are balanced parentheses within the commented expression,
+   these too will be ignored until the balancing closing parenthesis
+   is encountered.  Also, an escaped closing parenthesis is not
+   counted when balancing the parentheses.  For example,
+   ``(?P# 6\) There is no rule SIX (6))`` would be a well-formed
+   regular expression that was a complete comment.
+
 ``(?=...)``
    Matches if ``...`` matches next, but doesn't consume any of the string.  This is
    called a lookahead assertion.  For example, ``Isaac (?=Asimov)`` will match

=== modified file 'Lib/re.py'
--- Lib/re.py	2008-05-20 07:49:57 +0000
+++ Lib/re.py	2008-05-24 18:56:21 +0000
@@ -235,7 +235,7 @@
         if flags:
             raise ValueError('Cannot process flags argument with a compiled pattern')
         return pattern
-    if not sre_compile.isstring(pattern):
+    if not isinstance(pattern, basestring):
         raise TypeError, "first argument must be string or compiled pattern"
     try:
         p = sre_compile.compile(pattern, flags)

=== modified file 'Lib/sre_compile.py'
--- Lib/sre_compile.py	2008-04-08 21:27:42 +0000
+++ Lib/sre_compile.py	2008-05-24 21:31:18 +0000
@@ -11,7 +11,7 @@
 """Internal support module for sre"""
 
 import _sre, sys
-import sre_parse
+
 from sre_constants import *
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
@@ -149,7 +149,7 @@
                 emit(OPCODES[JUMP])
                 tailappend(_len(code)); emit(0)
                 code[skip] = _len(code) - skip
-            emit(0) # end of branch
+            emit(OPCODES[FAILURE]) # end of branch
             for tail in tail:
                 code[tail] = _len(code) - tail
         elif op is CATEGORY:
@@ -470,19 +470,6 @@
         _compile_charset(charset, flags, code)
     code[skip] = len(code) - skip
 
-try:
-    unicode
-except NameError:
-    STRING_TYPES = (type(""),)
-else:
-    STRING_TYPES = (type(""), type(unicode("")))
-
-def isstring(obj):
-    for tp in STRING_TYPES:
-        if isinstance(obj, tp):
-            return 1
-    return 0
-
 def _code(p, flags):
 
     flags = p.pattern.flags | flags
@@ -501,7 +488,8 @@
 def compile(p, flags=0):
     # internal: convert pattern list to internal format
 
-    if isstring(p):
+    if isinstance(p, basestring):
+        import sre_parse
         pattern = p
         p = sre_parse.parse(p, flags)
     else:

=== modified file 'Lib/sre_constants.py'
--- Lib/sre_constants.py	2004-08-25 02:22:30 +0000
+++ Lib/sre_constants.py	2008-05-24 21:31:18 +0000
@@ -13,7 +13,7 @@
 
 # update when constants are added or removed
 
-MAGIC = 20031017
+MAGIC = 20080329
 
 # max code word in this release
 

=== modified file 'Lib/sre_parse.py'
--- Lib/sre_parse.py	2006-12-19 08:17:50 +0000
+++ Lib/sre_parse.py	2008-05-24 21:32:48 +0000
@@ -424,8 +424,6 @@
             # character set
             set = []
             setappend = set.append
-##          if sourcematch(":"):
-##              pass # handle character classes
             if sourcematch("^"):
                 setappend((NEGATE, None))
             # check remaining characters
@@ -568,6 +566,27 @@
                             raise error, "unknown group name"
                         subpatternappend((GROUPREF, gid))
                         continue
+                    elif sourcematch("#"):
+                        # Python-Specific Comment -- allows for nested
+                        # paren
+                        depth = 1
+                        while 1:
+                            if sourcematch("\\"):
+                                # Ignore escaped characters
+                                if not source.next:
+                                    break
+                            elif source.next == "(":
+                                depth += 1
+                            elif source.next == ")":
+                                depth -= 1
+                                if not depth:
+                                    break
+                            if source.next is None:
+                                break
+                            sourceget()
+                        if not sourcematch(")"):
+                            raise error, "unbalanced parenthesis"
+                        continue
                     else:
                         char = sourceget()
                         if char is None:

=== modified file 'Lib/test/test_re.py'
--- Lib/test/test_re.py	2008-01-10 21:59:42 +0000
+++ Lib/test/test_re.py	2008-05-24 21:32:48 +0000
@@ -644,8 +644,8 @@
 
     def test_inline_flags(self):
         # Bug #1700
-        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
-        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
+        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below
+        lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below
 
         p = re.compile(upper_char, re.I | re.U)
         q = p.match(lower_char)
@@ -683,6 +683,33 @@
         self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
         self.assertEqual(pattern.sub('#', '\n'), '#\n#')
 
+    def test_nested_parenthesis_in_comments(self):
+        """Verify that 'Hello' matches
+        'Hell(?P# not the (really) bad place)o' but not
+        'Hell(?# not the (really) bad place)o' (which is invalid)."""
+        self.assertRaises(re.error, re.compile,
+	                  'Hell(?# not the (really) bad place)o')
+
+        goodHello = 'Hello'
+        badHello = 'Hell bad place)o'
+
+        patPyComment = re.compile('Hell(?P# not the (really) bad place)o')
+        self.assertEqual(patPyComment.match(goodHello).group(0), goodHello)
+        self.assertEqual(patPyComment.match(badHello), None)
+
+        goodWorld = 'Hello World!'
+        badWorld = 'Hello  ((Planet)))World!'
+
+        self.assertRaises(re.error, re.compile,
+	                  r'Hello (?# 3\) ((Planet)))World!')
+        #patNumComment = re.compile(r'Hello (?# 3\) ((Planet)))World!')
+        #self.assertEqual(patNumComment.match(goodWorld), None)
+        #self.assertEqual(patNumComment.match(badWorld).group(0), badWorld)
+
+        patNumPyComment = re.compile(r'Hello (?P# 3\) ((Planet)))World!')
+        self.assertEqual(patNumPyComment.match(goodWorld).group(0), goodWorld)
+        self.assertEqual(patNumPyComment.match(badWorld), None)
+
 
 def run_re_tests():
     from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR

=== modified file 'Modules/sre_constants.h'
--- Modules/sre_constants.h	2003-10-17 22:13:16 +0000
+++ Modules/sre_constants.h	2008-05-24 21:31:18 +0000
@@ -11,7 +11,7 @@
  * See the _sre.c file for information on usage and redistribution.
  */
 
-#define SRE_MAGIC 20031017
+#define SRE_MAGIC 20080329
 #define SRE_OP_FAILURE 0
 #define SRE_OP_SUCCESS 1
 #define SRE_OP_ANY 2