=== modified file 'Doc/library/re.rst' --- old/Doc/library/re.rst 2008-05-31 13:05:34 +0000 +++ new/Doc/library/re.rst 2008-06-17 18:57:14 +0000 @@ -936,6 +936,18 @@ The string passed to :func:`match` or :func:`search`. +.. attribute:: MatchObject.``group_identifier`` + + If a named capture group was included in the original regular expression, + and its name is not one of the predefined MatchObject attributes specified + above, the value captured by that named group can be accessed directly as + an attribute of the returned MatchObject. For example: + + >>> p = re.compile('(?P<foo>.*)') + >>> m = p.match('bar') + >>> m.foo + 'bar' + Examples -------- === modified file 'Lib/re.py' --- old/Lib/re.py 2008-05-20 07:49:57 +0000 +++ new/Lib/re.py 2008-06-16 17:09:36 +0000 @@ -235,7 +235,7 @@ if flags: raise ValueError('Cannot process flags argument with a compiled pattern') return pattern - if not sre_compile.isstring(pattern): + if not isinstance(pattern, basestring): raise TypeError, "first argument must be string or compiled pattern" try: p = sre_compile.compile(pattern, flags) === modified file 'Lib/sre_compile.py' --- old/Lib/sre_compile.py 2008-04-08 21:27:42 +0000 +++ new/Lib/sre_compile.py 2008-06-16 17:09:36 +0000 @@ -11,7 +11,7 @@ """Internal support module for sre""" import _sre, sys -import sre_parse + from sre_constants import * assert _sre.MAGIC == MAGIC, "SRE module mismatch" @@ -149,7 +149,7 @@ emit(OPCODES[JUMP]) tailappend(_len(code)); emit(0) code[skip] = _len(code) - skip - emit(0) # end of branch + emit(OPCODES[FAILURE]) # end of branch for tail in tail: code[tail] = _len(code) - tail elif op is CATEGORY: @@ -470,19 +470,6 @@ _compile_charset(charset, flags, code) code[skip] = len(code) - skip -try: - unicode -except NameError: - STRING_TYPES = (type(""),) -else: - STRING_TYPES = (type(""), type(unicode(""))) - -def isstring(obj): - for tp in STRING_TYPES: - if isinstance(obj, tp): - return 1 - return 0 - def _code(p, flags): flags = p.pattern.flags | 
flags @@ -501,7 +488,8 @@ def compile(p, flags=0): # internal: convert pattern list to internal format - if isstring(p): + if isinstance(p, basestring): + import sre_parse pattern = p p = sre_parse.parse(p, flags) else: === modified file 'Lib/sre_constants.py' --- old/Lib/sre_constants.py 2004-08-25 02:22:30 +0000 +++ new/Lib/sre_constants.py 2008-06-16 17:09:36 +0000 @@ -13,7 +13,7 @@ # update when constants are added or removed -MAGIC = 20031017 +MAGIC = 20080329 # max code word in this release === modified file 'Lib/sre_parse.py' --- old/Lib/sre_parse.py 2008-05-27 01:18:39 +0000 +++ new/Lib/sre_parse.py 2008-06-16 17:09:36 +0000 @@ -422,8 +422,6 @@ # character set set = [] setappend = set.append -## if sourcematch(":"): -## pass # handle character classes if sourcematch("^"): setappend((NEGATE, None)) # check remaining characters === modified file 'Lib/test/test_re.py' --- old/Lib/test/test_re.py 2008-01-10 21:59:42 +0000 +++ new/Lib/test/test_re.py 2008-06-17 12:57:01 +0000 @@ -35,6 +35,23 @@ self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3)) self.assertEqual(re.match('a+', 'xxx'), None) + def test_branching(self): + """Test Branching + Test expressions using the OR ('|') operator.""" + self.assertEqual(re.match('(ab|ba)', 'ab').span(), (0, 2)) + self.assertEqual(re.match('(ab|ba)', 'ba').span(), (0, 2)) + self.assertEqual(re.match('(abc|bac|ca|cb)', 'abc').span(), + (0, 3)) + self.assertEqual(re.match('(abc|bac|ca|cb)', 'bac').span(), + (0, 3)) + self.assertEqual(re.match('(abc|bac|ca|cb)', 'ca').span(), + (0, 2)) + self.assertEqual(re.match('(abc|bac|ca|cb)', 'cb').span(), + (0, 2)) + self.assertEqual(re.match('((a)|(b)|(c))', 'a').span(), (0, 1)) + self.assertEqual(re.match('((a)|(b)|(c))', 'b').span(), (0, 1)) + self.assertEqual(re.match('((a)|(b)|(c))', 'c').span(), (0, 1)) + def bump_num(self, matchobj): int_value = int(matchobj.group(0)) return str(int_value + 1) @@ -644,8 +661,8 @@ def test_inline_flags(self): # Bug #1700 - upper_char = 
unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow - lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow + upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below + lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below p = re.compile(upper_char, re.I | re.U) q = p.match(lower_char) @@ -672,7 +689,8 @@ self.assertNotEqual(q, None) def test_dollar_matches_twice(self): - "$ matches the end of string, and just before the terminating \n" + """Test that $ does not include \\n + $ matches the end of string, and just before the terminating \n""" pattern = re.compile('$') self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#') self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#') @@ -683,6 +701,34 @@ self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#') self.assertEqual(pattern.sub('#', '\n'), '#\n#') + def test_named_match_groups(self): + """Test Named Match Group Attributes + e.g. re.match('(?P<foo>x)', 'x').foo""" + match = re.match('(?P<foo>x)(?P<bar>y)(?P<pos>z)', 'xyz') + + self.assertEqual(match.foo, 'x') + self.assertEqual(match.bar, 'y') + + # Show that the pos attribute blocks the same named group + self.assertEqual(match.pos, 0) + + # Make sure match.1 does not match the first match group; This + # is invalid Python so it should raise a SyntaxError, which is + # captured using the eval statement; then we test via the + # getattr helper that a raw number would not be a valid Type + # (TypeError) for getattr and that the string version would not + # map to an int and thus raises an AttributeError. + self.assertRaises(SyntaxError, eval, "match.1", globals(), + locals()) + self.assertRaises(TypeError, getattr, match, 1) + self.assertRaises(AttributeError, getattr, match, "1") + + # Make sure bad attributes still raise an exception + # (i.e. 
AttributeError) + self.assertRaises(AttributeError, eval, "match.baz", globals(), + locals()) + self.assertRaises(AttributeError, getattr, match, "baz") + def run_re_tests(): from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR === modified file 'Modules/_sre.c' --- old/Modules/_sre.c 2008-06-09 04:58:54 +0000 +++ new/Modules/_sre.c 2008-06-17 13:00:41 +0000 @@ -55,8 +55,8 @@ #define SRE_PY_MODULE "re" -/* defining this one enables tracing */ -#undef VERBOSE +/* uncomment this define to enable tracing */ +/* #define VERBOSE_SRE_ENGINE */ #if PY_VERSION_HEX >= 0x01060000 #if PY_VERSION_HEX < 0x02020000 || defined(Py_USING_UNICODE) @@ -101,7 +101,7 @@ #define SRE_ERROR_MEMORY -9 /* out of memory */ #define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ -#if defined(VERBOSE) +#if defined(VERBOSE_SRE_ENGINE) #define TRACE(v) printf v #else #define TRACE(v) @@ -3114,7 +3114,7 @@ return Py_None; } - if (!strcmp(name, "lastgroup")) { + else if (!strcmp(name, "lastgroup")) { if (self->pattern->indexgroup && self->lastindex >= 0) { PyObject* result = PySequence_GetItem( self->pattern->indexgroup, self->lastindex @@ -3127,7 +3127,7 @@ return Py_None; } - if (!strcmp(name, "string")) { + else if (!strcmp(name, "string")) { if (self->string) { Py_INCREF(self->string); return self->string; @@ -3137,7 +3137,7 @@ } } - if (!strcmp(name, "regs")) { + else if (!strcmp(name, "regs")) { if (self->regs) { Py_INCREF(self->regs); return self->regs; @@ -3145,19 +3145,34 @@ return match_regs(self); } - if (!strcmp(name, "re")) { + else if (!strcmp(name, "re")) { Py_INCREF(self->pattern); return (PyObject*) self->pattern; } - if (!strcmp(name, "pos")) + else if (!strcmp(name, "pos")) { return Py_BuildValue("i", self->pos); + } - if (!strcmp(name, "endpos")) + else if (!strcmp(name, "endpos")) { return Py_BuildValue("i", self->endpos); - - PyErr_SetString(PyExc_AttributeError, name); - return NULL; + } + + else { + PyObject *pyName = 
PyString_FromString(name); + res = match_getslice(self, pyName, Py_None); + Py_DECREF(pyName); + + if (res) { + return res; + } + else { + PyErr_Clear(); + + PyErr_SetString(PyExc_AttributeError, name); + return NULL; + } + } } /* FIXME: implement setattr("string", None) as a special case (to === modified file 'Modules/sre_constants.h' --- old/Modules/sre_constants.h 2003-10-17 22:13:16 +0000 +++ new/Modules/sre_constants.h 2008-06-16 17:09:36 +0000 @@ -11,7 +11,7 @@ * See the _sre.c file for information on usage and redistribution. */ -#define SRE_MAGIC 20031017 +#define SRE_MAGIC 20080329 #define SRE_OP_FAILURE 0 #define SRE_OP_SUCCESS 1 #define SRE_OP_ANY 2