Index: src/Lib/sre_compile.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v retrieving revision 1.42 diff -u -r1.42 sre_compile.py --- src/Lib/sre_compile.py 2 Jun 2002 00:40:05 -0000 1.42 +++ src/Lib/sre_compile.py 24 Jun 2002 00:17:28 -0000 @@ -139,6 +139,20 @@ else: emit(OPCODES[op]) emit(av-1) + elif op is GROUPREF_EXISTS: + emit(OPCODES[op]) + # emit the zero-based group index, as GROUPREF does; the matcher doubles it when indexing state->mark + emit(av[0]-1) + skipyes = len(code); emit(0) + _compile(code, av[1], flags) + if av[2]: + emit(OPCODES[JUMP]) + skipno = len(code); emit(0) + code[skipyes] = len(code) - skipyes + 1 + _compile(code, av[2], flags) + code[skipno] = len(code) - skipno + else: + code[skipyes] = len(code) - skipyes + 1 else: raise ValueError, ("unsupported operand type", op) Index: src/Lib/sre_constants.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/sre_constants.py,v retrieving revision 1.30 diff -u -r1.30 sre_constants.py --- src/Lib/sre_constants.py 4 Sep 2001 19:10:20 -0000 1.30 +++ src/Lib/sre_constants.py 24 Jun 2002 00:17:29 -0000 @@ -42,6 +42,7 @@ CHARSET = "charset" GROUPREF = "groupref" GROUPREF_IGNORE = "groupref_ignore" +GROUPREF_EXISTS = "groupref_exists" IN = "in" IN_IGNORE = "in_ignore" INFO = "info" @@ -107,7 +108,7 @@ CALL, CATEGORY, CHARSET, BIGCHARSET, - GROUPREF, GROUPREF_IGNORE, + GROUPREF, GROUPREF_IGNORE, GROUPREF_EXISTS, IN, IN_IGNORE, INFO, JUMP, Index: src/Lib/sre_parse.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v retrieving revision 1.55 diff -u -r1.55 sre_parse.py --- src/Lib/sre_parse.py 2 Jun 2002 00:40:05 -0000 1.55 +++ src/Lib/sre_parse.py 24 Jun 2002 00:17:32 -0000 @@ -364,6 +364,20 @@ subpattern.append((BRANCH, (None, items))) return subpattern +def _parse_sub_cond(source, state, condgroup): + item_yes = _parse(source, state) + if source.match("|"): + item_no = 
_parse(source, state) + if source.match("|"): + raise error, "conditional backref with more than two branches" + else: + item_no = None + if source.next and not source.match(")", 0): + raise error, "pattern not properly closed" + subpattern = SubPattern(state) + subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) + return subpattern + def _parse(source, state): # parse a simple pattern @@ -497,6 +511,7 @@ elif this == "(": group = 1 name = None + condgroup = None if source.match("?"): group = 0 # options @@ -566,6 +581,29 @@ else: subpattern.append((ASSERT_NOT, (dir, p))) continue + elif source.match("("): + # conditional backreference group + condname = "" + while 1: + char = source.get() + if char is None: + raise error, "unterminated name" + if char == ")": + break + condname = condname + char + group = 2 + if isname(condname): + condgroup = state.groupdict.get(condname) + if condgroup is None: + raise error, "unknown group name" + else: + try: + condgroup = atoi(condname) + except ValueError: + raise error, "bad character in group name" + # group 0 and negative numbers never name a capturing group + if condgroup <= 0: + raise error, "bad group number" else: # flags if not source.next in FLAGS: @@ -579,7 +617,10 @@ group = None else: group = state.opengroup(name) - p = _parse_sub(source, state) + if condgroup: + p = _parse_sub_cond(source, state, condgroup) + else: + p = _parse_sub(source, state) if not source.match(")"): raise error, "unbalanced parenthesis" if group is not None: Index: src/Modules/_sre.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/_sre.c,v retrieving revision 2.80 diff -u -r2.80 _sre.c --- src/Modules/_sre.c 13 Jun 2002 21:11:11 -0000 2.80 +++ src/Modules/_sre.c 24 Jun 2002 00:17:52 -0000 @@ -826,6 +826,21 @@ } } pattern++; + break; + + case SRE_OP_GROUPREF_EXISTS: + TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, ptr, pattern[0])); + /* codeyes codeno ... 
(marks 2*pattern[0] and 2*pattern[0]+1 delimit the group; if either is unset or they are reversed, pattern advances by pattern[1] instead of by 2) */ + i = pattern[0]; + { + SRE_CHAR* p = (SRE_CHAR*) state->mark[i+i]; + SRE_CHAR* e = (SRE_CHAR*) state->mark[i+i+1]; + if (!p || !e || e < p) { + pattern += pattern[1]; + break; + } + } + pattern += 2; break; case SRE_OP_LITERAL_IGNORE: Index: src/Modules/sre_constants.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/sre_constants.h,v retrieving revision 2.13 diff -u -r2.13 sre_constants.h --- src/Modules/sre_constants.h 2 Jul 2001 16:58:38 -0000 2.13 +++ src/Modules/sre_constants.h 24 Jun 2002 00:18:18 -0000 @@ -26,22 +26,23 @@ #define SRE_OP_BIGCHARSET 11 #define SRE_OP_GROUPREF 12 #define SRE_OP_GROUPREF_IGNORE 13 -#define SRE_OP_IN 14 -#define SRE_OP_IN_IGNORE 15 -#define SRE_OP_INFO 16 -#define SRE_OP_JUMP 17 -#define SRE_OP_LITERAL 18 -#define SRE_OP_LITERAL_IGNORE 19 -#define SRE_OP_MARK 20 -#define SRE_OP_MAX_UNTIL 21 -#define SRE_OP_MIN_UNTIL 22 -#define SRE_OP_NOT_LITERAL 23 -#define SRE_OP_NOT_LITERAL_IGNORE 24 -#define SRE_OP_NEGATE 25 -#define SRE_OP_RANGE 26 -#define SRE_OP_REPEAT 27 -#define SRE_OP_REPEAT_ONE 28 -#define SRE_OP_SUBPATTERN 29 +#define SRE_OP_GROUPREF_EXISTS 14 +#define SRE_OP_IN 15 +#define SRE_OP_IN_IGNORE 16 +#define SRE_OP_INFO 17 +#define SRE_OP_JUMP 18 +#define SRE_OP_LITERAL 19 +#define SRE_OP_LITERAL_IGNORE 20 +#define SRE_OP_MARK 21 +#define SRE_OP_MAX_UNTIL 22 +#define SRE_OP_MIN_UNTIL 23 +#define SRE_OP_NOT_LITERAL 24 +#define SRE_OP_NOT_LITERAL_IGNORE 25 +#define SRE_OP_NEGATE 26 +#define SRE_OP_RANGE 27 +#define SRE_OP_REPEAT 28 +#define SRE_OP_REPEAT_ONE 29 +#define SRE_OP_SUBPATTERN 30 #define SRE_AT_BEGINNING 0 #define SRE_AT_BEGINNING_LINE 1 #define SRE_AT_BEGINNING_STRING 2 Index: src/Doc/lib/libre.tex =================================================================== RCS file: /cvsroot/python/python/dist/src/Doc/lib/libre.tex,v retrieving revision 1.83 diff -u -r1.83 libre.tex --- src/Doc/lib/libre.tex 11 Apr 2002 12:24:12 -0000 1.83 +++ 
src/Doc/lib/libre.tex 24 Jun 2002 01:39:14 -0000 @@ -296,6 +296,15 @@ fixed length. Patterns which start with negative lookbehind assertions may match at the beginning of the string being searched. +\item[\code{(?(\var{id/name})yes-pattern|no-pattern)}] Will try to match +with \regexp{yes-pattern} if the group with the given \var{id} or \var{name} +exists, and with \regexp{no-pattern} if it doesn't. \regexp{|no-pattern} +is optional and can be omitted. For example, +\regexp{(<)?(\e w+@\e w+(?:\e .\e w+)+)(?(1)>)} is a poor email matching pattern, +which will match with \code{'<user@host.com>'} as well as +\code{'user@host.com'}. +\versionadded{2.3} + \end{list} The special sequences consist of \character{\e} and a character from the