diff -r 8a2755f6ae96 Lib/sre_compile.py --- a/Lib/sre_compile.py Thu Sep 18 19:45:04 2014 +0300 +++ b/Lib/sre_compile.py Fri Sep 19 10:19:07 2014 +0300 @@ -13,7 +13,6 @@ import _sre import sre_parse from sre_constants import * -from _sre import MAXREPEAT assert _sre.MAGIC == MAGIC, "SRE module mismatch" @@ -41,65 +40,65 @@ def _compile(code, pattern, flags): for op, av in pattern: if op in LITERAL_CODES: if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) + emit(OP_IGNORE[op]) emit(_sre.getlower(av, flags)) else: - emit(OPCODES[op]) + emit(op) emit(av) elif op is IN: if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) + emit(OP_IGNORE[op]) def fixup(literal, flags=flags): return _sre.getlower(literal, flags) else: - emit(OPCODES[op]) + emit(op) fixup = _identityfunction skip = _len(code); emit(0) _compile_charset(av, flags, code, fixup) code[skip] = _len(code) - skip elif op is ANY: if flags & SRE_FLAG_DOTALL: - emit(OPCODES[ANY_ALL]) + emit(ANY_ALL) else: - emit(OPCODES[ANY]) + emit(ANY) elif op in REPEATING_CODES: if flags & SRE_FLAG_TEMPLATE: raise error("internal: unsupported template operator") elif _simple(av) and op is not REPEAT: if op is MAX_REPEAT: - emit(OPCODES[REPEAT_ONE]) + emit(REPEAT_ONE) else: - emit(OPCODES[MIN_REPEAT_ONE]) + emit(MIN_REPEAT_ONE) skip = _len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) + emit(SUCCESS) code[skip] = _len(code) - skip else: - emit(OPCODES[REPEAT]) + emit(REPEAT) skip = _len(code); emit(0) emit(av[0]) emit(av[1]) _compile(code, av[2], flags) code[skip] = _len(code) - skip if op is MAX_REPEAT: - emit(OPCODES[MAX_UNTIL]) + emit(MAX_UNTIL) else: - emit(OPCODES[MIN_UNTIL]) + emit(MIN_UNTIL) elif op is SUBPATTERN: if av[0]: - emit(OPCODES[MARK]) + emit(MARK) emit((av[0]-1)*2) # _compile_info(code, av[1], flags) _compile(code, av[1], flags) if av[0]: - emit(OPCODES[MARK]) + emit(MARK) emit((av[0]-1)*2+1) elif op in SUCCESS_CODES: - emit(OPCODES[op]) + emit(op) elif op in ASSERT_CODES: - emit(OPCODES[op]) + emit(op) skip = _len(code); emit(0) if av[0] >= 0: emit(0) # look ahead @@ -109,57 +108,57 @@ def _compile(code, pattern, flags): raise error("look-behind requires fixed-width pattern") emit(lo) # look behind _compile(code, av[1], flags) - emit(OPCODES[SUCCESS]) + emit(SUCCESS) code[skip] = _len(code) - skip elif op is CALL: - emit(OPCODES[op]) + emit(op) skip = _len(code); emit(0) _compile(code, av, flags) - emit(OPCODES[SUCCESS]) + emit(SUCCESS) code[skip] = _len(code) - skip elif op is AT: - emit(OPCODES[op]) + emit(op) if flags & SRE_FLAG_MULTILINE: av = AT_MULTILINE.get(av, av) if flags & SRE_FLAG_LOCALE: av = AT_LOCALE.get(av, av) elif flags & SRE_FLAG_UNICODE: av = AT_UNICODE.get(av, av) - emit(ATCODES[av]) + emit(av) elif op is BRANCH: - emit(OPCODES[op]) + emit(op) tail = [] tailappend = tail.append for av in av[1]: skip = _len(code); emit(0) # _compile_info(code, av, flags) _compile(code, av, flags) - emit(OPCODES[JUMP]) + emit(JUMP) tailappend(_len(code)); emit(0) code[skip] = _len(code) - skip emit(0) # end of branch for tail in tail: code[tail] = _len(code) - tail elif op is CATEGORY: - emit(OPCODES[op]) + emit(op) if flags & SRE_FLAG_LOCALE: av = CH_LOCALE[av] elif flags & SRE_FLAG_UNICODE: av = CH_UNICODE[av] - emit(CHCODES[av]) + emit(av) elif op is GROUPREF: if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) + emit(OP_IGNORE[op]) else: - emit(OPCODES[op]) + emit(op) emit(av-1) elif op is GROUPREF_EXISTS: - emit(OPCODES[op]) + emit(op) emit(av[0]-1) skipyes = _len(code); emit(0) _compile(code, av[1], flags) if av[2]: - emit(OPCODES[JUMP]) + emit(JUMP) skipno = _len(code); emit(0) code[skipyes] = _len(code) - skipyes + 1 _compile(code, av[2], flags) @@ -175,7 +174,7 @@ def _compile_charset(charset, flags, cod if fixup is None: fixup = _identityfunction for op, av in _optimize_charset(charset, fixup): - emit(OPCODES[op]) + emit(op) if op is NEGATE: pass elif op is LITERAL: @@ -189,14 +188,14 @@ def _compile_charset(charset, flags, cod code.extend(av) elif op is CATEGORY: if flags & SRE_FLAG_LOCALE: - emit(CHCODES[CH_LOCALE[av]]) + emit(CH_LOCALE[av]) elif flags & SRE_FLAG_UNICODE: - emit(CHCODES[CH_UNICODE[av]]) + emit(CH_UNICODE[av]) else: - emit(CHCODES[av]) + emit(av) else: raise error("internal: unsupported set operator") - emit(OPCODES[FAILURE]) + emit(FAILURE) def _optimize_charset(charset, fixup): # internal: optimize character set @@ -407,7 +406,7 @@ def _compile_info(code, pattern, flags): ## print "*** CHARSET", charset # add an info block emit = code.append - emit(OPCODES[INFO]) + emit(INFO) skip = len(code); emit(0) # literal flag mask = 0 @@ -453,7 +452,7 @@ def _code(p, flags): # compile the pattern _compile(code, p.data, flags) - code.append(OPCODES[SUCCESS]) + code.append(SUCCESS) return code @@ -468,7 +467,7 @@ def compile(p, flags=0): code = _code(p, flags) - # print code + # print(code) # XXX: get rid of this limitation! if p.pattern.groups > 100: diff -r 8a2755f6ae96 Lib/sre_constants.py --- a/Lib/sre_constants.py Thu Sep 18 19:45:04 2014 +0300 +++ b/Lib/sre_constants.py Fri Sep 19 10:19:07 2014 +0300 @@ -16,6 +16,7 @@ MAGIC = 20031017 from _sre import MAXREPEAT +from enum import IntEnum as _IntEnum # SRE standard exception (access as sre.error) # should this really be here? @@ -23,136 +24,75 @@ from _sre import MAXREPEAT class error(Exception): pass + +class _NamedIntConstants(_IntEnum): + def __str__(self): + return self.name + + __repr__ = __str__ + +MAXREPEAT = _NamedIntConstants('_', {'MAXREPEAT': MAXREPEAT})['MAXREPEAT'] + +def _makecodes(names): + names = names.strip().split() + names = {name: i for i, name in enumerate(names)} + cls = _NamedIntConstants('_', names) + globals().update(cls.__members__) + return cls + # operators +# failure=0 success=1 (just because it looks better that way :-) +OPCODES = _makecodes(""" + FAILURE SUCCESS -FAILURE = "failure" -SUCCESS = "success" + ANY ANY_ALL + ASSERT ASSERT_NOT + AT + BRANCH + CALL + CATEGORY + CHARSET BIGCHARSET + GROUPREF GROUPREF_EXISTS GROUPREF_IGNORE + IN IN_IGNORE + INFO + JUMP + LITERAL LITERAL_IGNORE + MARK + MAX_UNTIL + MIN_UNTIL + NOT_LITERAL NOT_LITERAL_IGNORE + NEGATE + RANGE + REPEAT + REPEAT_ONE + SUBPATTERN + MIN_REPEAT_ONE -ANY = "any" -ANY_ALL = "any_all" -ASSERT = "assert" -ASSERT_NOT = "assert_not" -AT = "at" -BIGCHARSET = "bigcharset" -BRANCH = "branch" -CALL = "call" -CATEGORY = "category" -CHARSET = "charset" -GROUPREF = "groupref" -GROUPREF_IGNORE = "groupref_ignore" -GROUPREF_EXISTS = "groupref_exists" -IN = "in" -IN_IGNORE = "in_ignore" -INFO = "info" -JUMP = "jump" -LITERAL = "literal" -LITERAL_IGNORE = "literal_ignore" -MARK = "mark" -MAX_REPEAT = "max_repeat" -MAX_UNTIL = "max_until" -MIN_REPEAT = "min_repeat" -MIN_UNTIL = "min_until" -NEGATE = "negate" -NOT_LITERAL = "not_literal" -NOT_LITERAL_IGNORE = "not_literal_ignore" -RANGE = "range" -REPEAT = "repeat" -REPEAT_ONE = "repeat_one" -SUBPATTERN = "subpattern" -MIN_REPEAT_ONE = "min_repeat_one" + MIN_REPEAT MAX_REPEAT +""") # positions -AT_BEGINNING = "at_beginning" -AT_BEGINNING_LINE = "at_beginning_line" -AT_BEGINNING_STRING = "at_beginning_string" -AT_BOUNDARY = "at_boundary" -AT_NON_BOUNDARY = "at_non_boundary" -AT_END = "at_end" -AT_END_LINE = "at_end_line" -AT_END_STRING = "at_end_string" -AT_LOC_BOUNDARY = "at_loc_boundary" -AT_LOC_NON_BOUNDARY = "at_loc_non_boundary" -AT_UNI_BOUNDARY = "at_uni_boundary" -AT_UNI_NON_BOUNDARY = "at_uni_non_boundary" +ATCODES = _makecodes(""" + AT_BEGINNING AT_BEGINNING_LINE AT_BEGINNING_STRING + AT_BOUNDARY AT_NON_BOUNDARY + AT_END AT_END_LINE AT_END_STRING + AT_LOC_BOUNDARY AT_LOC_NON_BOUNDARY + AT_UNI_BOUNDARY AT_UNI_NON_BOUNDARY +""") # categories -CATEGORY_DIGIT = "category_digit" -CATEGORY_NOT_DIGIT = "category_not_digit" -CATEGORY_SPACE = "category_space" -CATEGORY_NOT_SPACE = "category_not_space" -CATEGORY_WORD = "category_word" -CATEGORY_NOT_WORD = "category_not_word" -CATEGORY_LINEBREAK = "category_linebreak" -CATEGORY_NOT_LINEBREAK = "category_not_linebreak" -CATEGORY_LOC_WORD = "category_loc_word" -CATEGORY_LOC_NOT_WORD = "category_loc_not_word" -CATEGORY_UNI_DIGIT = "category_uni_digit" -CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit" -CATEGORY_UNI_SPACE = "category_uni_space" -CATEGORY_UNI_NOT_SPACE = "category_uni_not_space" -CATEGORY_UNI_WORD = "category_uni_word" -CATEGORY_UNI_NOT_WORD = "category_uni_not_word" -CATEGORY_UNI_LINEBREAK = "category_uni_linebreak" -CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak" +CHCODES = _makecodes(""" + CATEGORY_DIGIT CATEGORY_NOT_DIGIT + CATEGORY_SPACE CATEGORY_NOT_SPACE + CATEGORY_WORD CATEGORY_NOT_WORD + CATEGORY_LINEBREAK CATEGORY_NOT_LINEBREAK + CATEGORY_LOC_WORD CATEGORY_LOC_NOT_WORD + CATEGORY_UNI_DIGIT CATEGORY_UNI_NOT_DIGIT + CATEGORY_UNI_SPACE CATEGORY_UNI_NOT_SPACE + CATEGORY_UNI_WORD CATEGORY_UNI_NOT_WORD + CATEGORY_UNI_LINEBREAK CATEGORY_UNI_NOT_LINEBREAK +""") -OPCODES = [ - - # failure=0 success=1 (just because it looks better that way :-) - FAILURE, SUCCESS, - - ANY, ANY_ALL, - ASSERT, ASSERT_NOT, - AT, - BRANCH, - CALL, - CATEGORY, - CHARSET, BIGCHARSET, - GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE, - IN, IN_IGNORE, - INFO, - JUMP, - LITERAL, LITERAL_IGNORE, - MARK, - MAX_UNTIL, - MIN_UNTIL, - NOT_LITERAL, NOT_LITERAL_IGNORE, - NEGATE, - RANGE, - REPEAT, - REPEAT_ONE, - SUBPATTERN, - MIN_REPEAT_ONE - -] - -ATCODES = [ - AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY, - AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING, - AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY, - AT_UNI_NON_BOUNDARY -] - -CHCODES = [ - CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE, - CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD, - CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD, - CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT, - CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD, - CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK, - CATEGORY_UNI_NOT_LINEBREAK -] - -def makedict(list): - d = {} - i = 0 - for item in list: - d[item] = i - i = i + 1 - return d - -OPCODES = makedict(OPCODES) -ATCODES = makedict(ATCODES) -CHCODES = makedict(CHCODES) # replacement operations for "ignore case" mode OP_IGNORE = { @@ -217,9 +157,9 @@ SRE_INFO_CHARSET = 4 # pattern starts wi if __name__ == "__main__": def dump(f, d, prefix): - items = sorted(d.items(), key=lambda a: a[1]) - for k, v in items: - f.write("#define %s_%s %s\n" % (prefix, k.upper(), v)) + items = sorted(d) + for item in items: + f.write("#define %s_%s %d\n" % (prefix, item, item)) f = open("sre_constants.h", "w") f.write("""\ /* diff -r 8a2755f6ae96 Lib/sre_parse.py --- a/Lib/sre_parse.py Thu Sep 18 19:45:04 2014 +0300 +++ b/Lib/sre_parse.py Fri Sep 19 10:19:07 2014 +0300 @@ -13,7 +13,6 @@ # XXX: show string offset and offending character for all errors from sre_constants import * -from _sre import MAXREPEAT SPECIAL_CHARS = ".\\[{()*+?^$|" REPEAT_CHARS = "*+?{" @@ -97,13 +96,13 @@ class SubPattern: nl = 1 seqtypes = (tuple, list) for op, av in self.data: - print(level*" " + op, end=' '); nl = 0 - if op == "in": + print(level*" " + str(op), end=' '); nl = 0 + if op == IN: # member sublanguage print(); nl = 1 for op, a in av: - print((level+1)*" " + op, a) - elif op == "branch": + print((level+1)*" " + str(op), a) + elif op == BRANCH: print(); nl = 1 i = 0 for a in av[1]: diff -r 8a2755f6ae96 Lib/test/test_re.py --- a/Lib/test/test_re.py Thu Sep 18 19:45:04 2014 +0300 +++ b/Lib/test/test_re.py Fri Sep 19 10:19:07 2014 +0300 @@ -1206,13 +1206,13 @@ class ReTests(unittest.TestCase): with captured_stdout() as out: re.compile('foo', re.DEBUG) self.assertEqual(out.getvalue().splitlines(), - ['literal 102 ', 'literal 111 ', 'literal 111 ']) + ['LITERAL 102 ', 'LITERAL 111 ', 'LITERAL 111 ']) # Debug output is output again even a second time (bypassing # the cache -- issue #20426). with captured_stdout() as out: re.compile('foo', re.DEBUG) self.assertEqual(out.getvalue().splitlines(), - ['literal 102 ', 'literal 111 ', 'literal 111 ']) + ['LITERAL 102 ', 'LITERAL 111 ', 'LITERAL 111 ']) def test_keyword_parameters(self): # Issue #20283: Accepting the string keyword parameter. diff -r 8a2755f6ae96 Modules/sre_constants.h --- a/Modules/sre_constants.h Thu Sep 18 19:45:04 2014 +0300 +++ b/Modules/sre_constants.h Fri Sep 19 10:19:07 2014 +0300 @@ -44,6 +44,8 @@ #define SRE_OP_REPEAT_ONE 29 #define SRE_OP_SUBPATTERN 30 #define SRE_OP_MIN_REPEAT_ONE 31 +#define SRE_OP_MIN_REPEAT 32 +#define SRE_OP_MAX_REPEAT 33 #define SRE_AT_BEGINNING 0 #define SRE_AT_BEGINNING_LINE 1 #define SRE_AT_BEGINNING_STRING 2