diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -353,6 +353,27 @@ lo, hi = av[2].getwidth() return lo == hi == 1 and av[2][0][0] != SUBPATTERN +def _generate_overlap_table(prefix): + """ + Generate an overlap table for the following prefix. + An overlap table is a table of the same size as the prefix which + informs about the potential self-overlap for each index in the prefix: + - if overlap[i] == 0, prefix[i:] can't overlap prefix[0:...] + - if overlap[i] == k with 0 < k <= i, prefix[i-k+1:i+1] overlaps with + prefix[0:k] + """ + table = [0] * len(prefix) + for i in range(1, len(prefix)): + idx = table[i - 1] + while prefix[i] != prefix[idx]: + if idx == 0: + table[i] = 0 + break + idx = table[idx - 1] + else: + table[i] = idx + 1 + return table + def _compile_info(code, pattern, flags): # internal: compile an info block. in the current version, # this contains min/max pattern width, and an optional literal @@ -449,12 +470,7 @@ emit(prefix_skip) # skip code.extend(prefix) # generate overlap table - table = [-1] + ([0]*len(prefix)) - for i in range(len(prefix)): - table[i+1] = table[i]+1 - while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]: - table[i+1] = table[table[i+1]-1]+1 - code.extend(table[1:]) # don't store first entry + code.extend(_generate_overlap_table(prefix)) elif charset: _compile_charset(charset, flags, code) code[skip] = len(code) - skip diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -3,10 +3,12 @@ import io import re from re import Scanner +import sre_compile import sre_constants import sys import string import traceback +import unittest from weakref import proxy # Misc tests from Tim Peters' re.doc @@ -15,8 +17,6 @@ # what you're doing. Some of these tests were carefully modeled to # cover most of the code. -import unittest - class S(str): def __getitem__(self, index): return S(super().__getitem__(index)) @@ -1140,6 +1140,22 @@ self.assertEqual(m.group(1), "") self.assertEqual(m.group(2), "y") + +class ImplementationTest(unittest.TestCase): + """ + Test implementation details of the re module. + """ + + def test_overlap_table(self): + f = sre_compile._generate_overlap_table + self.assertEqual(f(""), []) + self.assertEqual(f("a"), [0]) + self.assertEqual(f("abcd"), [0, 0, 0, 0]) + self.assertEqual(f("aaaa"), [0, 1, 2, 3]) + self.assertEqual(f("ababba"), [0, 0, 1, 2, 0, 1]) + self.assertEqual(f("abcabdac"), [0, 0, 0, 1, 2, 0, 1, 0]) + + def run_re_tests(): from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR if verbose: @@ -1269,7 +1285,7 @@ def test_main(): - run_unittest(ReTests) + run_unittest(__name__) run_re_tests() if __name__ == "__main__":