diff --git a/Doc/library/re.rst b/Doc/library/re.rst --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -543,16 +543,21 @@ form. decimal number are functionally equal:: a = re.compile(r"""\d + # the integral part \. # the decimal point \d * # some fractional digits""", re.X) b = re.compile(r"\d+\.\d*") +.. data:: FULLMATCH + + When specified, the regular expression will only be matched to the entire + input string. + .. function:: search(pattern, string, flags=0) Scan through *string* looking for a location where the regular expression *pattern* produces a match, and return a corresponding :ref:`match object <match-objects>`. Return ``None`` if no position in the string matches the pattern; note that this is different from finding a zero-length match at some diff --git a/Lib/re.py b/Lib/re.py --- a/Lib/re.py +++ b/Lib/re.py @@ -122,28 +122,29 @@ import sys import sre_compile import sre_parse import functools # public symbols __all__ = [ "match", "search", "sub", "subn", "split", "findall", "compile", "purge", "template", "escape", "A", "I", "L", "M", "S", "X", "U", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", - "UNICODE", "error" ] + "FULLMATCH", "UNICODE", "error" ] __version__ = "2.2.1" # flags A = ASCII = sre_compile.SRE_FLAG_ASCII # assume ascii "locale" I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode "locale" M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments +FULLMATCH = sre_compile.SRE_FLAG_FULLMATCH # only match full string # sre extensions (experimental, don't rely on these) T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation # sre exception error = 
sre_compile.error diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -206,16 +206,17 @@ SRE_FLAG_TEMPLATE = 1 # template mode (d SRE_FLAG_IGNORECASE = 2 # case insensitive SRE_FLAG_LOCALE = 4 # honour system locale SRE_FLAG_MULTILINE = 8 # treat target as multiline string SRE_FLAG_DOTALL = 16 # treat target as a single string SRE_FLAG_UNICODE = 32 # use unicode "locale" SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments SRE_FLAG_DEBUG = 128 # debugging SRE_FLAG_ASCII = 256 # use ascii "locale" +SRE_FLAG_FULLMATCH = 512 # only match full string # flags for INFO primitive SRE_INFO_PREFIX = 1 # has prefix SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix) SRE_INFO_CHARSET = 4 # pattern starts with character from given set if __name__ == "__main__": def dump(f, d, prefix): @@ -248,15 +249,16 @@ if __name__ == "__main__": f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE) f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE) f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE) f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE) f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL) f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE) f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE) + f.write("#define SRE_FLAG_FULLMATCH %d\n" % SRE_FLAG_FULLMATCH) f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX) f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL) f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET) f.close() print("done") diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -739,16 +739,20 @@ def parse(str, flags=0, pattern=None): if flags & SRE_FLAG_DEBUG: p.dump() if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE: # the VERBOSE flag was switched on inside the pattern. to be # on the safe side, we'll parse the whole thing again... 
return parse(str, p.pattern.flags) + if flags & SRE_FLAG_FULLMATCH: + p.insert(0, (AT, AT_BEGINNING_STRING)) + p.append((AT, AT_END_STRING)) + return p def parse_template(source, pattern): # parse 're' replacement string into list of literals and # group references s = Tokenizer(source) sget = s.get p = [] diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -257,16 +257,52 @@ class ReTests(unittest.TestCase): self.assertEqual(m.group(1, 1), ('a', 'a')) pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), (None, 'b', None)) self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) + def test_re_match_fullmatch(self): + self.assertEqual(re.match('a', 'a', re.FULLMATCH).groups(), ()) + self.assertEqual(re.match('(a)', 'a', re.FULLMATCH).groups(), ('a',)) + self.assertEqual(re.match(r'(a)', 'a', re.FULLMATCH).group(0), 'a') + self.assertEqual(re.match(r'(a)', 'a', re.FULLMATCH).group(1), 'a') + self.assertEqual(re.match(r'(a)', 'a', re.FULLMATCH).group(1, 1), ('a', 'a')) + + pat = re.compile('((a)|(b))(c)?', re.FULLMATCH) + self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None)) + self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None)) + self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c')) + self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c')) + self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c')) + + # A single group + m = re.match('(a)', 'a', re.FULLMATCH) + self.assertEqual(m.group(0), 'a') + self.assertEqual(m.group(0), 'a') + self.assertEqual(m.group(1), 'a') + self.assertEqual(m.group(1, 1), ('a', 'a')) + + pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?', re.FULLMATCH) + self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) + self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), + (None, 'b', None)) + 
self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) + + self.assertEqual(re.match(r'ab', 'abc', re.FULLMATCH), None) + self.assertEqual(re.match(r'^abc$', 'abc', re.FULLMATCH).group(0), 'abc') + + # test branch + self.assertEqual(re.match(r'a|ab', 'ab', re.FULLMATCH).group(0), 'ab') + + # test nongreedy qualifiers + self.assertEqual(re.match(r'.*?', 'abc', re.FULLMATCH).group(0), 'abc') + def test_re_groupref_exists(self): self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), ('(', 'a')) self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), (None, 'a')) self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None) self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None) self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),