Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(80398)

Delta Between Two Patch Sets: Lib/test/test_re.py

Issue 16510: Using appropriate checks in tests
Left Patch Set: Created 6 years, 9 months ago
Right Patch Set: Created 5 years, 6 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « Lib/test/test_pyexpat.py ('k') | Lib/test/test_richcmp.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 from test.support import verbose, run_unittest, gc_collect 1 from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
2 cpython_only, captured_stdout
2 import io 3 import io
3 import re 4 import re
4 from re import Scanner 5 from re import Scanner
6 import sre_compile
7 import sre_constants
5 import sys 8 import sys
6 import string 9 import string
7 import traceback 10 import traceback
11 import unittest
8 from weakref import proxy 12 from weakref import proxy
9 13
10 # Misc tests from Tim Peters' re.doc 14 # Misc tests from Tim Peters' re.doc
11 15
12 # WARNING: Don't change details in these tests if you don't know 16 # WARNING: Don't change details in these tests if you don't know
13 # what you're doing. Some of these tests were carefully modeled to 17 # what you're doing. Some of these tests were carefully modeled to
14 # cover most of the code. 18 # cover most of the code.
15 19
16 import unittest 20 class S(str):
21 def __getitem__(self, index):
22 return S(super().__getitem__(index))
23
24 class B(bytes):
25 def __getitem__(self, index):
26 return B(super().__getitem__(index))
17 27
18 class ReTests(unittest.TestCase): 28 class ReTests(unittest.TestCase):
29
30 def assertTypedEqual(self, actual, expect, msg=None):
31 self.assertEqual(actual, expect, msg)
32 def recurse(actual, expect):
33 if isinstance(expect, (tuple, list)):
34 for x, y in zip(actual, expect):
35 recurse(x, y)
36 else:
37 self.assertIs(type(actual), type(expect), msg)
38 recurse(actual, expect)
19 39
20 def test_keep_buffer(self): 40 def test_keep_buffer(self):
21 # See bug 14212 41 # See bug 14212
22 b = bytearray(b'x') 42 b = bytearray(b'x')
23 it = re.finditer(b'a', b) 43 it = re.finditer(b'a', b)
24 with self.assertRaises(BufferError): 44 with self.assertRaises(BufferError):
25 b.extend(b'x'*400) 45 b.extend(b'x'*400)
26 list(it) 46 list(it)
27 del it 47 del it
28 gc_collect() 48 gc_collect()
(...skipping 15 matching lines...) Expand all
44 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0)) 64 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
45 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3)) 65 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
46 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3)) 66 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
47 self.assertEqual(re.match('a+', 'xxx'), None) 67 self.assertEqual(re.match('a+', 'xxx'), None)
48 68
49 def bump_num(self, matchobj): 69 def bump_num(self, matchobj):
50 int_value = int(matchobj.group(0)) 70 int_value = int(matchobj.group(0))
51 return str(int_value + 1) 71 return str(int_value + 1)
52 72
53 def test_basic_re_sub(self): 73 def test_basic_re_sub(self):
74 self.assertTypedEqual(re.sub('y', 'a', 'xyz'), 'xaz')
75 self.assertTypedEqual(re.sub('y', S('a'), S('xyz')), 'xaz')
76 self.assertTypedEqual(re.sub(b'y', b'a', b'xyz'), b'xaz')
77 self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
78 self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
79 self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
80 for y in ("\xe0", "\u0430", "\U0001d49c"):
81 self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')
82
54 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') 83 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
55 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), 84 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
56 '9.3 -3 24x100y') 85 '9.3 -3 24x100y')
57 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), 86 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
58 '9.3 -3 23x99y') 87 '9.3 -3 23x99y')
59 88
60 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n') 89 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
61 self.assertEqual(re.sub('.', r"\n", 'x'), '\n') 90 self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
62 91
63 s = r"\1\1" 92 s = r"\1\1"
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
172 self.assertRaises(re.error, re.compile, '(?P=a1)') 201 self.assertRaises(re.error, re.compile, '(?P=a1)')
173 self.assertRaises(re.error, re.compile, '(?P=a.)') 202 self.assertRaises(re.error, re.compile, '(?P=a.)')
174 self.assertRaises(re.error, re.compile, '(?P<)') 203 self.assertRaises(re.error, re.compile, '(?P<)')
175 self.assertRaises(re.error, re.compile, '(?P<>)') 204 self.assertRaises(re.error, re.compile, '(?P<>)')
176 self.assertRaises(re.error, re.compile, '(?P<1>)') 205 self.assertRaises(re.error, re.compile, '(?P<1>)')
177 self.assertRaises(re.error, re.compile, '(?P<a.>)') 206 self.assertRaises(re.error, re.compile, '(?P<a.>)')
178 self.assertRaises(re.error, re.compile, '(?())') 207 self.assertRaises(re.error, re.compile, '(?())')
179 self.assertRaises(re.error, re.compile, '(?(a))') 208 self.assertRaises(re.error, re.compile, '(?(a))')
180 self.assertRaises(re.error, re.compile, '(?(1a))') 209 self.assertRaises(re.error, re.compile, '(?(1a))')
181 self.assertRaises(re.error, re.compile, '(?(a.))') 210 self.assertRaises(re.error, re.compile, '(?(a.))')
211 # New valid/invalid identifiers in Python 3
212 re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
213 re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
214 self.assertRaises(re.error, re.compile, '(?P<©>x)')
182 215
183 def test_symbolic_refs(self): 216 def test_symbolic_refs(self):
184 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx') 217 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
185 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx') 218 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
186 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx') 219 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
187 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx') 220 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
188 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx') 221 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
189 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx') 222 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
190 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx') 223 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
191 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') 224 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
192 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') 225 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
193 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx') 226 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
227 # New valid/invalid identifiers in Python 3
228 self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
229 self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
230 self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
194 231
195 def test_re_subn(self): 232 def test_re_subn(self):
196 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) 233 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
197 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) 234 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
198 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0)) 235 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
199 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) 236 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
200 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) 237 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
201 238
202 def test_re_split(self): 239 def test_re_split(self):
203 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c']) 240 for string in ":a:b::c", S(":a:b::c"):
204 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c']) 241 self.assertTypedEqual(re.split(":", string),
205 self.assertEqual(re.split("(:*)", ":a:b::c"), 242 ['', 'a', 'b', '', 'c'])
206 ['', ':', 'a', ':', 'b', '::', 'c']) 243 self.assertTypedEqual(re.split(":*", string),
244 ['', 'a', 'b', 'c'])
245 self.assertTypedEqual(re.split("(:*)", string),
246 ['', ':', 'a', ':', 'b', '::', 'c'])
247 for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
248 memoryview(b":a:b::c")):
249 self.assertTypedEqual(re.split(b":", string),
250 [b'', b'a', b'b', b'', b'c'])
251 self.assertTypedEqual(re.split(b":*", string),
252 [b'', b'a', b'b', b'c'])
253 self.assertTypedEqual(re.split(b"(:*)", string),
254 [b'', b':', b'a', b':', b'b', b'::', b'c'])
255 for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
256 "\U0001d49c\U0001d49e\U0001d4b5"):
257 string = ":%s:%s::%s" % (a, b, c)
258 self.assertEqual(re.split(":", string), ['', a, b, '', c])
259 self.assertEqual(re.split(":*", string), ['', a, b, c])
260 self.assertEqual(re.split("(:*)", string),
261 ['', ':', a, ':', b, '::', c])
262
207 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c']) 263 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
208 self.assertEqual(re.split("(:)*", ":a:b::c"), 264 self.assertEqual(re.split("(:)*", ":a:b::c"),
209 ['', ':', 'a', ':', 'b', ':', 'c']) 265 ['', ':', 'a', ':', 'b', ':', 'c'])
210 self.assertEqual(re.split("([b:]+)", ":a:b::c"), 266 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
211 ['', ':', 'a', ':b::', 'c']) 267 ['', ':', 'a', ':b::', 'c'])
212 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"), 268 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
213 ['', None, ':', 'a', None, ':', '', 'b', None, '', 269 ['', None, ':', 'a', None, ':', '', 'b', None, '',
214 None, '::', 'c']) 270 None, '::', 'c'])
215 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"), 271 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
216 ['', 'a', '', '', 'c']) 272 ['', 'a', '', '', 'c'])
217 273
218 def test_qualified_re_split(self): 274 def test_qualified_re_split(self):
219 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) 275 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
220 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d']) 276 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
221 self.assertEqual(re.split("(:)", ":a:b::c", 2), 277 self.assertEqual(re.split("(:)", ":a:b::c", 2),
222 ['', ':', 'a', ':', 'b::c']) 278 ['', ':', 'a', ':', 'b::c'])
223 self.assertEqual(re.split("(:*)", ":a:b::c", 2), 279 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
224 ['', ':', 'a', ':', 'b::c']) 280 ['', ':', 'a', ':', 'b::c'])
225 281
226 def test_re_findall(self): 282 def test_re_findall(self):
227 self.assertEqual(re.findall(":+", "abc"), []) 283 self.assertEqual(re.findall(":+", "abc"), [])
228 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"]) 284 for string in "a:b::c:::d", S("a:b::c:::d"):
229 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"]) 285 self.assertTypedEqual(re.findall(":+", string),
230 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""), 286 [":", "::", ":::"])
231 (":", ":"), 287 self.assertTypedEqual(re.findall("(:+)", string),
232 (":", "::")]) 288 [":", "::", ":::"])
289 self.assertTypedEqual(re.findall("(:)(:*)", string),
290 [(":", ""), (":", ":"), (":", "::")])
291 for string in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
292 memoryview(b"a:b::c:::d")):
293 self.assertTypedEqual(re.findall(b":+", string),
294 [b":", b"::", b":::"])
295 self.assertTypedEqual(re.findall(b"(:+)", string),
296 [b":", b"::", b":::"])
297 self.assertTypedEqual(re.findall(b"(:)(:*)", string),
298 [(b":", b""), (b":", b":"), (b":", b"::")])
299 for x in ("\xe0", "\u0430", "\U0001d49c"):
300 xx = x * 2
301 xxx = x * 3
302 string = "a%sb%sc%sd" % (x, xx, xxx)
303 self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
304 self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
305 self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
306 [(x, ""), (x, x), (x, xx)])
233 307
234 def test_bug_117612(self): 308 def test_bug_117612(self):
235 self.assertEqual(re.findall(r"(a|(b))", "aba"), 309 self.assertEqual(re.findall(r"(a|(b))", "aba"),
236 [("a", ""),("b", "b"),("a", "")]) 310 [("a", ""),("b", "b"),("a", "")])
237 311
238 def test_re_match(self): 312 def test_re_match(self):
239 self.assertEqual(re.match('a', 'a').groups(), ()) 313 for string in 'a', S('a'):
240 self.assertEqual(re.match('(a)', 'a').groups(), ('a',)) 314 self.assertEqual(re.match('a', string).groups(), ())
241 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a') 315 self.assertEqual(re.match('(a)', string).groups(), ('a',))
242 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a') 316 self.assertEqual(re.match('(a)', string).group(0), 'a')
243 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a')) 317 self.assertEqual(re.match('(a)', string).group(1), 'a')
318 self.assertEqual(re.match('(a)', string).group(1, 1), ('a', 'a'))
319 for string in b'a', B(b'a'), bytearray(b'a'), memoryview(b'a'):
320 self.assertEqual(re.match(b'a', string).groups(), ())
321 self.assertEqual(re.match(b'(a)', string).groups(), (b'a',))
322 self.assertEqual(re.match(b'(a)', string).group(0), b'a')
323 self.assertEqual(re.match(b'(a)', string).group(1), b'a')
324 self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
325 for a in ("\xe0", "\u0430", "\U0001d49c"):
326 self.assertEqual(re.match(a, a).groups(), ())
327 self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
328 self.assertEqual(re.match('(%s)' % a, a).group(0), a)
329 self.assertEqual(re.match('(%s)' % a, a).group(1), a)
330 self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))
244 331
245 pat = re.compile('((a)|(b))(c)?') 332 pat = re.compile('((a)|(b))(c)?')
246 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None)) 333 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
247 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None)) 334 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
248 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c')) 335 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
249 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c')) 336 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
250 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c')) 337 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
251 338
252 # A single group 339 # A single group
253 m = re.match('(a)', 'a') 340 m = re.match('(a)', 'a')
254 self.assertEqual(m.group(0), 'a') 341 self.assertEqual(m.group(0), 'a')
255 self.assertEqual(m.group(0), 'a') 342 self.assertEqual(m.group(0), 'a')
256 self.assertEqual(m.group(1), 'a') 343 self.assertEqual(m.group(1), 'a')
257 self.assertEqual(m.group(1, 1), ('a', 'a')) 344 self.assertEqual(m.group(1, 1), ('a', 'a'))
258 345
259 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') 346 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
260 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) 347 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
261 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), 348 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
262 (None, 'b', None)) 349 (None, 'b', None))
263 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) 350 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
351
352 def test_re_fullmatch(self):
353 # Issue 16203: Proposal: add re.fullmatch() method.
354 self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1))
355 for string in "ab", S("ab"):
356 self.assertEqual(re.fullmatch(r"a|ab", string).span(), (0, 2))
357 for string in b"ab", B(b"ab"), bytearray(b"ab"), memoryview(b"ab"):
358 self.assertEqual(re.fullmatch(br"a|ab", string).span(), (0, 2))
359 for a, b in "\xe0\xdf", "\u0430\u0431", "\U0001d49c\U0001d49e":
360 r = r"%s|%s" % (a, a + b)
361 self.assertEqual(re.fullmatch(r, a + b).span(), (0, 2))
362 self.assertEqual(re.fullmatch(r".*?$", "abc").span(), (0, 3))
363 self.assertEqual(re.fullmatch(r".*?", "abc").span(), (0, 3))
364 self.assertEqual(re.fullmatch(r"a.*?b", "ab").span(), (0, 2))
365 self.assertEqual(re.fullmatch(r"a.*?b", "abb").span(), (0, 3))
366 self.assertEqual(re.fullmatch(r"a.*?b", "axxb").span(), (0, 4))
367 self.assertIsNone(re.fullmatch(r"a+", "ab"))
368 self.assertIsNone(re.fullmatch(r"abc$", "abc\n"))
369 self.assertIsNone(re.fullmatch(r"abc\Z", "abc\n"))
370 self.assertIsNone(re.fullmatch(r"(?m)abc$", "abc\n"))
371 self.assertEqual(re.fullmatch(r"ab(?=c)cd", "abcd").span(), (0, 4))
372 self.assertEqual(re.fullmatch(r"ab(?<=b)cd", "abcd").span(), (0, 4))
373 self.assertEqual(re.fullmatch(r"(?=a|ab)ab", "ab").span(), (0, 2))
374
375 self.assertEqual(
376 re.compile(r"bc").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
377 self.assertEqual(
378 re.compile(r".*?$").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
379 self.assertEqual(
380 re.compile(r".*?").fullmatch("abcd", pos=1, endpos=3).span(), (1, 3))
264 381
265 def test_re_groupref_exists(self): 382 def test_re_groupref_exists(self):
266 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), 383 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
267 ('(', 'a')) 384 ('(', 'a'))
268 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), 385 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
269 (None, 'a')) 386 (None, 'a'))
270 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None) 387 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
271 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None) 388 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
272 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(), 389 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
273 ('a', 'b')) 390 ('a', 'b'))
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after
411 self.assertEqual(len(re.findall(r"\b", " ")), 0) 528 self.assertEqual(len(re.findall(r"\b", " ")), 0)
412 self.assertEqual(len(re.findall(r"\b", " ")), 0) 529 self.assertEqual(len(re.findall(r"\b", " ")), 0)
413 # Can match around the whitespace. 530 # Can match around the whitespace.
414 self.assertEqual(len(re.findall(r"\B", " ")), 2) 531 self.assertEqual(len(re.findall(r"\B", " ")), 2)
415 532
416 def test_bigcharset(self): 533 def test_bigcharset(self):
417 self.assertEqual(re.match("([\u2222\u2223])", 534 self.assertEqual(re.match("([\u2222\u2223])",
418 "\u2222").group(1), "\u2222") 535 "\u2222").group(1), "\u2222")
419 self.assertEqual(re.match("([\u2222\u2223])", 536 self.assertEqual(re.match("([\u2222\u2223])",
420 "\u2222", re.UNICODE).group(1), "\u2222") 537 "\u2222", re.UNICODE).group(1), "\u2222")
538 r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
539 self.assertEqual(re.match(r,
540 "\uff01", re.UNICODE).group(), "\uff01")
541
542 def test_big_codesize(self):
543 # Issue #1160
544 r = re.compile('|'.join(('%d'%x for x in range(10000))))
545 self.assertIsNotNone(r.match('1000'))
546 self.assertIsNotNone(r.match('9999'))
421 547
422 def test_anyall(self): 548 def test_anyall(self):
423 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0), 549 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
424 "a\nb") 550 "a\nb")
425 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0), 551 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
426 "a\n\nb") 552 "a\n\nb")
427 553
428 def test_non_consuming(self): 554 def test_non_consuming(self):
429 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a") 555 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
430 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a") 556 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
586 self.assertIsNotNone(re.match(r"[\x%02x]" % i, chr(i))) 712 self.assertIsNotNone(re.match(r"[\x%02x]" % i, chr(i)))
587 self.assertIsNotNone(re.match(r"[\x%02x0]" % i, chr(i))) 713 self.assertIsNotNone(re.match(r"[\x%02x0]" % i, chr(i)))
588 self.assertIsNotNone(re.match(r"[\x%02xz]" % i, chr(i))) 714 self.assertIsNotNone(re.match(r"[\x%02xz]" % i, chr(i)))
589 if i < 0x10000: 715 if i < 0x10000:
590 self.assertIsNotNone(re.match(r"[\u%04x]" % i, chr(i))) 716 self.assertIsNotNone(re.match(r"[\u%04x]" % i, chr(i)))
591 self.assertIsNotNone(re.match(r"[\u%04x0]" % i, chr(i))) 717 self.assertIsNotNone(re.match(r"[\u%04x0]" % i, chr(i)))
592 self.assertIsNotNone(re.match(r"[\u%04xz]" % i, chr(i))) 718 self.assertIsNotNone(re.match(r"[\u%04xz]" % i, chr(i)))
593 self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i))) 719 self.assertIsNotNone(re.match(r"[\U%08x]" % i, chr(i)))
594 self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0")) 720 self.assertIsNotNone(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
595 self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z")) 721 self.assertIsNotNone(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
722 self.assertIsNotNone(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
596 self.assertRaises(re.error, re.match, r"[\911]", "") 723 self.assertRaises(re.error, re.match, r"[\911]", "")
597 self.assertRaises(re.error, re.match, r"[\x1z]", "") 724 self.assertRaises(re.error, re.match, r"[\x1z]", "")
598 self.assertRaises(re.error, re.match, r"[\u123z]", "") 725 self.assertRaises(re.error, re.match, r"[\u123z]", "")
599 self.assertRaises(re.error, re.match, r"[\U0001234z]", "") 726 self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
600 self.assertRaises(re.error, re.match, r"[\U00110000]", "") 727 self.assertRaises(re.error, re.match, r"[\U00110000]", "")
601 728
602 def test_sre_byte_literals(self): 729 def test_sre_byte_literals(self):
603 for i in [0, 8, 16, 32, 64, 127, 128, 255]: 730 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
604 self.assertIsNotNone(re.match((r"\%03o" % i).encode(), bytes([i]))) 731 self.assertIsNotNone(re.match((r"\%03o" % i).encode(), bytes([i])))
605 self.assertIsNotNone(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0")) 732 self.assertIsNotNone(re.match((r"\%03o0" % i).encode(), bytes([i])+b"0"))
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
666 def test_bug_612074(self): 793 def test_bug_612074(self):
667 pat="["+re.escape("\u2039")+"]" 794 pat="["+re.escape("\u2039")+"]"
668 self.assertEqual(re.compile(pat) and 1, 1) 795 self.assertEqual(re.compile(pat) and 1, 1)
669 796
670 def test_stack_overflow(self): 797 def test_stack_overflow(self):
671 # nasty cases that used to overflow the straightforward recursive 798 # nasty cases that used to overflow the straightforward recursive
672 # implementation of repeated groups. 799 # implementation of repeated groups.
673 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x') 800 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
674 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x') 801 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
675 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x') 802 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
803
804 def test_unlimited_zero_width_repeat(self):
805 # Issue #9669
806 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
807 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
808 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
809 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
810 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
811 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
676 812
677 def test_scanner(self): 813 def test_scanner(self):
678 def s_ident(scanner, token): return token 814 def s_ident(scanner, token): return token
679 def s_operator(scanner, token): return "op%s" % token 815 def s_operator(scanner, token): return "op%s" % token
680 def s_float(scanner, token): return float(token) 816 def s_float(scanner, token): return float(token)
681 def s_int(scanner, token): return int(token) 817 def s_int(scanner, token): return int(token)
682 818
683 scanner = Scanner([ 819 scanner = Scanner([
684 (r"[a-zA-Z_]\w*", s_ident), 820 (r"[a-zA-Z_]\w*", s_ident),
685 (r"\d+\.\d*", s_float), 821 (r"\d+\.\d*", s_float),
(...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after
935 1071
936 def test_compile(self): 1072 def test_compile(self):
937 # Test return value when given string and pattern as parameter 1073 # Test return value when given string and pattern as parameter
938 pattern = re.compile('random pattern') 1074 pattern = re.compile('random pattern')
939 self.assertIsInstance(pattern, re._pattern_type) 1075 self.assertIsInstance(pattern, re._pattern_type)
940 same_pattern = re.compile(pattern) 1076 same_pattern = re.compile(pattern)
941 self.assertIsInstance(same_pattern, re._pattern_type) 1077 self.assertIsInstance(same_pattern, re._pattern_type)
942 self.assertIs(same_pattern, pattern) 1078 self.assertIs(same_pattern, pattern)
943 # Test behaviour when not given a string or pattern as parameter 1079 # Test behaviour when not given a string or pattern as parameter
944 self.assertRaises(TypeError, re.compile, 0) 1080 self.assertRaises(TypeError, re.compile, 0)
1081
1082 def test_bug_13899(self):
1083 # Issue #13899: re pattern r"[\A]" should work like "A" but matches
1084 # nothing. Ditto B and Z.
1085 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
1086 ['A', 'B', '\b', 'C', 'Z'])
1087
1088 @bigmemtest(size=_2G, memuse=1)
1089 def test_large_search(self, size):
1090 # Issue #10182: indices were 32-bit-truncated.
1091 s = 'a' * size
1092 m = re.search('$', s)
1093 self.assertIsNotNone(m)
1094 self.assertEqual(m.start(), size)
1095 self.assertEqual(m.end(), size)
1096
1097 # The huge memuse is because of re.sub() using a list and a join()
1098 # to create the replacement result.
1099 @bigmemtest(size=_2G, memuse=16 + 2)
1100 def test_large_subn(self, size):
1101 # Issue #10182: indices were 32-bit-truncated.
1102 s = 'a' * size
1103 r, n = re.subn('', '', s)
1104 self.assertEqual(r, s)
1105 self.assertEqual(n, size + 1)
1106
1107 def test_bug_16688(self):
1108 # Issue 16688: Backreferences make case-insensitive regex fail on
1109 # non-ASCII strings.
1110 self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
1111 self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
1112
1113 def test_repeat_minmax_overflow(self):
1114 # Issue #13169
1115 string = "x" * 100000
1116 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
1117 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
1118 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
1119 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
1120 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
1121 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
1122 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
1123 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
1124 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
1125 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
1126 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
1127
1128 @cpython_only
1129 def test_repeat_minmax_overflow_maxrepeat(self):
1130 try:
1131 from _sre import MAXREPEAT
1132 except ImportError:
1133 self.skipTest('requires _sre.MAXREPEAT constant')
1134 string = "x" * 100000
1135 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
1136 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
1137 (0, 100000))
1138 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
1139 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
1140 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
1141 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
1142
1143 def test_backref_group_name_in_exception(self):
1144 # Issue 17341: Poor error message when compiling invalid regex
1145 with self.assertRaisesRegex(sre_constants.error, '<foo>'):
1146 re.compile('(?P=<foo>)')
1147
1148 def test_group_name_in_exception(self):
1149 # Issue 17341: Poor error message when compiling invalid regex
1150 with self.assertRaisesRegex(sre_constants.error, '\?foo'):
1151 re.compile('(?P<?foo>)')
1152
1153 def test_issue17998(self):
1154 for reps in '*', '+', '?', '{1}':
1155 for mod in '', '?':
1156 pattern = '.' + reps + mod + 'yz'
1157 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
1158 ['xyz'], msg=pattern)
1159 pattern = pattern.encode()
1160 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
1161 [b'xyz'], msg=pattern)
1162
1163 def test_match_repr(self):
1164 for string in '[abracadabra]', S('[abracadabra]'):
1165 m = re.search(r'(.+)(.*?)\1', string)
1166 self.assertEqual(repr(m), "<%s.%s object; "
1167 "span=(1, 12), match='abracadabra'>" %
1168 (type(m).__module__, type(m).__qualname__))
1169 for string in (b'[abracadabra]', B(b'[abracadabra]'),
1170 bytearray(b'[abracadabra]'),
1171 memoryview(b'[abracadabra]')):
1172 m = re.search(rb'(.+)(.*?)\1', string)
1173 self.assertEqual(repr(m), "<%s.%s object; "
1174 "span=(1, 12), match=b'abracadabra'>" %
1175 (type(m).__module__, type(m).__qualname__))
1176
1177 first, second = list(re.finditer("(aa)|(bb)", "aa bb"))
1178 self.assertEqual(repr(first), "<%s.%s object; "
1179 "span=(0, 2), match='aa'>" %
1180 (type(second).__module__, type(first).__qualname__))
1181 self.assertEqual(repr(second), "<%s.%s object; "
1182 "span=(3, 5), match='bb'>" %
1183 (type(second).__module__, type(second).__qualname__))
1184
1185
1186 def test_bug_2537(self):
1187 # issue 2537: empty submatches
1188 for outer_op in ('{0,}', '*', '+', '{1,187}'):
1189 for inner_op in ('{0,}', '*', '?'):
1190 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
1191 m = r.match("xyyzy")
1192 self.assertEqual(m.group(0), "xyy")
1193 self.assertEqual(m.group(1), "")
1194 self.assertEqual(m.group(2), "y")
1195
1196 def test_debug_flag(self):
1197 with captured_stdout() as out:
1198 re.compile('foo', re.DEBUG)
1199 self.assertEqual(out.getvalue().splitlines(),
1200 ['literal 102 ', 'literal 111 ', 'literal 111 '])
1201 # Debug output is output again even a second time (bypassing
1202 # the cache -- issue #20426).
1203 with captured_stdout() as out:
1204 re.compile('foo', re.DEBUG)
1205 self.assertEqual(out.getvalue().splitlines(),
1206 ['literal 102 ', 'literal 111 ', 'literal 111 '])
1207
1208
class PatternReprTests(unittest.TestCase):
    """Tests for the ``repr()`` of compiled pattern objects."""

    def check(self, pattern, expected):
        """Assert that *pattern* compiled without flags has repr *expected*."""
        compiled = re.compile(pattern)
        self.assertEqual(repr(compiled), expected)

    def check_flags(self, pattern, flags, expected):
        """Assert that *pattern* compiled with *flags* has repr *expected*."""
        compiled = re.compile(pattern, flags)
        self.assertEqual(repr(compiled), expected)

    def test_without_flags(self):
        """A pattern compiled with no flags shows only the pattern."""
        self.check('random pattern', "re.compile('random pattern')")

    def test_single_flag(self):
        """A single flag is shown by its long name."""
        wanted = "re.compile('random pattern', re.IGNORECASE)"
        self.check_flags('random pattern', re.IGNORECASE, wanted)

    def test_multiple_flags(self):
        """Several flags are shown joined with ``|``."""
        flags = re.I | re.S | re.X
        wanted = ("re.compile('random pattern', "
                  "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
        self.check_flags('random pattern', flags, wanted)

    def test_unicode_flag(self):
        """``re.U`` on a str pattern is expected to be left out of the repr."""
        self.check_flags('random pattern', re.U,
                         "re.compile('random pattern')")
        wanted = ("re.compile('random pattern', "
                  "re.IGNORECASE|re.DOTALL)")
        self.check_flags('random pattern', re.I | re.S | re.U, wanted)

    def test_inline_flags(self):
        """A flag set inline in the pattern is expected in the repr too."""
        wanted = "re.compile('(?i)pattern', re.IGNORECASE)"
        self.check('(?i)pattern', wanted)

    def test_unknown_flags(self):
        """Flag bits without a name are expected to be shown in hex."""
        unknown = 0x123000
        self.check_flags('random pattern', unknown,
                         "re.compile('random pattern', 0x123000)")
        self.check_flags('random pattern', unknown | re.I,
                         "re.compile('random pattern', re.IGNORECASE|0x123000)")

    def test_bytes(self):
        """Bytes patterns are shown as bytes literals."""
        self.check(b'bytes pattern', "re.compile(b'bytes pattern')")
        self.check_flags(b'bytes pattern', re.A,
                         "re.compile(b'bytes pattern', re.ASCII)")

    def test_quotes(self):
        """Patterns containing single and/or double quotes."""
        samples = (
            ('random "double quoted" pattern',
             '''re.compile('random "double quoted" pattern')'''),
            ("random 'single quoted' pattern",
             '''re.compile("random 'single quoted' pattern")'''),
            ('''both 'single' and "double" quotes''',
             '''re.compile('both \\'single\\' and "double" quotes')'''),
        )
        for pattern, wanted in samples:
            self.check(pattern, wanted)

    def test_long_pattern(self):
        """The repr of a very long pattern stays under 300 characters."""
        pattern = 'Very %spattern' % ('long ' * 1000)
        head = "re.compile('Very long long lon"

        plain = repr(re.compile(pattern))
        self.assertLess(len(plain), 300)
        self.assertEqual(plain[:30], head)

        # With a flag, the flag is expected to still be visible at the end.
        flagged = repr(re.compile(pattern, re.I))
        self.assertLess(len(flagged), 300)
        self.assertEqual(flagged[:30], head)
        self.assertEqual(flagged[-16:], ", re.IGNORECASE)")
1269
1270
class ImplementationTest(unittest.TestCase):
    """
    Test implementation details of the re module.
    """

    def test_overlap_table(self):
        """Check ``sre_compile._generate_overlap_table`` on sample strings.

        Each entry pairs an input string with the list of integers the
        helper is expected to return for it.
        NOTE(review): this looks like a prefix-overlap (KMP-style) table;
        confirm against the helper's own documentation.
        """
        build_table = sre_compile._generate_overlap_table
        samples = (
            ("", []),
            ("a", [0]),
            ("abcd", [0, 0, 0, 0]),
            ("aaaa", [0, 1, 2, 3]),
            ("ababba", [0, 0, 1, 2, 0, 1]),
            ("abcabdac", [0, 0, 0, 1, 2, 0, 1, 0]),
        )
        for prefix, wanted in samples:
            self.assertEqual(build_table(prefix), wanted)
1284
945 1285
946 def run_re_tests(): 1286 def run_re_tests():
947 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR 1287 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
948 if verbose: 1288 if verbose:
949 print('Running re_tests test suite') 1289 print('Running re_tests test suite')
950 else: 1290 else:
951 # To save time, only run the first and last 10 tests 1291 # To save time, only run the first and last 10 tests
952 #tests = tests[:10] + tests[-10:] 1292 #tests = tests[:10] + tests[-10:]
953 pass 1293 pass
954 1294
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
1065 1405
1066 # Try the match with UNICODE locale enabled, and check 1406 # Try the match with UNICODE locale enabled, and check
1067 # that it still succeeds. 1407 # that it still succeeds.
1068 obj = re.compile(pattern, re.UNICODE) 1408 obj = re.compile(pattern, re.UNICODE)
1069 result = obj.search(s) 1409 result = obj.search(s)
1070 if result is None: 1410 if result is None:
1071 print('=== Fails on unicode-sensitive match', t) 1411 print('=== Fails on unicode-sensitive match', t)
1072 1412
1073 1413
1074 def test_main(): 1414 def test_main():
1075 run_unittest(ReTests) 1415 run_unittest(__name__)
1076 run_re_tests() 1416 run_re_tests()
1077 1417
1078 if __name__ == "__main__": 1418 if __name__ == "__main__":
1079 test_main() 1419 test_main()
LEFTRIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+