Message389949
Certain patterns and input strings cause re.match to take exponentially longer time. This can be expected because of recursive nature of matching some patterns, but it can take surprisingly long time with some combination of a pattern and input data of moderate length. For example:
import re
import time
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_ {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_ {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_D {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_D {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DI {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DI {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DIS {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DIS {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISP {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISP {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPL {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPL {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLA {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLA {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLAY {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLAY {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
Does:
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_ {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 0.2190000000409782 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_D {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 0.4529999999795109 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DI {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 0.9529999999795109 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DIS {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 1.797000000020489 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISP {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 3.6090000001713634 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPL {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 7.125 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLA {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 14.890999999828637 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLAY {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 29.688000000081956 s
Perhaps re.match needs to be optimized and/or rewritten in low level language (C) |
|
Date |
User |
Action |
Args |
2021-04-01 03:11:36 | alegrigoriev | set | recipients:
+ alegrigoriev |
2021-04-01 03:11:36 | alegrigoriev | set | messageid: <1617246696.22.0.947636964181.issue43686@roundup.psfhosted.org> |
2021-04-01 03:11:36 | alegrigoriev | link | issue43686 messages |
2021-04-01 03:11:36 | alegrigoriev | create | |
|