''' Created on Aug 22, 2013 @author: tal My results: regex #0 match success = 0.0548729896545 seconds re #0 match success = 0.0322258472443 seconds regex #0 match failure = 0.153463840485 seconds re #0 match failure = 0.0835230350494 seconds regex #1 match success = 0.0268259048462 seconds re #1 match success = 0.0108819007874 seconds regex #1 match failure = 0.0748851299286 seconds re #1 match failure = 0.0345940589905 seconds regex #2 match success = 0.0110721588135 seconds re #2 match success = 0.00680017471313 seconds regex #2 match failure = 0.00865983963013 seconds re #2 match failure = 0.00549507141113 seconds Python 2.7, Ubuntu 12.10 64 bits, Lenovo W530. ''' from timeit import repeat PATTERNS = [ (ur"""ur'''(((?P((are(?=$|\W)|is(?=$|\W)|am(?=$|\W))?))[\s,]*(?P(blueprintingbrainstorm|making[\s\-]*arrangements|makes[\s\-]*arrangements|make[\s\-]*arrangements|brainstorming|contemplating|contemplates|brainstorms|contemplate|engineering|formulating|setting[\s\-]*out|working[\s\-]*out|blueprints|concocting|conspiring|contriving|cooking[\s\-]*up|formulates|organizing|preparing|arranging|blueprint|conspires|contrives|designing|engineers|formulate|lining[\s\-]*up|organizes|outlining|works[\s\-]*out|planning|prepares|arranges|concocts|conspire|contrive|cooks[\s\-]*up|crafting|devising|drafting|engineer|lines[\s\-]*up|organize|outlines|plotting|readying|scheming|sets[\s\-]*out|work[\s\-]*out|prepare|arrange|concoct|cook[\s\-]*up|designs|devises|forming|framing|line[\s\-]*up|mapping|outline|readies|schemes|set[\s\-]*out|crafts|design|devise|drafts|frames|scheme|plans|craft|draft|forms|frame|plots|ready|plan|form|maps|plot|map))))[\s,]*(((?P((to|2)?))[\s,]*(((((((?P(coming(?=$|\W)|being(?=$|\W)|come(?=$|\W)|stop(?=$|\W)|pop(?=$|\W)|be(?=$|\W)))[\s,]*(?P((back[\s\-]*home|back|home)?))))[\s,]*(?P(to[\s\-]*see(?=$|\W)|in[\s\-]*to(?=$|\W)|into(?=$|\W)|in(?=$|\W)|at(?=$|\W)|to(?=$|\W)|by(?=$|\W)))))|(((?P(paying[\s\-]*a[\s\-]*visit[\s\-]*to|paying[\s\-]*a[\s\-]*call[\s\-]*on|pay[\s\-]*a[\s\-]*visit[\s\-]*to|crashing[\s\-]*in[\s\-]*on|pay[\s\-]*a[\s\-]*call[\s\-]*on|coming[\s\-]*around|droping[\s\-]*in[\s\-]*on|going[\s\-]*over[\s\-]*to|looking[\s\-]*in[\s\-]*on|popping[\s\-]*in[\s\-]*to|droping[\s\-]*over|popping[\s\-]*into|stopping[\s\-]*off|come[\s\-]*around|crash[\s\-]*in[\s\-]*on|dwelling[\s\-]*in|frequenting|residing[\s\-]*in|stopping[\s\-]*by|swinging[\s\-]*by|drop[\s\-]*in[\s\-]*on|go[\s\-]*over[\s\-]*to|look[\s\-]*in[\s\-]*on|calling[\s\-]*on|droping[\s\-]*by|inspecting|looking[\s\-]*up|popping[\s\-]*in|drop[\s\-]*over|reside[\s\-]*in|taking[\s\-]*in|dwell[\s\-]*in|frequent|stop[\s\-]*off|swing[\s\-]*by|visiting|crashing|arriving|call[\s\-]*on|come[\s\-]*by|drop[\s\-]*by|inspect|look[\s\-]*up|take[\s\-]*in|hitting|touring|go[\s\-]*see|voyage|arrive|seeing|visit|crash|reach|tour|hit|see))[\s,]*(?P((back[\s\-]*home|back|home)?))))))))'''""", u"is planning to visit", u'is planning to asdasd'), (ur"""ur'''(?P((right[\s\-]*up[\s\-]*front|head[\s\-]*of[\s\-]*line|introductory|fundamental|primogenial|rudimentary|elementary|number[\s\-]*one|numero[\s\-]*uno|primordial|beginning|first[\s\-]*off|inaugural|inceptive|incipient|primitive|principal|anterior|cardinal|foremost|headmost|lead[\s\-]*off|number[\s\-]*1|original|primeval|pristine|initial|leading|opening|pioneer|premier|primary|basic|chief|first|front|prime|primo|head|1st|key)?))[\s,]*(?P(determination|landing[\s\-]*place|journey['"]?s[\s\-]*end|premeditation|resting[\s\-]*place|destination|expectation|proposition|aspiration|intendment|direction|intention|objective|ambition|function|prospect|terminal|terminus|mission|project|purpose|resolve|station|desire|harbor|intent|scheme|target|dest\.|dream|drift|haven|place|point|scope|duty|goal|hope|idea|plan|stop|will|wish|aim|end))'''""", u"fundamental drift", u"head of line parallel"), (ur"""ur'''(?P(checking[\s\-]*in|check[\s\-]*in))|(?P(checking[\s\-]*out|check[\s\-]*out))'''""", u"Check-out", u"checking account"), ] def play(): """Play with the new regex module""" num_of_repeats, num_of_tests = 10, 10000 for index, (pattern, success_string, fail_string) in enumerate(PATTERNS): success_test = u"pattern.match(u'{}').group()".format(success_string) failure_test = u"pattern.match(u'{}')".format(fail_string) regex_setup = u"import regex as re; pattern = re.compile({}, re.I)" re_setup = u"import re; pattern = re.compile({}, re.I)" print "regex #{} match success = {} seconds".format(index, min(repeat(success_test, setup = regex_setup.format(pattern), repeat = num_of_repeats, number = num_of_tests))) print "re #{} match success = {} seconds".format(index, min(repeat(success_test, setup = re_setup.format(pattern), repeat = num_of_repeats, number = num_of_tests))) print "regex #{} match failure = {} seconds".format(index, min(repeat(failure_test, setup = regex_setup.format(pattern), repeat = num_of_repeats, number = num_of_tests))) print "re #{} match failure = {} seconds".format(index, min(repeat(failure_test, setup = re_setup.format(pattern), repeat = num_of_repeats, number = num_of_tests))) if __name__ == '__main__': play()