"""Benchmark alternative implementations of SequenceMatcher.quick_ratio.

Run this file with the current (3.5) difflib.py in place, not the patched
version: the stock ``quick_ratio`` is captured at import time as
``normal_quick_ratio`` and is timed against the Counter-based candidates
defined below.
"""
import collections
import difflib
import random
import string
from timeit import Timer
from collections import Counter as _Counter

SpeedResult = collections.namedtuple('SpeedResult',
                                     'wordLen tests cached oldNew time')

# The stock implementation, captured before runSpeedTests() patches the class.
normal_quick_ratio = difflib.SequenceMatcher.quick_ratio

# hybrid_quick_ratio uses the stock implementation while len(a) + len(b) is
# below this value and the Counter-based one otherwise.
_HYBRID_THRESHOLD = 120

# Characters randString() draws from.  The order matters: random.choice()
# indexes into this string, so reordering would change seeded output.
_ALPHABET = (string.ascii_uppercase + string.punctuation + string.whitespace
             + string.ascii_lowercase + string.digits)

# Word lengths benchmarked by runSpeedTests() when none are given.
_DEFAULT_WORD_LENS = (0, 1, 5, 20, 60, 200, 1000, 10000, 100000, 1000000,
                      10000000)


def new_quick_ratio(self):
    """Counter-based candidate replacement for SequenceMatcher.quick_ratio.

    Counts the elements of ``self.a`` and ``self.b`` with
    ``collections.Counter`` and sums, per element of ``b``, the smaller of
    the two counts.  The count of ``b`` is cached on ``self.fullbcount``
    and reused while that attribute is not None.

    Returns 1.0 when both sequences are empty, 0.0 when exactly one is
    empty, and otherwise ``difflib._calculate_ratio(matches, la + lb)``.

    NOTE: the loop is deliberately left as written; it is the code being
    benchmarked, so restructuring it would change what is measured.
    """
    la, lb = len(self.a), len(self.b)
    if la == 0 and lb == 0:
        return 1.0
    elif la == 0 or lb == 0:
        return 0.0

    if self.fullbcount is None:
        self.fullbcount = _Counter(self.b)
    fullbcount = self.fullbcount
    fullacount = _Counter(self.a)

    matches = 0
    for elem, count in fullbcount.items():
        other_count = fullacount[elem]  # a Counter yields 0 for missing keys
        matches += count if count < other_count else other_count
    return difflib._calculate_ratio(matches, la + lb)


def hybrid_quick_ratio(self):
    """Pick the stock or the Counter-based quick_ratio by input size.

    Returns 1.0 when both sequences are empty and 0.0 when exactly one is
    empty.  Otherwise delegates to ``normal_quick_ratio`` when
    ``len(self.a) + len(self.b)`` is below ``_HYBRID_THRESHOLD`` and to
    ``new_quick_ratio`` for anything larger.
    """
    la, lb = len(self.a), len(self.b)
    if la == 0 and lb == 0:
        return 1.0
    elif la == 0 or lb == 0:
        return 0.0

    if la + lb < _HYBRID_THRESHOLD:
        return normal_quick_ratio(self)
    # Same computation the large-input branch used to spell out inline.
    return new_quick_ratio(self)


def randString(n):
    """Return a string of ``n`` characters drawn at random from _ALPHABET."""
    return ''.join(random.choice(_ALPHABET) for _ in range(n))


def niceTime(t):
    """Format a duration ``t`` given in seconds using a readable unit.

    Adapted from timeit.main -- should eventually make it its own method...

    Durations below 1e-6 s are shown in nsec, below 1e-3 s in microseconds,
    below 1 s in msec, and everything else in plain sec.  The number is
    rendered with three significant digits ("%.3g").
    """
    # (exclusive upper bound in seconds, unit prefix).  The micro sign is
    # written as an escape so it cannot be mangled by a re-encoding of
    # this file again.
    scales = [(1e-6, 'n'), (1e-3, '\u00b5'), (1, 'm')]
    for upper, prefix in scales:
        if t < upper:
            # Each unit is 1000x smaller than its upper bound.
            return "%.3g %ssec" % ((t * 1000) / upper, prefix)
    return "%.3g %ssec" % (t, "")


def runSpeedTests(wordLens=_DEFAULT_WORD_LENS):
    """Time every quick_ratio variant over a range of word lengths.

    For each length in ``wordLens`` and for both the cached and uncached
    case, two random words are generated and each variant ('old', 'new',
    'hybrid') is installed as ``difflib.SequenceMatcher.quick_ratio`` and
    timed with ``timeit.Timer``.  In the uncached case the statement resets
    ``sm.fullbcount`` to None before every call.  Each result is printed as
    it is produced.

    Parameters:
        wordLens: iterable of word lengths to benchmark; defaults to the
            original fixed set (0 up to 10,000,000).

    Returns:
        A list of SpeedResult tuples, one per timed combination.

    The class attribute that was in place on entry is restored on exit,
    even when a run is interrupted or raises.
    """
    methods = (
        (normal_quick_ratio, 'old'),
        (new_quick_ratio, 'new'),
        (hybrid_quick_ratio, 'hybrid'),
    )
    testResults = []
    installed_quick_ratio = difflib.SequenceMatcher.quick_ratio
    try:
        for wordLen in wordLens:
            # Fewer repetitions for longer words keeps total work bounded.
            numTests = int(1000000 / (wordLen + 1)) + 1
            for cached in (True, False):
                wordA = randString(wordLen)
                wordB = randString(wordLen)
                if cached is False and numTests == 1:
                    # A single call cannot benefit from the cache, so the
                    # uncached run would only repeat the cached one.
                    continue
                statement = "sm.quick_ratio()"
                if cached is False:
                    statement = "sm.fullbcount = None; " + statement
                for qrMeth, qrMethName in methods:
                    difflib.SequenceMatcher.quick_ratio = qrMeth
                    sm = difflib.SequenceMatcher(None, wordA, wordB)
                    # Only ``sm`` is referenced by the timed statement.
                    timer = Timer(statement, globals={'sm': sm})
                    secondsReturned = timer.timeit(numTests)
                    speedTuple = SpeedResult(
                        wordLen, numTests, cached, qrMethName,
                        niceTime(secondsReturned / numTests))
                    testResults.append(speedTuple)
                    print(speedTuple)
    finally:
        # Do not leave the benchmark's monkeypatch installed process-wide.
        difflib.SequenceMatcher.quick_ratio = installed_quick_ratio
    return testResults


if __name__ == '__main__':
    print(runSpeedTests())