diff -r 98678738b7e9 Lib/difflib.py --- a/Lib/difflib.py Sun May 01 20:34:00 2016 +0300 +++ b/Lib/difflib.py Sun May 01 17:26:39 2016 -0400 @@ -32,6 +32,7 @@ from heapq import nlargest as _nlargest from collections import namedtuple as _namedtuple +from collections import Counter as _Counter Match = _namedtuple('Match', 'a b size') @@ -184,7 +185,8 @@ # fullbcount # for x in b, fullbcount[x] == the number of times x # appears in b; only materialized if really needed (used - # only for computing quick_ratio()) + # only for computing quick_ratio()). + # uses collections.Counter() # matching_blocks # a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k]; # ascending & non-overlapping in i and in j; terminated by @@ -651,27 +653,40 @@ is faster to compute. """ + la, lb = len(self.a), len(self.b) + + # degenerate cases are otherwise quite slow + # compared to < 3.6 code. + if la == 0 and lb == 0: + return 1.0 + elif la == 0 or lb == 0: + return 0.0 + # viewing a and b as multisets, set matches to the cardinality # of their intersection; this counts the number of matches # without regard to order, so is clearly an upper bound if self.fullbcount is None: - self.fullbcount = fullbcount = {} - for elt in self.b: - fullbcount[elt] = fullbcount.get(elt, 0) + 1 + self.fullbcount = _Counter(self.b) + + fullbcount = self.fullbcount - # avail[x] is the number of times x appears in 'b' less the - # number of times we've seen it in 'a' so far ... kinda - avail = {} - availhas, matches = avail.__contains__, 0 - for elt in self.a: - if availhas(elt): - numb = avail[elt] - else: - numb = fullbcount.get(elt, 0) - avail[elt] = numb - 1 - if numb > 0: - matches = matches + 1 - return _calculate_ratio(matches, len(self.a) + len(self.b)) + fullacount = _Counter(self.a) + + # intersection of collections.Counter() + # is defined as the minimum count number for + # each element. 
Thus, this would work: + # + # sum((fullacount & fullbcount).values()) + # + # but it involves creating another Counter, etc., + # so we sum the per-element minimums directly + + matches = 0 + for elem, count in fullbcount.items(): + other_count = fullacount[elem] # Counter yields 0 for a missing element + matches += count if count < other_count else other_count + + return _calculate_ratio(matches, la + lb) def real_quick_ratio(self): """Return an upper bound on ratio() very quickly.