from collections import Counter as _Counter class SequenceMatcher: def quick_ratio(self): """Return an upper bound on ratio() relatively quickly. This isn't defined beyond that it is an upper bound on .ratio(), and is faster to compute. """ len_a, len_b = len(self.a), len(self.b) # degenerate cases are otherwise quite slow # compared to other code. if len_a == 0 and len_b == 0: return 1.0 elif len_a == 0 or len_b == 0: return 0.0 # viewing a and b as multisets, set matches to the cardinality # of their intersection; this counts the number of matches # without regard to order, so is clearly an upper bound if self.fullbcount is None: if len_b > 60 and len_a > 60: self.fullbcount = _Counter(self.b) else: self.fullbcount = fullbcount = {} for elt in self.b: fullbcount[elt] = fullbcount.get(elt, 0) + 1 fullbcount = self.fullbcount # code branch -- for < about 120 items total # the overhead of creating this Counter is too # high, especially since it is never cached. if len_b > 60 and type(fullbcount) != dict: fullacount = _Counter(self.a) # inplace saves creating a third Counter fullacount &= fullbcount matches = sum(fullacount.values()) else: # use older code: # avail[x] is the number of times x appears in 'b' less the # number of times we've seen it in 'a' so far ... kinda avail = {} availhas, matches = avail.__contains__, 0 for elt in self.a: if availhas(elt): numb = avail[elt] else: numb = fullbcount.get(elt, 0) avail[elt] = numb - 1 if numb > 0: matches = matches + 1 return _calculate_ratio(matches, len_a + len_b)