# HG changeset patch # User RussellBallestrini # Date 1398354587 14400 # Thu Apr 24 11:49:47 2014 -0400 # Node ID faf99c07ae77625c61a8fdc65ddc22afe0ed07bf # Parent 4ff37fbcd4e829ed3f57edd241f2d9f681707154 New function in difflib: get_scored_matches() This function acts just like the existing get_close_matches() function; however, instead of returning a list of words, it returns a list of (score, word) tuples. This gives the end-user the ability to access the computationally expensive scores/ratios produced as a by-product. The new usage does _not_ impact backward compatibility:: >>> import difflib >>> import keyword as _keyword >>> difflib.get_scored_matches("wheel", _keyword.kwlist) [(0.6, 'while')] >>> difflib.get_close_matches("wheel", _keyword.kwlist) ['while'] changed Lib/difflib.py diff -r 4ff37fbcd4e8 -r faf99c07ae77 Lib/difflib.py --- a/Lib/difflib.py Wed Apr 23 15:37:37 2014 -0500 +++ b/Lib/difflib.py Thu Apr 24 11:49:47 2014 -0400 @@ -4,6 +4,9 @@ Function get_close_matches(word, possibilities, n=3, cutoff=0.6): Use SequenceMatcher to return list of the best "good enough" matches. +Function get_scored_matches(word, possibilities, n=3, cutoff=0.6): + Use SequenceMatcher to return list of the best (score, word) tuples. + Function context_diff(a, b): For two lists of strings, return a delta in context diff format. @@ -713,6 +716,20 @@ >>> get_close_matches("accept", _keyword.kwlist) ['except'] """ + result = get_scored_matches(word, possibilities, n, cutoff) + # Strip scores for the best n matches + return [x for score, x in result] + + +def get_scored_matches(word, possibilities, n=3, cutoff=0.6): + """Use SequenceMatcher to return list of the best (score, word) tuples. + + All arguments are the same as get_close_matches(). 
+ + >>> import keyword as _keyword + >>> get_scored_matches("wheel", _keyword.kwlist) + [(0.6, 'while')] + """ if not n > 0: raise ValueError("n must be > 0: %r" % (n,)) @@ -728,10 +745,8 @@ s.ratio() >= cutoff: result.append((s.ratio(), x)) - # Move the best scorers to head of list - result = heapq.nlargest(n, result) - # Strip scores for the best n matches - return [x for score, x in result] + # Move the best scorers to head of list and return result. + return heapq.nlargest(n, result) def _count_leading(line, ch): """