# HG changeset patch # User RussellBallestrini # Date 1398350380 14400 # Thu Apr 24 10:39:40 2014 -0400 # Node ID 70436261056967ad963b049495487eb546d3f65c # Parent 4ff37fbcd4e829ed3f57edd241f2d9f681707154 Save scores or ratios in difflib get_close_matches The current implementation of difflib's get_close_matches() function computes expensive similarity scores (ratios) but then discards them without giving the caller any way to access them. This patch adds an optional "scores" boolean argument that changes the return value from a list of words to a list of (score, word) tuples. changed Lib/difflib.py diff -r 4ff37fbcd4e8 -r 704362610569 Lib/difflib.py --- a/Lib/difflib.py Wed Apr 23 15:37:37 2014 -0500 +++ b/Lib/difflib.py Thu Apr 24 10:39:40 2014 -0400 @@ -1,7 +1,7 @@ """ Module difflib -- helpers for computing deltas between objects. -Function get_close_matches(word, possibilities, n=3, cutoff=0.6): +Function get_close_matches(word, possibilities, n=3, cutoff=0.6, scores=False): Use SequenceMatcher to return list of the best "good enough" matches. Function context_diff(a, b): @@ -685,7 +685,7 @@ # shorter sequence return _calculate_ratio(min(la, lb), la + lb) -def get_close_matches(word, possibilities, n=3, cutoff=0.6): +def get_close_matches(word, possibilities, n=3, cutoff=0.6, scores=False): """Use SequenceMatcher to return list of the best "good enough" matches. word is a sequence for which close matches are desired (typically a @@ -700,6 +700,10 @@ Optional arg cutoff (default 0.6) is a float in [0, 1]. Possibilities that don't score at least that similar to word are ignored. + Optional arg scores (default False) is a boolean. If False a list of + words will be returned. If True a list of (score, word) tuples will be + returned. + The best (no more than n) matches among the possibilities are returned in a list, sorted by similarity score, most similar first. 
@@ -708,6 +712,8 @@ >>> import keyword as _keyword >>> get_close_matches("wheel", _keyword.kwlist) ['while'] + >>> get_close_matches("wheel", _keyword.kwlist, scores=True) + [(0.6, 'while')] >>> get_close_matches("Apple", _keyword.kwlist) [] >>> get_close_matches("accept", _keyword.kwlist) @@ -730,6 +736,10 @@ # Move the best scorers to head of list result = heapq.nlargest(n, result) + + if scores: + return result + # Strip scores for the best n matches return [x for score, x in result]