# HG changeset patch
# User RussellBallestrini
# Date 1398390177 14400
#      Thu Apr 24 21:42:57 2014 -0400
# Node ID a138619f47292c80fca77fbdc9621afa05f473dc
# Parent  78bbfcccff11eab2d1010f54a79b9eaf11f55983
Added documentation for get_scored_matches()

changed Doc/library/difflib.rst

diff -r 78bbfcccff11 -r a138619f4729 Doc/library/difflib.rst
--- a/Doc/library/difflib.rst	Thu Apr 24 20:58:05 2014 -0400
+++ b/Doc/library/difflib.rst	Thu Apr 24 21:42:57 2014 -0400
@@ -207,6 +207,33 @@
       ['except']
 
 
+.. function:: get_scored_matches(word, possibilities, n=3, cutoff=0.6)
+
+   Return a list of the best "good enough" matches as ``(score, word)``
+   tuples.  This function behaves like :func:`get_close_matches` and accepts
+   the same arguments; the only difference is the shape of the result::
+
+       [(score, word), ...]
+
+   *word* is a sequence for which close matches are desired
+   (typically a string), and *possibilities* is a list of sequences
+   against which to match *word* (typically a list of strings).
+
+   Optional argument *n* (default ``3``) is the maximum number of
+   close matches to return; *n* must be greater than ``0``.
+
+   Optional argument *cutoff* (default ``0.6``) is a float in the
+   range [0, 1]. Possibilities that don't score at least that similar
+   to *word* are ignored.
+
+   The best (no more than *n*) matches among the possibilities are
+   returned in a list, sorted by similarity score, most similar first.
+
+       >>> import keyword as _keyword
+       >>> get_scored_matches("wheel", _keyword.kwlist)
+       [(0.6, 'while')]
+
+
 .. function:: ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK)
 
    Compare *a* and *b* (lists of strings); return a :class:`Differ`\ -style
# HG changeset patch
# User RussellBallestrini
# Date 1398387485 14400
#      Thu Apr 24 20:58:05 2014 -0400
# Node ID 78bbfcccff11eab2d1010f54a79b9eaf11f55983
# Parent  4ff37fbcd4e829ed3f57edd241f2d9f681707154
New function in difflib: get_scored_matches()

This function acts just like the existing get_close_matches()
function; however, instead of returning a list of words, it
returns a list of (score, word) tuples.

This gives the end-user the ability to access the
computationally expensive scores/ratios produced as a by-product.

This patch also contains complete test coverage for both
get_close_matches() and get_scored_matches().

The new usage does _not_ impact backward compatibility::

  >>> import difflib
  >>> import keyword as _keyword
  >>> difflib.get_scored_matches("wheel", _keyword.kwlist)
  [(0.6, 'while')]
  >>> difflib.get_close_matches("wheel", _keyword.kwlist)
  ['while']

hg: branch 'default'
changed Lib/difflib.py
changed Lib/test/test_difflib.py

diff -r 4ff37fbcd4e8 -r 78bbfcccff11 Lib/difflib.py
--- a/Lib/difflib.py	Wed Apr 23 15:37:37 2014 -0500
+++ b/Lib/difflib.py	Thu Apr 24 20:58:05 2014 -0400
@@ -4,6 +4,9 @@
 Function get_close_matches(word, possibilities, n=3, cutoff=0.6):
     Use SequenceMatcher to return list of the best "good enough" matches.
 
+Function get_scored_matches(word, possibilities, n=3, cutoff=0.6):
+    Use SequenceMatcher to return list of the best (score, word) tuples.
+
 Function context_diff(a, b):
     For two lists of strings, return a delta in context diff format.
 
@@ -713,6 +716,20 @@
     >>> get_close_matches("accept", _keyword.kwlist)
     ['except']
     """
+    result = get_scored_matches(word, possibilities, n, cutoff)
+    # Strip the scores: turn the (score, word) tuples into a list of words.
+    return [x for score, x in result]
+
+
+def get_scored_matches(word, possibilities, n=3, cutoff=0.6):
+    """Use SequenceMatcher to return list of the best (score, word) tuples.
+
+    All arguments are the same as get_close_matches().
+
+    >>> import keyword as _keyword
+    >>> get_scored_matches("wheel", _keyword.kwlist)
+    [(0.6, 'while')]
+    """
 
     if not n >  0:
         raise ValueError("n must be > 0: %r" % (n,))
@@ -728,10 +745,8 @@
            s.ratio() >= cutoff:
             result.append((s.ratio(), x))
 
-    # Move the best scorers to head of list
-    result = heapq.nlargest(n, result)
-    # Strip scores for the best n matches
-    return [x for score, x in result]
+    # Move the best scorers to head of list and return result.
+    return heapq.nlargest(n, result)
 
 def _count_leading(line, ch):
     """
diff -r 4ff37fbcd4e8 -r 78bbfcccff11 Lib/test/test_difflib.py
--- a/Lib/test/test_difflib.py	Wed Apr 23 15:37:37 2014 -0500
+++ b/Lib/test/test_difflib.py	Thu Apr 24 20:58:05 2014 -0400
@@ -278,12 +278,61 @@
         self.assertEqual(fmt(0,0), '0')
 
 
+class TestGetCloseScoredMatches(unittest.TestCase):
+    """
+    This test suite covers the following difflib functions:
+
+    * get_scored_matches
+    * get_close_matches
+    """
+    def test_get_close_matches_with_defaults(self):
+        matches = difflib.get_close_matches("appel",
+            ["ape", "apple", "peach", "puppy"])
+        self.assertEqual(matches, ["apple", "ape"])
+
+    def test_get_scored_matches_with_defaults(self):
+        matches = difflib.get_scored_matches("appel",
+            ["ape", "apple", "peach", "puppy"])
+        self.assertEqual(matches, [(0.8, "apple"), (0.75, "ape")])
+
+    def test_scored_dog_is_dog(self):
+        matches = difflib.get_scored_matches("dog", ["dog"])
+        self.assertEqual(matches, [(1.0, "dog")])
+
+    def test_scored_dog_is_not_cat(self):
+        matches = difflib.get_scored_matches("dog", ["cat"])
+        self.assertEqual(matches, [])
+
+    def test_scored_n_is_one(self):
+        matches = difflib.get_scored_matches("dog", ["dog", "dog"], n=1)
+        self.assertEqual(len(matches), 1)
+        self.assertEqual(matches, [(1.0, "dog")])
+
+    def test_scored_cutoff_is_point_nine_five(self):
+        matches = difflib.get_scored_matches("dog", ["dog", "doge"],
+            cutoff=.95)
+        self.assertEqual(len(matches), 1)
+        self.assertEqual(matches, [(1.0, "dog")])
+
+    def test_negative_n_is_value_error(self):
+        self.assertRaises(ValueError,
+            difflib.get_close_matches, "a", ["a"], n=-1)
+
+    def test_negative_cutoff_is_value_error(self):
+        self.assertRaises(ValueError,
+            difflib.get_close_matches, "a", ["a"], cutoff=-.5)
+
+    def test_two_cutoff_is_value_error(self):
+        self.assertRaises(ValueError,
+            difflib.get_close_matches, "a", ["a"], cutoff=2.0)
+
+
 def test_main():
     difflib.HtmlDiff._default_prefix = 0
     Doctests = doctest.DocTestSuite(difflib)
     run_unittest(
         TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
-        TestOutputFormat, Doctests)
+        TestOutputFormat, TestGetCloseScoredMatches, Doctests)
 
 if __name__ == '__main__':
     test_main()