diff -r 84658272e923 Lib/pydoc.py
--- a/Lib/pydoc.py	Thu Sep 19 21:06:37 2013 -0500
+++ b/Lib/pydoc.py	Wed Sep 25 12:31:41 2013 +0530
@@ -64,6 +64,7 @@
 import time
 import tokenize
 import warnings
+import ast
 from collections import deque
 from reprlib import Repr
 from traceback import extract_tb, format_exception_only
@@ -202,22 +203,45 @@
                 return True
     return False
 
-def source_synopsis(file):
-    line = file.readline()
-    while line[:1] == '#' or not line.strip():
-        line = file.readline()
-        if not line: break
-    line = line.strip()
-    if line[:4] == 'r"""': line = line[1:]
-    if line[:3] == '"""':
-        line = line[3:]
-        if line[-1:] == '\\': line = line[:-1]
-        while not line.strip():
-            line = file.readline()
-            if not line: break
-        result = line.split('"""')[0].strip()
-    else: result = None
-    return result
+def source_synopsis(file_):
+    """Return the one-line summary of the Python source in file_, if any.
+
+    file_ is a text or binary file object positioned at the start of the
+    source.  The summary is the first line of the stripped module docstring.
+    None is returned when the first statement is not a plain string literal
+    or when the source cannot be tokenized.
+    """
+    if isinstance(file_, io.TextIOBase):
+        # Text streams already yield str; tokenize them directly instead of
+        # re-encoding the whole file into a temporary bytes buffer.
+        tokens = tokenize.generate_tokens(file_.readline)
+    else:
+        # Binary streams: tokenize() detects the encoding (BOM / cookie).
+        tokens = tokenize.tokenize(file_.readline)
+
+    literals = []
+    try:
+        for token in tokens:
+            if token.type == tokenize.STRING:
+                # Adjacent literals are implicitly concatenated.
+                literals.append(token.string)
+            elif token.type in (tokenize.NEWLINE, tokenize.ENDMARKER):
+                # End of the first logical line, or of the source.
+                break
+            elif token.type not in (tokenize.COMMENT, tokenize.NL,
+                                    tokenize.ENCODING):
+                # The first statement is more than a bare string literal.
+                return None
+        if not literals:
+            return None
+        docstring = ast.literal_eval(' '.join(literals))
+    except (tokenize.TokenError, SyntaxError, ValueError):
+        # Malformed source (UnicodeDecodeError is a ValueError): no synopsis.
+        return None
+    if not isinstance(docstring, str):
+        # A bytes literal is not a docstring.
+        return None
+    return docstring.strip().split('\n')[0].strip()
 
 def synopsis(filename, cache={}):
     """Get the one-line summary out of a module file."""
diff -r 84658272e923 Lib/test/test_pydoc.py
--- a/Lib/test/test_pydoc.py	Thu Sep 19 21:06:37 2013 -0500
+++ b/Lib/test/test_pydoc.py	Wed Sep 25 12:31:41 2013 +0530
@@ -417,6 +417,50 @@
             synopsis = pydoc.synopsis(TESTFN, {})
             self.assertEqual(synopsis, 'line 1: h\xe9')
 
+    def test_synopsis_with_triple_single_quotes(self):
+        # test for issue 1185124
+        # synopsis is extracted even from triple single-quoted strings
+        self.addCleanup(unlink, TESTFN)
+        for encoding in ('ISO-8859-1', 'UTF-8'):
+            with open(TESTFN, 'w', encoding=encoding) as script:
+                if encoding != 'UTF-8':
+                    print('#coding: {}'.format(encoding), file=script)
+                print("'''\nline 1: h\xe9", file=script)
+                print("line 2: hi'''", file=script)
+            synopsis = pydoc.synopsis(TESTFN, {})
+            self.assertEqual(synopsis, 'line 1: h\xe9')
+
+    def test_source_synopsis(self):
+        # extension to issue 1185124
+        # source_synopsis should now work in all cases where __doc__ works
+
+        # single quotes
+        example_string = "#!/usr/bin/python\n\n'Module summary'\nprint('hello')"
+        synopsis = pydoc.source_synopsis(StringIO(example_string))
+        self.assertEqual(synopsis, 'Module summary')
+
+        # double quotes
+        example_string = '"summary"'
+        synopsis = pydoc.source_synopsis(StringIO(example_string))
+        self.assertEqual(synopsis, 'summary')
+
+        # implicitly concatenated literals form one docstring
+        example_string = "'Module ' 'summary'\n"
+        synopsis = pydoc.source_synopsis(StringIO(example_string))
+        self.assertEqual(synopsis, 'Module summary')
+
+        # a string that only starts a larger expression is not a docstring
+        example_string = "'abc'.upper()\n"
+        self.assertIsNone(pydoc.source_synopsis(StringIO(example_string)))
+
+        # neither is a bytes literal
+        example_string = "b'summary'\n"
+        self.assertIsNone(pydoc.source_synopsis(StringIO(example_string)))
+
+        # unterminated source must not raise
+        example_string = "'''never closed\n"
+        self.assertIsNone(pydoc.source_synopsis(StringIO(example_string)))
+
     def test_splitdoc_with_description(self):
         example_string = "I Am A Doc\n\n\nHere is my description"
         self.assertEqual(pydoc.splitdoc(example_string),