diff -r b0866382064f Lib/pydoc.py
--- a/Lib/pydoc.py	Wed Sep 25 10:42:27 2013 -0400
+++ b/Lib/pydoc.py	Sun Jan 05 16:43:38 2014 +0900
@@ -81,6 +81,7 @@
 def getdoc(object):
     """Get the doc string or comments for an object."""
     result = inspect.getdoc(object) or inspect.getcomments(object)
+    result = _encode(result)
     return result and re.sub('^ *\n', '', rstrip(result)) or ''
 
 def splitdoc(doc):
@@ -182,6 +183,32 @@
         return name, kind, cls, value
     return map(fixup, inspect.classify_class_attrs(object))
 
+# ----------------------------------------------------- Unicode support helpers
+
+try:
+    _unicode = unicode
+except NameError:
+    # If Python is built without Unicode support, the unicode type
+    # will not exist. Fake one.
+    class _unicode(object):
+        pass
+
+    _encoding = 'ascii'
+    def _encode(text, encoding='ascii'):
+        return text
+else:
+    import locale
+    _encoding = locale.getpreferredencoding()
+
+    def _encode(text, encoding=None):
+        if isinstance(text, unicode):
+            return text.encode(encoding or _encoding, 'xmlcharrefreplace')
+        else:
+            return text
+
+def _binstr(obj):
+    return obj.encode(_encoding) if isinstance(obj, _unicode) else str(obj)
+
 # ----------------------------------------------------- module manipulation
 
 def ispackage(path):
@@ -424,12 +451,13 @@
 
     def page(self, title, contents):
         """Format an HTML page."""
-        return '''
+        return _encode('''
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
 <html><head><title>Python: %s</title>
+<meta charset="utf-8">
 </head><body bgcolor="#f0f0f8">
 %s
-</body></html>''' % (title, contents)
+</body></html>''' % (title, contents), 'ascii')
 
     def heading(self, title, fgcol, bgcol, extras=''):
         """Format a page heading."""
@@ -606,12 +634,12 @@
             filelink = '(built-in)'
         info = []
         if hasattr(object, '__version__'):
-            version = str(object.__version__)
+            version = _binstr(object.__version__)
             if version[:11] == '$' + 'Revision: ' and version[-1:] == '$':
                 version = strip(version[11:-1])
             info.append('version %s' % self.escape(version))
         if hasattr(object, '__date__'):
-            info.append(self.escape(str(object.__date__)))
+            info.append(self.escape(_binstr(object.__date__)))
         if info:
             head = head + ' (%s)' % join(info, ', ')
         docloc = self.getdocloc(object)
@@ -694,11 +722,11 @@
             result = result + self.bigsection(
                 'Data', '#ffffff', '#55aa55', join(contents, '<br>\n'))
         if hasattr(object, '__author__'):
-            contents = self.markup(str(object.__author__), self.preformat)
+            contents = self.markup(_binstr(object.__author__), self.preformat)
             result = result + self.bigsection(
                 'Author', '#ffffff', '#7799ee', contents)
         if hasattr(object, '__credits__'):
-            contents = self.markup(str(object.__credits__), self.preformat)
+            contents = self.markup(_binstr(object.__credits__), self.preformat)
             result = result + self.bigsection(
                 'Credits', '#ffffff', '#7799ee', contents)
 
@@ -1116,16 +1144,16 @@
             result = result + self.section('DATA', join(contents, '\n'))
 
         if hasattr(object, '__version__'):
-            version = str(object.__version__)
+            version = _binstr(object.__version__)
             if version[:11] == '$' + 'Revision: ' and version[-1:] == '$':
                 version = strip(version[11:-1])
             result = result + self.section('VERSION', version)
         if hasattr(object, '__date__'):
-            result = result + self.section('DATE', str(object.__date__))
+            result = result + self.section('DATE', _binstr(object.__date__))
         if hasattr(object, '__author__'):
-            result = result + self.section('AUTHOR', str(object.__author__))
+            result = result + self.section('AUTHOR', _binstr(object.__author__))
         if hasattr(object, '__credits__'):
-            result = result + self.section('CREDITS', str(object.__credits__))
+            result = result + self.section('CREDITS', _binstr(object.__credits__))
         return result
 
     def docclass(self, object, name=None, mod=None, *ignored):
@@ -1375,7 +1403,7 @@
     """Page through text by feeding it to another program."""
     pipe = os.popen(cmd, 'w')
     try:
-        pipe.write(text)
+        pipe.write(_encode(text))
         pipe.close()
     except IOError:
         pass # Ignore broken pipes caused by quitting the pager program.
@@ -1385,7 +1413,7 @@
     import tempfile
     filename = tempfile.mktemp()
     file = open(filename, 'w')
-    file.write(text)
+    file.write(_encode(text))
     file.close()
     try:
         os.system(cmd + ' "' + filename + '"')
@@ -1394,7 +1422,7 @@
 
 def ttypager(text):
     """Page through text on a text terminal."""
-    lines = split(plain(text), '\n')
+    lines = plain(_encode(plain(text), getattr(sys.stdout, 'encoding', _encoding))).split('\n')
     try:
         import tty
         fd = sys.stdin.fileno()
@@ -1432,7 +1460,7 @@
 
 def plainpager(text):
     """Simply print unformatted text.  This is the ultimate fallback."""
-    sys.stdout.write(plain(text))
+    sys.stdout.write(_encode(plain(text), getattr(sys.stdout, 'encoding', _encoding)))
 
 def describe(thing):
     """Produce a short description of the given thing."""
diff -r b0866382064f Lib/test/test_pydoc.py
--- a/Lib/test/test_pydoc.py	Wed Sep 25 10:42:27 2013 -0400
+++ b/Lib/test/test_pydoc.py	Sun Jan 05 16:43:38 2014 +0900
@@ -10,6 +10,7 @@
 import pkgutil
 import unittest
 import xml.etree
+import types
 import test.test_support
 from collections import namedtuple
 from test.script_helper import assert_python_ok
@@ -428,6 +429,95 @@
         self.assertIn('_asdict', helptext)
 
 
+@unittest.skipUnless(test.test_support.have_unicode,
+                     "test requires unicode support")
+class TestUnicode(unittest.TestCase):
+
+    def setUp(self):
+        # Better not to use unicode escapes in literals, lest the
+        # parser choke on it if Python has been built without
+        # unicode support.
+        self.Q = types.ModuleType(
+            'Q', 'Rational numbers: \xe2\x84\x9a'.decode('utf8'))
+        self.Q.__version__ = '\xe2\x84\x9a'.decode('utf8')
+        self.Q.__date__ = '\xe2\x84\x9a'.decode('utf8')
+        self.Q.__author__ = '\xe2\x84\x9a'.decode('utf8')
+        self.Q.__credits__ = '\xe2\x84\x9a'.decode('utf8')
+
+        self.assertIsInstance(self.Q.__doc__, unicode)
+
+    def test_render_doc(self):
+        # render_doc is robust against unicode in docstrings
+        doc = pydoc.render_doc(self.Q)
+        self.assertIsInstance(doc, str)
+
+    def test_encode(self):
+        # _encode is robust against characters out the specified encoding
+        self.assertEqual(pydoc._encode(self.Q.__doc__, 'ascii'), 'Rational numbers: &#8474;')
+
+    def test_pipepager(self):
+        # pipepager does not choke on unicode
+        doc = pydoc.render_doc(self.Q)
+
+        saved, os.popen = os.popen, open
+        try:
+            with test.test_support.temp_cwd():
+                pydoc.pipepager(doc, 'pipe')
+                self.assertEqual(open('pipe').read(), pydoc._encode(doc))
+        finally:
+            os.popen = saved
+
+    def test_tempfilepager(self):
+        # tempfilepager does not choke on unicode
+        doc = pydoc.render_doc(self.Q)
+
+        output = {}
+        def mock_system(cmd):
+            import ast
+            output['content'] = open(ast.literal_eval(cmd.strip())).read()
+        saved, os.system = os.system, mock_system
+        try:
+            pydoc.tempfilepager(doc, '')
+            self.assertEqual(output['content'], pydoc._encode(doc))
+        finally:
+            os.system = saved
+
+    def test_plainpager(self):
+        # plainpager does not choke on unicode
+        doc = pydoc.render_doc(self.Q)
+
+        # Note: captured_stdout is too permissive when it comes to
+        # unicode, and using it here would make the test always
+        # pass.
+        with test.test_support.temp_cwd():
+            with open('output', 'w') as f:
+                saved, sys.stdout = sys.stdout, f
+                try:
+                    pydoc.plainpager(doc)
+                finally:
+                    sys.stdout = saved
+            self.assertIn('Rational numbers:', open('output').read())
+
+    def test_ttypager(self):
+        # ttypager does not choke on unicode
+        doc = pydoc.render_doc(self.Q)
+        # Test ttypager
+        with test.test_support.temp_cwd(), test.test_support.captured_stdin():
+            with open('output', 'w') as f:
+                saved, sys.stdout = sys.stdout, f
+                try:
+                    pydoc.ttypager(doc)
+                finally:
+                    sys.stdout = saved
+            self.assertIn('Rational numbers:', open('output').read())
+
+    def test_htmlpage(self):
+        # html.page does not choke on unicode
+        with test.test_support.temp_cwd():
+            with captured_stdout() as output:
+                pydoc.writedoc(self.Q)
+            self.assertEqual(output.getvalue(), 'wrote Q.html\n')
+
 class TestHelper(unittest.TestCase):
     def test_keywords(self):
         self.assertEqual(sorted(pydoc.Helper.keywords),
@@ -456,6 +546,7 @@
         test.test_support.run_unittest(PydocDocTest,
                                        PydocImportTest,
                                        TestDescriptions,
+                                       TestUnicode,
                                        TestHelper)
     finally:
         reap_children()