Index: Lib/pydoc.py =================================================================== --- Lib/pydoc.py (revision 78302) +++ Lib/pydoc.py (working copy) @@ -52,7 +52,7 @@ # the current directory is changed with os.chdir(), an incorrect # path will be displayed. -import sys, imp, os, re, types, inspect, __builtin__, pkgutil +import sys, imp, os, re, types, inspect, __builtin__, pkgutil, locale from repr import Repr from string import expandtabs, find, join, lower, split, strip, rfind, rstrip from traceback import extract_tb @@ -1344,6 +1344,14 @@ finally: os.unlink(filename) +def encoded(text): + """Encode unicode text.""" + if isinstance(text, unicode): + output_encoding = (getattr(sys.stdout, 'encoding', None) or + locale.getpreferredencoding() or 'ascii') + return text.encode(output_encoding, 'replace') + return text + def plain(text): """Remove boldface formatting from text.""" return re.sub('.\b', '', text) @@ -1352,7 +1360,7 @@ """Page through text by feeding it to another program.""" pipe = os.popen(cmd, 'w') try: - pipe.write(text) + pipe.write(encoded(text)) pipe.close() except IOError: pass # Ignore broken pipes caused by quitting the pager program. @@ -1362,7 +1370,7 @@ import tempfile filename = tempfile.mktemp() file = open(filename, 'w') - file.write(text) + file.write(encoded(text)) file.close() try: os.system(cmd + ' "' + filename + '"') @@ -1371,6 +1379,7 @@ def ttypager(text): """Page through text on a text terminal.""" + text = encoded(text) lines = split(plain(text), '\n') try: import tty @@ -1409,6 +1418,7 @@ def plainpager(text): """Simply print unformatted text. This is the ultimate fallback.""" + text = encoded(text) sys.stdout.write(plain(text)) def describe(thing): Index: Lib/test/pydoc_mod.py =================================================================== --- Lib/test/pydoc_mod.py (revision 78302) +++ Lib/test/pydoc_mod.py (working copy) @@ -16,11 +16,12 @@ pass def doc_func(): - """ + u""" This function solves all of the world's problems: hunger lack of Python war + \xfcnicode\u2026 """ def nodoc_func(): Index: Lib/test/test_pydoc.py =================================================================== --- Lib/test/test_pydoc.py (revision 78302) +++ Lib/test/test_pydoc.py (working copy) @@ -1,21 +1,20 @@ import sys import os -import os.path import difflib +import locale import subprocess -import re import pydoc import inspect import unittest -import test.test_support from contextlib import contextmanager from test.test_support import ( + run_unittest, captured_stdout, TESTFN, forget, rmtree, EnvironmentVarGuard, reap_children) from test import pydoc_mod expected_text_pattern = \ -""" +u""" NAME test.pydoc_mod - This is a test module for test_pydoc @@ -55,6 +54,7 @@ hunger lack of Python war + \xfcnicode\u2026 \x20\x20\x20\x20 nodoc_func() @@ -74,7 +74,7 @@ """.strip() expected_html_pattern = \ -""" +u"""
 
@@ -138,7 +138,8 @@
doc_func()
This function solves all of the world's problems:
hunger
lack of Python
-war
+war
+\xfcnicode\u2026
nodoc_func()

@@ -203,9 +204,10 @@ output = doc.docmodule(module) - # cleanup the extra text formatting that pydoc preforms - patt = re.compile('\b.') - output = patt.sub('', output) + # go through the pydoc pager + with captured_stdout() as stdout: + pydoc.pager(output) + output = stdout.getvalue() return output.strip(), loc def print_diffs(text1, text2): @@ -216,6 +218,10 @@ tofile='got') print '\n' + ''.join(diffs) +def encoded(text): + "Return encoded string" + return text.encode(locale.getpreferredencoding(), 'replace') + class PyDocDocTest(unittest.TestCase): @@ -236,10 +242,33 @@ result, doc_loc = get_pydoc_text(pydoc_mod) expected_text = expected_text_pattern % \ (inspect.getabsfile(pydoc_mod), doc_loc) + expected_text = encoded(expected_text) if result != expected_text: print_diffs(expected_text, result) self.fail("outputs are not equal, see diff above") + def test_pager_unicode(self): + docstring = u"f\xfcr Elise" + with captured_stdout() as stdout: + pydoc.pager(docstring) + self.assertEqual(stdout.getvalue(), encoded(docstring)) + + def test_pager_bytes(self): + docstring = u"f\xfcr Elise" + for docbytes in docstring.encode('latin-1'), docstring.encode('utf-8'): + with captured_stdout() as stdout: + pydoc.pager(docbytes) + self.assertEqual(stdout.getvalue(), docbytes) + + def test_bogus_bytestring(self): + # This is preserved for backward compatibility. + # It should not be used, because it assumes that the + # docstring encoding is the same as the user preferred encoding. + def ludwig_van_b(): + """f\xfcr Elise""" + result, doc_loc = get_pydoc_text(ludwig_van_b) + self.assertIn(ludwig_van_b.__doc__, result) + def test_not_here(self): missing_module = "test.i_am_not_here" result = run_pydoc(missing_module) @@ -332,8 +361,7 @@ def test_main(): - test.test_support.run_unittest(PyDocDocTest, - TestDescriptions) + run_unittest(PyDocDocTest, TestDescriptions) if __name__ == "__main__": test_main()