diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
--- a/Lib/test/test_textwrap.py
+++ b/Lib/test/test_textwrap.py
@@ -11,7 +11,7 @@
 import unittest
 from test import support
 
-from textwrap import TextWrapper, wrap, fill, dedent, indent
+from textwrap import TextWrapper, wrap, fill, dedent, indent, summarize
 
 
 class BaseTestCase(unittest.TestCase):
@@ -43,6 +43,10 @@
                          "\nexpected %r\n"
                          "but got %r" % (expect, result))
 
+    def check_summarize(self, text, width, expect, **kwargs):
+        result = summarize(text, width, **kwargs)
+        self.check(result, expect)
+
 
 class WrapTestCase(BaseTestCase):
 
@@ -777,12 +781,57 @@
         self.assertEqual(indent(text, prefix, predicate), expect)
 
 
+class SummarizeTestCase(BaseTestCase):
+
+    def test_simple(self):
+        # Simple case: just words, spaces, and a bit of punctuation
+        text = "Hello there, how are you this fine day? I'm glad to hear it!"
+
+        self.check_summarize(text, 18, "Hello there, (...)")
+        self.check_summarize(text, len(text), text)
+        self.check_summarize(text, len(text) - 1,
+            "Hello there, how are you this fine day? "
+            "I'm glad to (...)")
+
+    def test_placeholder(self):
+        text = "Hello there, how are you this fine day? I'm glad to hear it!"
+
+        self.check_summarize(text, 18, "Hello there, $$", placeholder='$$')
+        self.check_summarize(text, len(text), text, placeholder='$$')
+        self.check_summarize(text, len(text) - 1,
+            "Hello there, how are you this fine day? "
+            "I'm glad to hear $$", placeholder='$$')
+
+    def test_empty_string(self):
+        self.check_summarize("", 6, "")
+
+    def test_whitespace(self):
+        # Whitespace collapsing
+        text = """
+            This is a paragraph that already has
+            line breaks and \t tabs too."""
+        self.check_summarize(text, 62,
+                             "This is a paragraph that already has line "
+                             "breaks and tabs too.")
+        self.check_summarize(text, 61,
+                             "This is a paragraph that already has line "
+                             "breaks and (...)")
+
+    def test_hyphenated_words(self):
+        # Chunks split at a hyphen are rejoined without a space
+        text = "A well-known fact"
+        self.check_summarize(text, len(text), text)
+        self.check_summarize(text, len(text) - 1, "A well- (...)")
+
+    def test_width_too_small_for_placeholder(self):
+        wrapper = TextWrapper(width=8)
+        wrapper.summarize("x" * 20, placeholder="(......)")
+        with self.assertRaises(ValueError):
+            wrapper.summarize("x" * 20, placeholder="(.......)")
+
+
 def test_main():
-    support.run_unittest(WrapTestCase,
-                         LongWordTestCase,
-                         IndentTestCases,
-                         DedentTestCase,
-                         IndentTestCase)
+    support.run_unittest(__name__)
 
 if __name__ == '__main__':
     test_main()
diff --git a/Lib/textwrap.py b/Lib/textwrap.py
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -19,6 +19,8 @@
 # since 0xa0 is not in range(128).
 _whitespace = '\t\n\x0b\x0c\r '
 
+_default_placeholder = '(...)'
+
 class TextWrapper:
     """
     Object for wrapping/filling text. The public interface consists of
@@ -277,6 +279,9 @@
 
         return lines
 
+    def _chunkize(self, text):
+        text = self._munge_whitespace(text)
+        return self._split(text)
 
     # -- Public interface ----------------------------------------------
 
@@ -289,8 +294,7 @@
         and all other whitespace characters (including newline) are
         converted to space.
         """
-        text = self._munge_whitespace(text)
-        chunks = self._split(text)
+        chunks = self._chunkize(text)
         if self.fix_sentence_endings:
             self._fix_sentence_endings(chunks)
         return self._wrap_chunks(chunks)
@@ -304,6 +308,50 @@
         """
         return "\n".join(self.wrap(text))
 
+    def summarize(self, text, *, placeholder=_default_placeholder):
+        """Collapse and truncate 'text' to fit in self.width columns.
+
+        Whitespace in 'text' is collapsed first.  If the result fits
+        in self.width columns it is returned as is; otherwise as many
+        leading chunks as possible are kept and 'placeholder' is
+        appended after a single space.  Raise ValueError if
+        'placeholder' is longer than self.width.
+        """
+        max_length = self.width
+        if max_length < len(placeholder):
+            raise ValueError("placeholder too large for max width")
+        sep = ' '
+        # Each piece is a chunk prefixed by the separator (if any) that
+        # precedes it.  Chunks produced by splitting at a hyphen or a
+        # dash are not separated by whitespace in the text, so they
+        # must be glued back together without inserting a space.
+        pieces = []
+        cur_len = 0
+        pending_sep = ''
+        for chunk in self._chunkize(text):
+            if not chunk.strip():
+                # A whitespace run collapses to one separator; leading
+                # whitespace is dropped.
+                if pieces:
+                    pending_sep = sep
+                continue
+            piece = pending_sep + chunk
+            if cur_len + len(piece) > max_length:
+                break
+            pieces.append(piece)
+            cur_len += len(piece)
+            pending_sep = ''
+        else:
+            # No truncation necessary
+            return ''.join(pieces)
+        # Drop trailing pieces until the separator and the placeholder fit.
+        max_truncated_length = max_length - len(placeholder) - len(sep)
+        while pieces and cur_len > max_truncated_length:
+            cur_len -= len(pieces.pop())
+        if not pieces:
+            return placeholder
+        return ''.join(pieces) + sep + placeholder
+
 
     # -- Convenience interface ---------------------------------------------
 
@@ -332,6 +380,21 @@
     w = TextWrapper(width=width, **kwargs)
     return w.fill(text)
 
+def summarize(text, width, *, placeholder=_default_placeholder, **kwargs):
+    """Truncate and collapse the given text to fit in the given width.
+
+    The text first has its whitespace collapsed. If it then fits in
+    the *width*, it is returned unchanged. Otherwise, as many words
+    as possible are joined and then the placeholder is appended::
+
+        >>> textwrap.summarize("Hello world!", width=12)
+        'Hello world!'
+        >>> textwrap.summarize("Hello world!", width=11)
+        'Hello (...)'
+    """
+    w = TextWrapper(width=width, **kwargs)
+    return w.summarize(text, placeholder=placeholder)
+
 
 # -- Loosely related functionality -------------------------------------
 