diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst --- a/Doc/library/textwrap.rst +++ b/Doc/library/textwrap.rst @@ -10,11 +10,13 @@ -------------- -The :mod:`textwrap` module provides two convenience functions, :func:`wrap` and -:func:`fill`, as well as :class:`TextWrapper`, the class that does all the work, -and two utility functions, :func:`dedent` and :func:`indent`. If you're just wrapping or filling one -or two text strings, the convenience functions should be good enough; -otherwise, you should use an instance of :class:`TextWrapper` for efficiency. +The :mod:`textwrap` module provides three convenience functions, +:func:`wrap`, :func:`fill` and :func:`shorten`, as well as +:class:`TextWrapper`, the class that does all the work, and two utility +functions, :func:`dedent` and :func:`indent`. If you're just wrapping or +filling one or two text strings, the convenience functions should be good +enough; otherwise, you should use an instance of :class:`TextWrapper` for +efficiency. .. function:: wrap(text, width=70, **kwargs) @@ -39,10 +41,30 @@ otherwise, you should use an instance o In particular, :func:`fill` accepts exactly the same keyword arguments as :func:`wrap`. -Both :func:`wrap` and :func:`fill` work by creating a :class:`TextWrapper` -instance and calling a single method on it. That instance is not reused, so for -applications that wrap/fill many text strings, it will be more efficient for you -to create your own :class:`TextWrapper` object. + +.. function:: shorten(text, width=70, *, placeholder=" (...)") + + Truncate and collapse the given text to fit in the given width. + + The text first has its whitespace collapsed. If it then fits in + the *width*, it is returned unchanged. Otherwise, as many words + as possible are joined and then the *placeholder* is appended:: + + >>> textwrap.shorten("Hello world!", width=12) + 'Hello world!' 
+ >>> textwrap.shorten("Hello world!", width=11) + 'Hello (...)' + >>> textwrap.shorten("Hello world", width=10, placeholder="...") + 'Hello...' + + .. versionadded:: 3.4 + + +:func:`wrap`, :func:`fill` and :func:`shorten` work by creating a +:class:`TextWrapper` instance and calling a single method on it. That +instance is not reused, so for applications that process many text +strings, it may be more efficient to create your own +:class:`TextWrapper` object. Text is preferably wrapped on whitespaces and right after the hyphens in hyphenated words; only then will long words be broken if necessary, unless @@ -235,7 +257,7 @@ in a block of text. was to always allow breaking hyphenated words. - :class:`TextWrapper` also provides two public methods, analogous to the + :class:`TextWrapper` also provides three public methods, analogous to the module-level convenience functions: .. method:: wrap(text) @@ -252,3 +274,14 @@ in a block of text. Wraps the single paragraph in *text*, and returns a single string containing the wrapped paragraph. + + .. method:: shorten(text, *, placeholder=" (...)") + + Truncate and collapse the given text to fit in :attr:`width` + characters. + + The text first has its whitespace collapsed. If it then fits in + :attr:`width`, it is returned unchanged. Otherwise, as many words + as possible are joined and then the *placeholder* is appended. + + .. 
versionadded:: 3.4 diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py --- a/Lib/test/test_textwrap.py +++ b/Lib/test/test_textwrap.py @@ -9,9 +9,8 @@ # import unittest -from test import support -from textwrap import TextWrapper, wrap, fill, dedent, indent +from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten class BaseTestCase(unittest.TestCase): @@ -43,6 +42,10 @@ class BaseTestCase(unittest.TestCase): "\nexpected %r\n" "but got %r" % (expect, result)) + def check_shorten(self, text, width, expect, **kwargs): + result = shorten(text, width, **kwargs) + self.check(result, expect) + class WrapTestCase(BaseTestCase): @@ -777,12 +780,66 @@ class IndentTestCase(unittest.TestCase): self.assertEqual(indent(text, prefix, predicate), expect) -def test_main(): - support.run_unittest(WrapTestCase, - LongWordTestCase, - IndentTestCases, - DedentTestCase, - IndentTestCase) +class ShortenTestCase(BaseTestCase): + + def test_simple(self): + # Simple case: just words, spaces, and a bit of punctuation + text = "Hello there, how are you this fine day? I'm glad to hear it!" + + self.check_shorten(text, 18, "Hello there, (...)") + self.check_shorten(text, len(text), text) + self.check_shorten(text, len(text) - 1, + "Hello there, how are you this fine day? " + "I'm glad to (...)") + + def test_placeholder(self): + text = "Hello there, how are you this fine day? I'm glad to hear it!" + + self.check_shorten(text, 17, "Hello there,$$", placeholder='$$') + self.check_shorten(text, 18, "Hello there, how$$", placeholder='$$') + self.check_shorten(text, 18, "Hello there, $$", placeholder=' $$') + self.check_shorten(text, len(text), text, placeholder='$$') + self.check_shorten(text, len(text) - 1, + "Hello there, how are you this fine day? 
" + "I'm glad to hear$$", placeholder='$$') + + def test_empty_string(self): + self.check_shorten("", 6, "") + + def test_whitespace(self): + # Whitespace collapsing + text = """ + This is a paragraph that already has + line breaks and \t tabs too.""" + self.check_shorten(text, 62, + "This is a paragraph that already has line " + "breaks and tabs too.") + self.check_shorten(text, 61, + "This is a paragraph that already has line " + "breaks and (...)") + + self.check_shorten("hello world! ", 12, "hello world!") + self.check_shorten("hello world! ", 11, "hello (...)") + # The leading space is trimmed from the placeholder + # (it would be ugly otherwise). + self.check_shorten("hello world! ", 10, "(...)") + + def test_hyphenated_words(self): + # Hyphenated words must come back intact, not as "well- known". + self.check_shorten("a well-known fact", 17, "a well-known fact") + self.check_shorten("a well-known fact", 16, "a (...)") + + def test_width_too_small_for_placeholder(self): + wrapper = TextWrapper(width=8) + # A placeholder exactly as wide as the width is still accepted. + self.check(wrapper.shorten("x" * 20, placeholder="(......)"), + "(......)") + with self.assertRaises(ValueError): + wrapper.shorten("x" * 20, placeholder="(.......)") + + def test_first_word_too_long_but_placeholder_fits(self): + self.check_shorten("Helloo", 5, "(...)") + if __name__ == '__main__': - test_main() + unittest.main() diff --git a/Lib/textwrap.py b/Lib/textwrap.py --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -19,6 +19,8 @@ import re # since 0xa0 is not in range(128). _whitespace = '\t\n\x0b\x0c\r ' +_default_placeholder = ' (...)' + class TextWrapper: """ Object for wrapping/filling text. The public interface consists of @@ -277,6 +279,9 @@ class TextWrapper: return lines + def _chunkize(self, text): + text = self._munge_whitespace(text) + return self._split(text) # -- Public interface ---------------------------------------------- @@ -289,8 +294,7 @@ class TextWrapper: and all other whitespace characters (including newline) are converted to space. 
""" - text = self._munge_whitespace(text) - chunks = self._split(text) + chunks = self._chunkize(text) if self.fix_sentence_endings: self._fix_sentence_endings(chunks) return self._wrap_chunks(chunks) @@ -304,6 +308,39 @@ class TextWrapper: """ return "\n".join(self.wrap(text)) + def shorten(self, text, *, placeholder=_default_placeholder): + """shorten(text: str) -> str + + Collapse the whitespace in 'text' and truncate it to fit in + 'self.width' columns. Text that already fits is returned as + is; otherwise trailing words are dropped and 'placeholder' is + appended. Raise ValueError if the stripped placeholder is + wider than 'self.width'. + """ + max_length = self.width + if max_length < len(placeholder.strip()): + raise ValueError("placeholder too large for max width") + sep = ' ' + sep_len = len(sep) + parts = [] + cur_len = 0 + # Split on whitespace only: the wrapping chunker also breaks + # after hyphens, which would turn "well-known" into "well- known". + for chunk in text.split(): + chunk_len = len(chunk) + sep_len if parts else len(chunk) + if cur_len + chunk_len > max_length: + break + parts.append(chunk) + cur_len += chunk_len + else: + # No truncation necessary + return sep.join(parts) + max_truncated_length = max_length - len(placeholder) + while parts and cur_len > max_truncated_length: + last = parts.pop() + cur_len -= len(last) + sep_len + return (sep.join(parts) + placeholder).strip() + # -- Convenience interface --------------------------------------------- @@ -332,6 +369,23 @@ def fill(text, width=70, **kwargs): w = TextWrapper(width=width, **kwargs) return w.fill(text) +def shorten(text, width=70, *, placeholder=_default_placeholder, **kwargs): + """Truncate and collapse the given text to fit in the given width. + + The text first has its whitespace collapsed. If it then fits in + the *width*, it is returned unchanged. Otherwise, as many words + as possible are joined and then the placeholder is appended:: + + >>> textwrap.shorten("Hello world!", width=12) + 'Hello world!' + >>> textwrap.shorten("Hello world!", width=11) + 'Hello (...)' + + Additional keyword arguments are passed on to TextWrapper(). + """ + w = TextWrapper(width=width, **kwargs) + return w.shorten(text, placeholder=placeholder) + # -- Loosely related functionality -------------------------------------