Index: Doc/library/textwrap.rst =================================================================== --- Doc/library/textwrap.rst (revision 62424) +++ Doc/library/textwrap.rst (working copy) @@ -173,10 +173,20 @@ (Long words will be put on a line by themselves, in order to minimize the amount by which :attr:`width` is exceeded.) +.. attribute:: TextWrapper.break_on_hyphens + + (default: ``True``) If true, wrapping will occur preferably on whitespace and + right after hyphens in compound words, as is customary in English. If false, + only whitespace will be considered as a potentially good place for line breaks, + but you need to set :attr:`break_long_words` to false if you want truly + unbreakable words. Default behaviour in previous versions was to always allow + breaking hyphenated words. + + .. versionadded:: 2.6 + :class:`TextWrapper` also provides two public methods, analogous to the module-level convenience functions: - .. method:: TextWrapper.wrap(text) Wraps the single paragraph in *text* (a string) so every line is at most Index: Lib/textwrap.py =================================================================== --- Lib/textwrap.py (revision 62424) +++ Lib/textwrap.py (working copy) @@ -63,6 +63,10 @@ break_long_words (default: true) Break words longer than 'width'. If false, those words will not be broken, and some lines might be longer than 'width'. + break_on_hyphens (default: true) + Allow breaking hyphenated words. If true, wrapping will occur + preferably on whitespace and right after hyphens that are part of + compound words. drop_whitespace (default: true) Drop leading and trailing whitespace from lines. """ @@ -85,6 +89,12 @@ r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash + # This less funky little regex just splits on recognized spaces. E.g. + # "Hello there -- you goof-ball, use the -b option!" 
+ # splits into + # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ + wordsep_simple_re = re.compile(r'(\s+)') + # XXX this is not locale- or charset-aware -- string.lowercase # is US-ASCII only (and therefore English-only) sentence_end_re = re.compile(r'[%s]' # lowercase letter @@ -101,7 +111,8 @@ replace_whitespace=True, fix_sentence_endings=False, break_long_words=True, - drop_whitespace=True): + drop_whitespace=True, + break_on_hyphens=True): self.width = width self.initial_indent = initial_indent self.subsequent_indent = subsequent_indent @@ -110,6 +121,7 @@ self.fix_sentence_endings = fix_sentence_endings self.break_long_words = break_long_words self.drop_whitespace = drop_whitespace + self.break_on_hyphens = break_on_hyphens # -- Private methods ----------------------------------------------- @@ -142,8 +154,15 @@ breaks into the following chunks: 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ', 'use', ' ', 'the', ' ', '-b', ' ', 'option!' + if break_on_hyphens is True, or into: + 'Look,', ' ', 'goof-ball', ' ', '--', ' ', + 'use', ' ', 'the', ' ', '-b', ' ', 'option!' + otherwise. 
""" - chunks = self.wordsep_re.split(text) + if self.break_on_hyphens is True: + chunks = self.wordsep_re.split(text) + else: + chunks = self.wordsep_simple_re.split(text) chunks = filter(None, chunks) # remove empty chunks return chunks Index: Lib/test/test_textwrap.py =================================================================== --- Lib/test/test_textwrap.py (revision 62424) +++ Lib/test/test_textwrap.py (working copy) @@ -360,6 +360,14 @@ ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-", "ball,", " ", "use", " ", "the", " ", "-b", " ", "option!"]) + def test_break_on_hyphens(self): + # Ensure that the break_on_hyphens attribute works + text = "yaba daba-doo" + self.check_wrap(text, 10, ["yaba daba-", "doo"], + break_on_hyphens=True) + self.check_wrap(text, 10, ["yaba", "daba-doo"], + break_on_hyphens=False) + def test_bad_width(self): # Ensure that width <= 0 is caught. text = "Whatever, it doesn't matter."