diff -r 19b2c54e5f09 Lib/test/test_textwrap.py
--- a/Lib/test/test_textwrap.py Wed Nov 12 10:23:44 2014 -0500
+++ b/Lib/test/test_textwrap.py Thu Nov 13 20:04:29 2014 +0200
@@ -184,6 +184,14 @@ What a mess!
         self.check_wrap(text, 42,
                         ["this-is-a-useful-feature-for-reformatting-",
                          "posts-from-tim-peters'ly"])
+        expect = ("this-|is-|a-|useful-|feature-|for-|"
+                  "reformatting-|posts-|from-|tim-|peters'ly").split('|')
+        self.check_wrap(text, 1, expect, break_long_words=False)
+        self.check_split(text, expect)
+
+        self.check_split('e-mail', ['e-mail'])
+        self.check_split('Jelly-O', ['Jelly-O'])
+        self.check_split('half-a-crown', 'half-|a-|crown'.split('|'))
 
     def test_hyphenated_numbers(self):
         # Test that hyphenated numbers (eg. dates) are not broken like words.
@@ -195,6 +203,7 @@ What a mess!
                                    'released on 1994-02-15.'])
         self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
                                    'Python 1.0.1 was released on 1994-02-15.'])
+        self.check_wrap(text, 1, text.split(), break_long_words=False)
 
         text = "I do all my shopping at 7-11."
         self.check_wrap(text, 25, ["I do all my shopping at",
@@ -202,6 +211,7 @@ What a mess!
         self.check_wrap(text, 27, ["I do all my shopping at",
                                    "7-11."])
         self.check_wrap(text, 29, ["I do all my shopping at 7-11."])
+        self.check_wrap(text, 1, text.split(), break_long_words=False)
 
     def test_em_dash(self):
         # Test text with em-dashes
@@ -326,6 +336,9 @@ What a mess!
         self.check_split("the ['wibble-wobble'] widget",
                          ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])
 
+        self.check_split("what-d'you-call-it.",
+                         "what-d'you-|call-|it.".split('|'))
+
     def test_funky_parens (self):
         # Second part of SF bug #596434: long option strings inside
         # parentheses.
diff -r 19b2c54e5f09 Lib/textwrap.py
--- a/Lib/textwrap.py Wed Nov 12 10:23:44 2014 -0500
+++ b/Lib/textwrap.py Thu Nov 13 20:04:29 2014 +0200
@@ -79,10 +79,25 @@ class TextWrapper:
     # splits into
     # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
     # (after stripping out empty strings).
-    wordsep_re = re.compile(
-        r'(\s+|' # any whitespace
-        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words
-        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
+    word_punct = r'[\w!"\'&.,?]'
+    letter = r'[^\d\W]'
+    wordsep_re = re.compile(r'''(?x)
+        ( # any whitespace
+          \s+
+        | # em-dash between words
+          (?<=%(wp)s) -{2,} (?=\w)
+        | # word, possibly hyphenated
+          \S+? (?:
+            # hyphenated word
+              -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
+              (?= %(lt)s -? %(lt)s)
+            | # end of word
+              (?=\s|\Z)
+            | # em-dash
+              (?<=%(wp)s) (?=-{2,}\w)
+            )
+        )''' % {'wp': word_punct, 'lt': letter})
+    del word_punct, letter
 
     # This less funky little regex just split on recognized spaces. E.g.
     # "Hello there -- you goof-ball, use the -b option!"