# HG changeset patch
# User Kaarle Ritvanen
# Date 1391535506 -7200
#      Tue Feb 04 19:38:26 2014 +0200
# Node ID 867d5d7bec05c7c20035310132fc580ed183efca
# Parent  854d05c13a8ec096e5f53b0ff38e60059d1d21f9
textwrap: Honor non-breaking spaces

Split only on the characters listed in _whitespace instead of on \s,
which for str patterns also matches non-breaking spaces such as U+00A0.
The whitespace characters are passed through re.escape() before being
interpolated into the regex character classes, so the patterns stay
valid even if _whitespace ever gains a regex metacharacter.

diff -r 854d05c13a8e -r 867d5d7bec05 Lib/test/test_textwrap.py
--- a/Lib/test/test_textwrap.py	Tue Feb 04 18:18:27 2014 +0100
+++ b/Lib/test/test_textwrap.py	Tue Feb 04 19:38:26 2014 +0200
@@ -428,6 +428,22 @@
         text = "aa \xe4\xe4-\xe4\xe4"
         self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"])
 
+    def test_non_breaking_space(self):
+        # U+00A0 (no-break space) must never be used as a break point.
+        text = 'This is a sentence with non-breaking\xa0space.'
+
+        self.check_wrap(text, 20,
+                        ['This is a sentence',
+                         'with non-',
+                         'breaking\xa0space.'],
+                        break_on_hyphens=True)
+
+        self.check_wrap(text, 20,
+                        ['This is a sentence',
+                         'with',
+                         'non-breaking\xa0space.'],
+                        break_on_hyphens=False)
+
 
 class MaxLinesTestCase(BaseTestCase):
     text = "Hello there, how are you this fine day?  I'm glad to hear it!"
diff -r 854d05c13a8e -r 867d5d7bec05 Lib/textwrap.py
--- a/Lib/textwrap.py	Tue Feb 04 18:18:27 2014 +0100
+++ b/Lib/textwrap.py	Tue Feb 04 19:38:26 2014 +0200
@@ -80,15 +80,22 @@
     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
     # (after stripping out empty strings).
     wordsep_re = re.compile(
-        r'(\s+|'                                  # any whitespace
-        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
-        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
+        # Build the whitespace class from _whitespace rather than \s:
+        # on str patterns \s also matches non-breaking spaces such as
+        # U+00A0, which must not be treated as break points.
+        # any whitespace
+        (r'([%s]+|' % re.escape(_whitespace)) +
+        # hyphenated words
+        (r'[^%s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' % re.escape(_whitespace)) +
+        # em-dash
+        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))'
+        )
 
     # This less funky little regex just split on recognized spaces. E.g.
     #   "Hello there -- you goof-ball, use the -b option!"
     # splits into
     #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
-    wordsep_simple_re = re.compile(r'(\s+)')
+    wordsep_simple_re = re.compile(r'([%s]+)' % re.escape(_whitespace))
 
     # XXX this is not locale- or charset-aware -- string.lowercase
     # is US-ASCII only (and therefore English-only)