textwrap: do not treat non-breaking spaces as word separators.

wordsep_re and wordsep_simple_re used \s to find whitespace.  For str
patterns \s also matches U+00A0 NO-BREAK SPACE and U+202F NARROW NO-BREAK
SPACE, so wrapped text could be broken at exactly the places where a break
is not allowed.  Both patterns are now built from the module-level
_whitespace string (defined elsewhere in textwrap.py), passed through
re.escape() so the character class stays valid whatever that string holds.
Regression tests cover both characters, with and without break_on_hyphens.

NOTE(review): this patch was recovered from a copy in which line breaks and
runs of spaces had been collapsed.  Indentation, aligned trailing comments
and repeated spaces inside context lines were reconstructed; verify the
context lines against revision 06ce648ac6ac before applying (or apply with
"patch -l" to ignore whitespace differences).

diff -r 06ce648ac6ac Lib/test/test_textwrap.py
--- a/Lib/test/test_textwrap.py	Mon Mar 17 19:22:59 2014 +0100
+++ b/Lib/test/test_textwrap.py	Tue Mar 18 22:59:58 2014 +0200
@@ -428,6 +428,37 @@
         text = "aa \xe4\xe4-\xe4\xe4"
         self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"])
 
+    def test_non_breaking_space(self):
+        text = 'This is a sentence with non-breaking\N{NO-BREAK SPACE}space.'
+
+        self.check_wrap(text, 20,
+                        ['This is a sentence',
+                         'with non-',
+                         'breaking\N{NO-BREAK SPACE}space.'],
+                        break_on_hyphens=True)
+
+        self.check_wrap(text, 20,
+                        ['This is a sentence',
+                         'with',
+                         'non-breaking\N{NO-BREAK SPACE}space.'],
+                        break_on_hyphens=False)
+
+    def test_narrow_non_breaking_space(self):
+        text = 'This is a sentence with non-breaking'\
+               '\N{NARROW NO-BREAK SPACE}space.'
+
+        self.check_wrap(text, 20,
+                        ['This is a sentence',
+                         'with non-',
+                         'breaking\N{NARROW NO-BREAK SPACE}space.'],
+                        break_on_hyphens=True)
+
+        self.check_wrap(text, 20,
+                        ['This is a sentence',
+                         'with',
+                         'non-breaking\N{NARROW NO-BREAK SPACE}space.'],
+                        break_on_hyphens=False)
+
 
 class MaxLinesTestCase(BaseTestCase):
     text = "Hello there, how are you this fine day?  I'm glad to hear it!"
diff -r 06ce648ac6ac Lib/textwrap.py
--- a/Lib/textwrap.py	Mon Mar 17 19:22:59 2014 +0100
+++ b/Lib/textwrap.py	Tue Mar 18 22:59:58 2014 +0200
@@ -80,15 +80,21 @@
     # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
     # (after stripping out empty strings).
     wordsep_re = re.compile(
-        r'(\s+|'                                  # any whitespace
-        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
-        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
+        # Use the explicit _whitespace set rather than \s: for str patterns
+        # \s also matches no-break spaces, which must never be break points.
+        # any whitespace
+        (r'([%s]+|' % re.escape(_whitespace)) +
+        # hyphenated words
+        (r'[^%s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' % re.escape(_whitespace)) +
+        # em-dash
+        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))'
+        )
 
     # This less funky little regex just split on recognized spaces. E.g.
     #   "Hello there -- you goof-ball, use the -b option!"
     # splits into
     #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
-    wordsep_simple_re = re.compile(r'(\s+)')
+    wordsep_simple_re = re.compile(r'([%s]+)' % re.escape(_whitespace))
 
     # XXX this is not locale- or charset-aware -- string.lowercase
     # is US-ASCII only (and therefore English-only)