# HG changeset patch
# User Kaarle Ritvanen <kaarle.ritvanen@datakunkku.fi>
# Date 1391535506 -7200
#      Tue Feb 04 19:38:26 2014 +0200
# Node ID 867d5d7bec05c7c20035310132fc580ed183efca
# Parent  854d05c13a8ec096e5f53b0ff38e60059d1d21f9
textwrap: Honor non-breaking spaces

diff -r 854d05c13a8e -r 867d5d7bec05 Lib/test/test_textwrap.py
--- a/Lib/test/test_textwrap.py	Tue Feb 04 18:18:27 2014 +0100
+++ b/Lib/test/test_textwrap.py	Tue Feb 04 19:38:26 2014 +0200
@@ -428,6 +428,21 @@
         text = "aa \xe4\xe4-\xe4\xe4"
         self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"])
 
+    def test_non_breaking_space(self):
+        text = 'This is a sentence with non-breaking\xa0space.'
+        # U+00A0 is not a break point; only the hyphen in 'non-' may split.
+        self.check_wrap(text, 20,
+                        ['This is a sentence',
+                         'with non-',
+                         'breaking\xa0space.'],
+                        break_on_hyphens=True)
+        # With hyphen breaking off, 'non-breaking\xa0space.' stays whole.
+        self.check_wrap(text, 20,
+                        ['This is a sentence',
+                         'with',
+                         'non-breaking\xa0space.'],
+                        break_on_hyphens=False)
+
 
 class MaxLinesTestCase(BaseTestCase):
     text = "Hello there, how are you this fine day?  I'm glad to hear it!"
diff -r 854d05c13a8e -r 867d5d7bec05 Lib/textwrap.py
--- a/Lib/textwrap.py	Tue Feb 04 18:18:27 2014 +0100
+++ b/Lib/textwrap.py	Tue Feb 04 19:38:26 2014 +0200
@@ -80,15 +80,19 @@
     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
     # (after stripping out empty strings).
     wordsep_re = re.compile(
-        r'(\s+|'                                  # any whitespace
-        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
-        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
+        # any whitespace (escaped: it is spliced into a character class)
+        (r'([%s]+|' % re.escape(_whitespace)) +
+        # hyphenated words; leading punctuation excludes the same set
+        (r'[^%s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' % re.escape(_whitespace)) +
+        # em-dash
+        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))'
+    )
 
     # This less funky little regex just split on recognized spaces. E.g.
     #   "Hello there -- you goof-ball, use the -b option!"
     # splits into
     #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
-    wordsep_simple_re = re.compile(r'(\s+)')
+    wordsep_simple_re = re.compile(r'([%s]+)' % re.escape(_whitespace))
 
     # XXX this is not locale- or charset-aware -- string.lowercase
     # is US-ASCII only (and therefore English-only)