diff --git a/Lib/textwrap.py b/Lib/textwrap.py
--- a/Lib/textwrap.py
+++ b/Lib/textwrap.py
@@ -79,10 +79,22 @@ class TextWrapper:
     # splits into
     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
     # (after stripping out empty strings).
+    word_punct = r'[\w\!\"\'\&\.\,\?]'
     wordsep_re = re.compile(
-        r'(\s+|'                                  # any whitespace
-        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
-        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
+        r'('
+        # any whitespace
+        r'\s+'
+        # run of two or more hyphens with whitespace on both sides
+        r'|(?<=\s)-{{2,}}(?=\s)'
+        # run of two or more hyphens between two words (em-dash)
+        r'|(?<={wp})-{{2,}}(?=\w)'
+        # word, possibly punctuated or hyphenated
+        r'|\S*?\w{wp}*(?:-\d\w+)*(?:[^-\w\s]+|-(?=\w)|--+(?:[^-\w]*$))?'
+        # catch-all for other stuff, until next space
+        r'|\S+'
+        r')'
+        .format(wp=word_punct))
+    del word_punct
 
     # This less funky little regex just split on recognized spaces. E.g.
     #   "Hello there -- you goof-ball, use the -b option!"