Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(11108)

Side by Side Diff: Lib/textwrap.py

Issue 20491: textwrap: Non-breaking space not honored
Patch Set: Created 3 years, 9 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Lib/test/test_textwrap.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLD | NEW
1 """Text wrapping and filling. 1 """Text wrapping and filling.
2 """ 2 """
3 3
4 # Copyright (C) 1999-2001 Gregory P. Ward. 4 # Copyright (C) 1999-2001 Gregory P. Ward.
5 # Copyright (C) 2002, 2003 Python Software Foundation. 5 # Copyright (C) 2002, 2003 Python Software Foundation.
6 # Written by Greg Ward <gward@python.net> 6 # Written by Greg Ward <gward@python.net>
7 7
8 import re 8 import re
9 9
10 __all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten'] 10 __all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']
11 11
12 # Hardcode the recognized whitespace characters to the US-ASCII 12 # Hardcode the recognized whitespace characters to the US-ASCII
13 # whitespace characters. The main reason for doing this is that in 13 # whitespace characters. The main reason for doing this is that
14 # ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales 14 # some Unicode spaces (like \u00a0) are non-breaking whitespaces.
15 # that character winds up in string.whitespace. Respecting
16 # string.whitespace in those cases would 1) make textwrap treat 0xa0 the
17 # same as any other whitespace char, which is clearly wrong (it's a
18 # *non-breaking* space), 2) possibly cause problems with Unicode,
19 # since 0xa0 is not in range(128).
20 _whitespace = '\t\n\x0b\x0c\r ' 15 _whitespace = '\t\n\x0b\x0c\r '
21 16
22 class TextWrapper: 17 class TextWrapper:
23 """ 18 """
24 Object for wrapping/filling text. The public interface consists of 19 Object for wrapping/filling text. The public interface consists of
25 the wrap() and fill() methods; the other methods are just there for 20 the wrap() and fill() methods; the other methods are just there for
26 subclasses to override in order to tweak the default behaviour. 21 subclasses to override in order to tweak the default behaviour.
27 If you want to completely replace the main wrapping algorithm, 22 If you want to completely replace the main wrapping algorithm,
28 you'll probably have to override _wrap_chunks(). 23 you'll probably have to override _wrap_chunks().
29 24
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 unicode_whitespace_trans[ord(x)] = uspace 69 unicode_whitespace_trans[ord(x)] = uspace
75 70
76 # This funky little regex is just the trick for splitting 71 # This funky little regex is just the trick for splitting
77 # text up into word-wrappable chunks. E.g. 72 # text up into word-wrappable chunks. E.g.
78 # "Hello there -- you goof-ball, use the -b option!" 73 # "Hello there -- you goof-ball, use the -b option!"
79 # splits into 74 # splits into
80 # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option! 75 # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
81 # (after stripping out empty strings). 76 # (after stripping out empty strings).
82 word_punct = r'[\w!"\'&.,?]' 77 word_punct = r'[\w!"\'&.,?]'
83 letter = r'[^\d\W]' 78 letter = r'[^\d\W]'
79 whitespace = r'[%s]' % re.escape(_whitespace)
80 nowhitespace = '[^' + whitespace[1:]
84 wordsep_re = re.compile(r''' 81 wordsep_re = re.compile(r'''
85 ( # any whitespace 82 ( # any whitespace
86 \s+ 83 %(ws)s+
87 | # em-dash between words 84 | # em-dash between words
88 (?<=%(wp)s) -{2,} (?=\w) 85 (?<=%(wp)s) -{2,} (?=\w)
89 | # word, possibly hyphenated 86 | # word, possibly hyphenated
90 \S+? (?: 87 %(nws)s+? (?:
91 # hyphenated word 88 # hyphenated word
92 -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-)) 89 -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
93 (?= %(lt)s -? %(lt)s) 90 (?= %(lt)s -? %(lt)s)
94 | # end of word 91 | # end of word
95 (?=\s|\Z) 92 (?=%(ws)s|\Z)
96 | # em-dash 93 | # em-dash
97 (?<=%(wp)s) (?=-{2,}\w) 94 (?<=%(wp)s) (?=-{2,}\w)
98 ) 95 )
99 )''' % {'wp': word_punct, 'lt': letter}, re.VERBOSE) 96 )''' % {'wp': word_punct, 'lt': letter,
100 del word_punct, letter 97 'ws': whitespace, 'nws': nowhitespace},
98 re.VERBOSE)
99 del word_punct, letter, nowhitespace
101 100
102 # This less funky little regex just split on recognized spaces. E.g. 101 # This less funky little regex just split on recognized spaces. E.g.
103 # "Hello there -- you goof-ball, use the -b option!" 102 # "Hello there -- you goof-ball, use the -b option!"
104 # splits into 103 # splits into
105 # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/ 104 # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
106 wordsep_simple_re = re.compile(r'(\s+)') 105 wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
106 del whitespace
107 107
108 # XXX this is not locale- or charset-aware -- string.lowercase 108 # XXX this is not locale- or charset-aware -- string.lowercase
109 # is US-ASCII only (and therefore English-only) 109 # is US-ASCII only (and therefore English-only)
110 sentence_end_re = re.compile(r'[a-z]' # lowercase letter 110 sentence_end_re = re.compile(r'[a-z]' # lowercase letter
111 r'[\.\!\?]' # sentence-ending punct. 111 r'[\.\!\?]' # sentence-ending punct.
112 r'[\"\']?' # optional end-of-quote 112 r'[\"\']?' # optional end-of-quote
113 r'\Z') # end of chunk 113 r'\Z') # end of chunk
114
115 114
116 def __init__(self, 115 def __init__(self,
117 width=70, 116 width=70,
118 initial_indent="", 117 initial_indent="",
119 subsequent_indent="", 118 subsequent_indent="",
120 expand_tabs=True, 119 expand_tabs=True,
121 replace_whitespace=True, 120 replace_whitespace=True,
122 fix_sentence_endings=False, 121 fix_sentence_endings=False,
123 break_long_words=True, 122 break_long_words=True,
124 drop_whitespace=True, 123 drop_whitespace=True,
(...skipping 355 matching lines...) Expand 10 before | Expand all | Expand 10 after
480 def prefixed_lines(): 479 def prefixed_lines():
481 for line in text.splitlines(True): 480 for line in text.splitlines(True):
482 yield (prefix + line if predicate(line) else line) 481 yield (prefix + line if predicate(line) else line)
483 return ''.join(prefixed_lines()) 482 return ''.join(prefixed_lines())
484 483
485 484
486 if __name__ == "__main__": 485 if __name__ == "__main__":
487 #print dedent("\tfoo\n\tbar") 486 #print dedent("\tfoo\n\tbar")
488 #print dedent(" \thello there\n \t how are you?") 487 #print dedent(" \thello there\n \t how are you?")
489 print(dedent("Hello there.\n This is indented.")) 488 print(dedent("Hello there.\n This is indented."))
OLD | NEW
« no previous file with comments | « Lib/test/test_textwrap.py ('k') | no next file » | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+