diff -r 53fc0c475260 Lib/textwrap.py
--- a/Lib/textwrap.py	Sun Jul 03 18:22:14 2011 -0700
+++ b/Lib/textwrap.py	Sun Jul 03 22:10:21 2011 -0400
@@ -33,6 +33,11 @@
       width (default: 70)
         the maximum width of wrapped lines (unless break_long_words
         is false)
+      width_func (default: len; constructor keyword: width_function)
+        the function used to measure the width of a string.  The
+        default, len, suits monospaced text; supply a custom function
+        when characters have differing widths (e.g. in a proportional
+        font).
       initial_indent (default: "")
         string that will be prepended to the first line of wrapped
         output.  Counts towards the line's width.
@@ -61,6 +66,9 @@
         compound words.
       drop_whitespace (default: true)
         Drop leading and trailing whitespace from lines.
+      beautiful (default: false)
+        Whether to run the wrapped lines through _beautify(), which
+        redistributes chunks between lines to reduce raggedness.
     """
 
     unicode_whitespace_trans = {}
@@ -102,8 +110,11 @@
                  fix_sentence_endings=False,
                  break_long_words=True,
                  drop_whitespace=True,
-                 break_on_hyphens=True):
+                 break_on_hyphens=True,
+                 width_function=len,
+                 beautiful=False):
         self.width = width
+        self.width_func = width_function
         self.initial_indent = initial_indent
         self.subsequent_indent = subsequent_indent
         self.expand_tabs = expand_tabs
@@ -112,6 +123,7 @@
         self.break_long_words = break_long_words
         self.drop_whitespace = drop_whitespace
         self.break_on_hyphens = break_on_hyphens
+        self.beautiful = beautiful
 
 
     # -- Private methods -----------------------------------------------
@@ -225,6 +237,10 @@
         # from a stack of chucks.
         chunks.reverse()
 
+        # If self.beautiful is true, whitespace will be dropped in
+        # self._beautify, thus it should not be dropped here.
+        drop_whitespace = self.drop_whitespace and not self.beautiful
+
         while chunks:
 
             # Start the list of chunks that will make up the current line.
@@ -239,15 +255,15 @@
                 indent = self.initial_indent
 
             # Maximum width for this line.
-            width = self.width - len(indent)
+            width = self.width - self.width_func(indent)
 
             # First chunk on line is whitespace -- drop it, unless this
             # is the very beginning of the text (ie. no lines started yet).
-            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
+            if drop_whitespace and chunks[-1].strip() == '' and lines:
                 del chunks[-1]
 
             while chunks:
-                l = len(chunks[-1])
+                l = self.width_func(chunks[-1])
 
                 # Can at least squeeze this chunk onto the current line.
                 if cur_len + l <= width:
@@ -260,11 +276,11 @@
 
             # The current line is full, and the next chunk is too big to
             # fit on *any* line (not just this one).
-            if chunks and len(chunks[-1]) > width:
+            if chunks and self.width_func(chunks[-1]) > width:
                 self._handle_long_word(chunks, cur_line, cur_len, width)
 
             # If the last chunk on this line is all whitespace, drop it.
-            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
+            if drop_whitespace and cur_line and cur_line[-1].strip() == '':
                 del cur_line[-1]
 
             # Convert current line back to a string and store it in list
@@ -274,6 +290,85 @@
 
         return lines
 
+    def _beautify(self, lines):
+        """_beautify(lines : [list]) -> [list]
+
+        Clean up a paragraph of wrapped text by minimizing the space
+        at the ends of all the lines.  The algorithm used in _wrap_chunks
+        employs the minimum length algorithm for text wrapping, which
+        puts as many words onto a line as possible.  While quick, this
+        tends to create a ragged, less aesthetically-pleasing result.
+        This function goes back over the wrapped text in reverse and
+        better distributes the lines.  It does so by calculating the "cost"
+        of a line, which is defined as the square of how much space is
+        left at the end of the line.
+        """
+        changed = True
+        lines = [self._split(line) for line in lines]
+
+        # The function makes decisions to change lines based on the cost
+        # difference between removing a word from one line and placing
+        # it on another.  Thus whenever a line is changed, every line below
+        # it needs to be redistributed.  So this loop goes over the lines
+        # until no more changes can be made.
+        while changed:
+
+            # Reset changed to False so we don't get an infinite loop.
+            changed = False
+
+            # Since raggedness appears mainly in the bottom lines, start
+            # from the bottom and move upward.
+            for lineno in range(len(lines) - 1, 0, -1):
+                line_here, line_above = lines[lineno], lines[lineno - 1]
+
+                # Continue to move chunks from the line above (line_above)
+                # to this line (line_here) until we reach maximum efficiency.
+                while True:
+                    cost_before = (self._line_cost(line_here) +
+                                   self._line_cost(line_above))
+
+                    line_here = line_above[-1:] + line_here
+                    line_above = line_above[:-1]
+
+                    cost_after = (self._line_cost(line_here) +
+                                  self._line_cost(line_above))
+
+                    # Subtracting after from before means positive differences
+                    # represent an increase in efficiency.
+                    cost_diff = cost_before - cost_after
+
+                    # Does the new arrangement improve the cost?
+                    if cost_diff > 0.0:
+                        # Yes, put them back in the original list of lines.
+                        lines[lineno] = line_here
+                        lines[lineno - 1] = line_above
+                        changed = True
+                    else:
+                        # No, we've reached maximum efficiency.
+                        break
+
+        # Drop whitespace on the beginning and ends of lines if specified.
+        if self.drop_whitespace:
+            return [''.join(line).strip() for line in lines]
+        else:
+            return [''.join(line) for line in lines]
+
+    def _line_cost(self, string):
+        """_line_cost(string : [str/list]) -> [float]
+
+        Calculate the cost of a line of text (either in string form
+        or a list of chunks) by squaring the amount of remaining
+        space on the line.
+        """
+        # Allow for a list of chunks as input.
+        if isinstance(string, list):
+            string = ''.join(string)
+
+        # A line wider than self.width costs infinity; otherwise the
+        # cost is the square of the unused space (measured only once).
+        slack = self.width - self.width_func(string)
+        return float('inf') if slack < 0 else float(slack ** 2)
+
 
     # -- Public interface ----------------------------------------------
 
@@ -290,7 +385,11 @@
         chunks = self._split(text)
         if self.fix_sentence_endings:
             self._fix_sentence_endings(chunks)
-        return self._wrap_chunks(chunks)
+        lines = self._wrap_chunks(chunks)
+
+        if self.beautiful:
+            lines = self._beautify(lines)
+        return lines
 
     def fill(self, text):
         """fill(text : string) -> string