diff -r ab162f925761 Lib/textwrap.py
--- a/Lib/textwrap.py	Mon Jul 11 01:39:35 2011 +0200
+++ b/Lib/textwrap.py	Mon Jul 11 22:06:25 2011 -0400
@@ -61,6 +61,9 @@
         compound words.
       drop_whitespace (default: true)
         Drop leading and trailing whitespace from lines.
+      beautiful (default: false)
+        Whether to run the text through _beautify, which redistributes
+        the wrapping so that the text is more aesthetically pleasing.
     """
 
     unicode_whitespace_trans = {}
@@ -102,7 +105,8 @@
                  fix_sentence_endings=False,
                  break_long_words=True,
                  drop_whitespace=True,
-                 break_on_hyphens=True):
+                 break_on_hyphens=True,
+                 beautiful=False):
         self.width = width
         self.initial_indent = initial_indent
         self.subsequent_indent = subsequent_indent
@@ -112,6 +116,7 @@
         self.break_long_words = break_long_words
         self.drop_whitespace = drop_whitespace
         self.break_on_hyphens = break_on_hyphens
+        self.beautiful = beautiful
 
 
     # -- Private methods -----------------------------------------------
@@ -225,6 +230,10 @@
         # from a stack of chucks.
         chunks.reverse()
 
+        # If self.beautiful is true, whitespace will be dropped in
+        # self._beautify, thus it should not be dropped here.
+        drop_whitespace = self.drop_whitespace and not self.beautiful
+
         while chunks:
 
             # Start the list of chunks that will make up the current line.
@@ -243,7 +252,7 @@
 
             # First chunk on line is whitespace -- drop it, unless this
             # is the very beginning of the text (ie. no lines started yet).
-            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
+            if drop_whitespace and chunks[-1].strip() == '' and lines:
                 del chunks[-1]
 
             while chunks:
@@ -264,7 +273,7 @@
                 self._handle_long_word(chunks, cur_line, cur_len, width)
 
             # If the last chunk on this line is all whitespace, drop it.
-            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
+            if drop_whitespace and cur_line and cur_line[-1].strip() == '':
                 del cur_line[-1]
 
             # Convert current line back to a string and store it in list
@@ -274,6 +283,86 @@
 
         return lines
 
+    def _beautify(self, lines):
+        """_beautify(lines : [list]) -> [list]
+
+        Clean up a paragraph of wrapped text by minimizing the space
+        at the ends of all the lines. The algorithm used in _wrap_chunks
+        employs the minimum length algorithm for text wrapping, which
+        puts as many words onto a line as possible. While quick, this
+        tends to create a ragged, less aesthetically-pleasing result.
+        This function goes back over the wrapped text in reverse and
+        better distributes the lines. It does so by calculating the "cost"
+        of a line, which is defined as the square of how much space is
+        left at the end of the line.
+        """
+        changed = True
+        # Split each line back into chunks.  A real list (not a lazy map
+        # object) is required: the loop below uses len() and indexing.
+        lines = [self._split(line) for line in lines]
+
+        # The function makes decisions to change lines based on the cost
+        # difference between removing a word from one line and placing
+        # it on another. Thus whenever a line is changed, every line below
+        # it needs to be redistributed. So this loop goes over the lines
+        # until no more changes can be made.
+        while changed:
+
+            # Reset changed to False so we don't get an infinite loop.
+            changed = False
+
+            # Since raggedness appears mainly in the bottom lines, start
+            # from the bottom and move upward.
+            for lineno in range(len(lines) - 1, 0, -1):
+                line_here, line_above = lines[lineno], lines[lineno - 1]
+
+                # Continue to move chunks from the line above (line_above)
+                # to this line (line_here) until we reach maximum efficiency.
+                while True:
+                    cost_before = (self._line_cost(line_here) +
+                                   self._line_cost(line_above))
+
+                    line_here = line_above[-1:] + line_here
+                    line_above = line_above[:-1]
+
+                    cost_after = (self._line_cost(line_here) +
+                                  self._line_cost(line_above))
+
+                    # Subtracting after from before means positive differences
+                    # represent an increase in efficiency.
+                    cost_diff = cost_before - cost_after
+
+                    # Does the new arrangement improve the cost?
+                    if cost_diff > 0.0:
+                        # Yes, put them back in the original list of lines.
+                        lines[lineno] = line_here
+                        lines[lineno - 1] = line_above
+                        changed = True
+                    else:
+                        # No, we've reached maximum efficiency.
+                        break
+
+        # Drop whitespace on the beginning and ends of lines if specified.
+        if self.drop_whitespace:
+            return [''.join(line).strip() for line in lines]
+        else:
+            return [''.join(line) for line in lines]
+
+    def _line_cost(self, string):
+        """_line_cost(string : [str/list]) -> [float]
+
+        Calculate the cost of a line of text (either in string form
+        or a list of chunks) by squaring the amount of remaining
+        space on the line.
+        """
+        # Allow for a list of chunks as input.
+        if isinstance(string, list):
+            string = ''.join(string)
+
+        # Return infinity if the line goes beyond maximum width.
+        if len(string) > self.width:
+            return float('inf')
+        return float((self.width - len(string)) ** 2)
 
     # -- Public interface ----------------------------------------------
 
@@ -290,7 +379,11 @@
         chunks = self._split(text)
         if self.fix_sentence_endings:
             self._fix_sentence_endings(chunks)
-        return self._wrap_chunks(chunks)
+        lines = self._wrap_chunks(chunks)
+
+        if self.beautiful:
+            lines = self._beautify(lines)
+        return lines
 
     def fill(self, text):
         """fill(text : string) -> string