diff -r 669d6b5c1734 Lib/test/test_textwrap.py
--- a/Lib/test/test_textwrap.py	Sat Jul 18 16:31:29 2015 -0700
+++ b/Lib/test/test_textwrap.py	Sun Jul 19 02:40:18 2015 +0000
@@ -443,6 +443,10 @@
 Did you say "supercalifragilisticexpialidocious?"
 How *do* you spell that odd word, anyways?
 '''
+        self.text_cjk = '''\
+Did you say "いろはにほへとちりぬるをいろはにほ?"
+How りぬ るをいろはにほり ぬるは, anyways?
+'''.decode('utf-8')
 
     def test_break_long(self):
         # Wrap text with long words and lots of punctuation
@@ -455,6 +459,14 @@
         self.check_wrap(self.text, 50,
                         ['Did you say "supercalifragilisticexpialidocious?"',
                          'How *do* you spell that odd word, anyways?'])
+        self.check_wrap(self.text_cjk, 30,
+                        ['Did you say "いろはにほへとち'.decode('utf-8'),
+                         'りぬるをいろはにほ?" How りぬ'.decode('utf-8'),
+                         'るをいろはにほり ぬるは,'.decode('utf-8'),
+                         'anyways?'])
+        self.check_wrap(self.text_cjk, 50,
+                        ['Did you say "いろはにほへとちりぬるをいろはにほ?"'.decode('utf-8'),
+                         'How りぬ るをいろはにほり ぬるは, anyways?'.decode('utf-8')])
 
         # SF bug 797650.  Prevent an infinite loop by making sure that at
         # least one character gets split off on every pass.
diff -r 669d6b5c1734 Lib/textwrap.py
--- a/Lib/textwrap.py	Sat Jul 18 16:31:29 2015 -0700
+++ b/Lib/textwrap.py	Sun Jul 19 02:40:18 2015 +0000
@@ -7,7 +7,7 @@
 
 __revision__ = "$Id$"
 
-import string, re
+import string, re, unicodedata
 
 try:
     _unicode = unicode
@@ -225,8 +225,9 @@
         # If we're allowed to break long words, then do so: put as much
         # of the next chunk onto the current line as will fit.
         if self.break_long_words:
-            cur_line.append(reversed_chunks[-1][:space_left])
-            reversed_chunks[-1] = reversed_chunks[-1][space_left:]
+            chunk_start, chunk_end = slice_cjk(reversed_chunks[-1], space_left)
+            cur_line.append(chunk_start)
+            reversed_chunks[-1] = chunk_end
 
         # Otherwise, we have to preserve the long word intact.  Only add
         # it to the current line if there's nothing already there --
@@ -283,7 +284,7 @@
                 del chunks[-1]
 
             while chunks:
-                l = len(chunks[-1])
+                l = len_cjk(chunks[-1])
 
                 # Can at least squeeze this chunk onto the current line.
                 if cur_len + l <= width:
@@ -296,7 +297,7 @@
 
             # The current line is full, and the next chunk is too big to
             # fit on *any* line (not just this one).
-            if chunks and len(chunks[-1]) > width:
+            if chunks and len_cjk(chunks[-1]) > width:
                 self._handle_long_word(chunks, cur_line, cur_len, width)
 
             # If the last chunk on this line is all whitespace, drop it.
@@ -419,6 +420,48 @@
     text = re.sub(r'(?m)^' + margin, '', text)
     return text
 
+
+# -- CJK support ------------------------------------------------------
+
+def len_cjk(text):
+    """Return the number of display columns occupied by text.
+
+    Byte strings (str) are measured with len().  For any other string,
+    each character whose East Asian Width is Fullwidth ('F') or Wide
+    ('W') counts as two columns and every other character as one.
+    """
+    if isinstance(text, str):
+        return len(text)
+    l = 0
+    for char in text:
+        if unicodedata.east_asian_width(unicode(char)) in ('F', 'W'):
+            l = l + 2
+        else:
+            l = l + 1
+    return l
+
+def slice_cjk(text, space_left):
+    """Split text into (head, tail) with head at most space_left columns.
+
+    Byte strings are cut at index space_left.  Other strings are cut in
+    front of the first character that would push the display width (as
+    measured by len_cjk) past space_left, so a double-width character
+    that does not fit entirely goes to tail.  head is empty when even
+    the first character is too wide; tail is empty when all of text fits.
+    """
+    if isinstance(text, str):
+        return text[:space_left], text[space_left:]
+    used = 0
+    for i, char in enumerate(text):
+        # Accumulate the width in a single pass rather than re-measuring
+        # the whole prefix text[:i] on every step.
+        used = used + len_cjk(char)
+        if used > space_left:
+            return text[:i], text[i:]
+    # Everything fits: return it whole instead of scanning past the end.
+    return text, text[:0]
+
+
 if __name__ == "__main__":
     #print dedent("\tfoo\n\tbar")
     #print dedent("  \thello there\n  \t  how are you?")
diff -r 669d6b5c1734 Misc/ACKS
--- a/Misc/ACKS	Sat Jul 18 16:31:29 2015 -0700
+++ b/Misc/ACKS	Sun Jul 19 02:40:18 2015 +0000
@@ -452,6 +452,7 @@
 Santiago Gala
 Yitzchak Gale
 Matthew Gallagher
+Florent Gallaire
 Quentin Gallet-Gilles
 Riccardo Attilio Galli
 Raymund Galvin