diff --git a/Lib/email/header.py b/Lib/email/header.py --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -348,6 +348,7 @@ self._splitchars = splitchars self._lines = [] self._current_line = _Accumulator(headerlen) + self._fws = ' \t' def __str__(self): self.newline() @@ -377,7 +378,7 @@ # syntax; we just try to break on semi-colons, then commas, then # whitespace. Eventually, this should be pluggable. if charset.header_encoding is None: - for ch in self._splitchars: + for i, ch in enumerate(self._splitchars): if ch in string: break else: @@ -391,7 +392,7 @@ self._current_line.reset(self._continuation_ws) self._current_line.push(encoded_string) else: - self._ascii_split(string, ch) + self._ascii_split(string, i) return # Otherwise, we're doing either a Base64 or a quoted-printable # encoding which means we don't need to split the line on syntactic @@ -428,7 +429,7 @@ while True: yield self._maxlen - self._continuation_ws_len - def _ascii_split(self, string, ch): + def _ascii_split(self, string, chindex): holding = _Accumulator() # Split the line on the split character, preserving it. If the split # character is whitespace RFC 2822 $2.2.3 requires us to fold on the @@ -437,6 +438,11 @@ # (e.g. comma or semicolon), the folding should happen after the split # character. But then in that case, we need to add our own # continuation whitespace -- although won't that break unfolding? + if chindex >= len(self._splitchars): + # No more splitting possible, we'll just have to emit it as is. + self._current_line.push(string) + return + ch = self._splitchars[chindex] for part, splitpart, nextpart in _spliterator(ch, string): if not splitpart: # No splitpart means this is the last chunk. Put this part diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -750,6 +750,115 @@ Test""") + def test_long_run_with_semi_header_splitter(self): + # Issue 11492 + eq = self.ndiffAssertEqual + msg = Message() + msg['From'] = 'test@dom.ain' + msg['References'] = SPACE.join([''] * 10) + '; abc' + msg.set_payload('Test') + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), """\ +From: test@dom.ain +References: + + ; + abc + +Test""") + + def test_long_header_with_whitespace_runs(self): + # Issue XXXXX + eq = self.ndiffAssertEqual + msg = Message() + msg['From'] = 'test@dom.ain' + msg['References'] = SPACE.join([' '] * 10) + msg.set_payload('Test') + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), """\ +From: test@dom.ain +References: + + + +Test""") + + def test_splitter_split_on_punctuation_only_if_fws(self): + # Issue XXXXX + eq = self.ndiffAssertEqual + msg = Message() + msg['From'] = 'test@dom.ain' + msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but' + 'they;arenotlegal;fold,points') + msg.set_payload('Test') + sfp = StringIO() + g = Generator(sfp) + g.flatten(msg) + eq(sfp.getvalue(), """\ +From: test@dom.ain +References: + thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points + +Test""") + + def test_last_split_chunk_does_not_fit(self): + eq = self.ndiffAssertEqual + h = Header('Subject: the first part of this is short, but_the_second' + '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' + '_all_by_itself') + eq(h.encode(), """\ +Subject: the first part of this is short, + but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") + + def test_splittable_leading_char_followed_by_overlong_unsplitable(self): + eq = self.ndiffAssertEqual + h = Header(', but_the_second' + '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' + '_all_by_itself') + eq(h.encode(), """\ +, + but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") + + def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self): + eq = self.ndiffAssertEqual + h = Header(', , but_the_second' + '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line' + '_all_by_itself') + eq(h.encode(), """\ +, , + but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""") + + def test_trailing_splitable_on_overlong_unsplitable(self): + eq = self.ndiffAssertEqual + h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_' + 'be_on_a_line_all_by_itself;') + eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_" + "be_on_a_line_all_by_itself;") + + def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self): + eq = self.ndiffAssertEqual + h = Header('; ' + 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' + 'be_on_a_line_all_by_itself;') + eq(h.encode(), """\ +; + this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") + + def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self): + eq = self.ndiffAssertEqual + h = Header('this is a test where we need to have more than one line ' + 'before; our final line that is just too big to fit;; ' + 'this_part_does_not_fit_within_maxlinelen_and_thus_should_' + 'be_on_a_line_all_by_itself;') + eq(h.encode(), """\ +this is a test where we need to have more than one line before; + our final line that is just too big to fit;; + this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""") + def test_no_split_long_header(self): eq = self.ndiffAssertEqual hstr = 'References: ' + 'x' * 80