NOTE(review): the line structure of this context diff was reconstructed from
a whitespace-collapsed copy.  Indentation and blank context lines are inferred
from the hunk ranges; the pristine Header.py may carry a form-feed on one of
the blank lines before "# Helpers".  Verify the context against the target
file before applying.

*** email-2.4.3/email/Header.py.orig	Tue Oct 15 01:50:57 2002
--- email-2.4.3/email/Header.py	Wed Dec 25 15:37:55 2002
***************
*** 4,9 ****
--- 4,10 ----
  """Header encoding and decoding functionality."""
  
  import re
+ import string
  from types import StringType, UnicodeType
  
  import email.quopriMIME
*************** ecre = re.compile(r'''
*** 47,52 ****
--- 48,61 ----
    \?=                   # literal ?=
    ''', re.VERBOSE | re.IGNORECASE)
  
+ # A SPACE/TAB-delimited word holding at least one character outside the
+ # printable US-ASCII range '!' through '~'.
+ nonasciiword = '[^\t ]*[^\t !-~][^\t ]*'
+ # Leading run of such words, each pair joined by one SPACE or TAB.
+ nonasciiwords = re.compile('^('+nonasciiword+'(?:[\t ]'+nonasciiword+')*)')
+ # Leading run of printable ASCII, SPACE and TAB that either ends in
+ # SPACE/TAB or reaches the end of the string.
+ asciiwords = re.compile('^([\t !-~]*[\t ]|[\t !-~]+$)')
  
  
  # Helpers
*************** class Header:
*** 399,404 ****
--- 408,465 ----
          joiner = NL + self._continuation_ws
          return joiner.join(chunks)
  
+     def _split_nonascii(self, s, charset):
+         """Split s into runs that need encoding and runs that do not.
+ 
+         Returns a list of (string, charset) pairs.  For the '8bit' and
+         'us-ascii' charsets s comes back whole.  Otherwise runs of
+         printable ASCII words are tagged USASCII (right-stripped when
+         more text follows) and runs of words holding other characters,
+         with the whitespace between them, keep charset.
+         """
+         if charset == '8bit':
+             return [(s, charset)]
+         elif charset == 'us-ascii':
+             return [(s, charset)]
+         elif not isinstance(charset, Charset):
+             charset = Charset(charset)
+         splittable = charset.to_splittable(s)
+         chunks = []
+         while len(splittable):
+             # 'ASCII words' need not be encoded. Its charset may be us-ascii.
+             match = asciiwords.search(splittable)
+             if match:
+                 splittable = splittable[match.end(1):]
+                 words = match.group(1)
+                 #if len(chunks):
+                 #    # prev. is 'non-ASCII words'. SPACE is preserved.
+                 #    words = string.lstrip(words)
+                 if len(splittable):
+                     # next is 'non-ASCII words'. SPACE is preserved.
+                     words = string.rstrip(words)
+                 encoded = charset.from_splittable(words, False)
+                 chunks.append((encoded, USASCII))
+                 continue
+             # 'non-ASCII words' must be encoded, where 'non-ASCII words'
+             # contain:
+             #   - non-ASCII string.
+             #   - non-SPACE ASCII string(s) concatenated with non-ASCII string,
+             #     as ASCII word and 'encoded-word' must be separated.
+             #   - SPACE between 'non-ASCII words', as SPACE between
+             #     'encoded-word's is not displayed.
+             match = nonasciiwords.search(splittable)
+             if match:
+                 splittable = splittable[match.end(1):]
+                 words = match.group(1)
+                 encoded = charset.from_splittable(words, False)
+                 chunks.append((encoded, charset))
+                 continue
+             # Broken?
+             encoded = charset.from_splittable(splittable, False)
+             chunks.append((encoded, charset))
+             break
+         return chunks
+ 
      def encode(self):
          """Encode a message header into an RFC-compliant format.
  
*************** class Header:
*** 419,423 ****
          """
          newchunks = []
          for s, charset in self._chunks:
!             newchunks += self._split(s, charset, True)
          return self._encode_chunks(newchunks)
--- 480,486 ----
          """
          newchunks = []
          for s, charset in self._chunks:
!             chunks = self._split_nonascii(s, charset)
!             for piece, piece_charset in chunks:
!                 newchunks += self._split(piece, piece_charset, True)
          return self._encode_chunks(newchunks)