Index: Lib/email/quoprimime.py =================================================================== --- Lib/email/quoprimime.py (revision 88281) +++ Lib/email/quoprimime.py (working copy) @@ -56,6 +56,7 @@ hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]') bqre = re.compile(r'[^ !-<>-~\t]') +bqre2 = re.compile(r'[^ !-<>-~\t\r\n]') @@ -109,6 +110,9 @@ def quote(c): return "=%02X" % ord(c) +def quote_match(m): + c = m.group(0)[0] + return "=%02X" % ord(c) def header_encode(header, charset="iso-8859-1", keep_eols=False, @@ -190,6 +194,12 @@ 76 characters). Long lines will have the `soft linefeed' quoted-printable character "=" appended to them, so the decoded text will be identical to the original text. + + This algorithm is considerably more efficient than the one included with + Python. The only difference I am aware of is that the Python algorithm will + output a line with length maxlinelen + 1 when the input line is exactly + maxlinelen long and ends in ' ' or '\t'. I consider this a bug in the + Python implementation. """ if not body: return body @@ -197,66 +207,45 @@ if not binary: body = fix_eols(body) - # BAW: We're accumulating the body text by string concatenation. That - # can't be very efficient, but I don't have time now to rewrite it. It - # just feels like this algorithm could be more efficient. - encoded_body = '' - lineno = -1 - # Preserve line endings here so we can check later to see an eol needs to - # be added to the output later. - lines = body.splitlines(1) - for line in lines: - # But strip off line-endings for processing this line. 
- if line.endswith(CRLF): - line = line[:-2] - elif line[-1] in CRLF: - line = line[:-1] + # quote special characters + body = bqre2.sub(quote_match, body) - lineno += 1 - encoded_line = '' - prev = None + eq_eol = '=' + eol + # leave space for the '=' at the end of a line + maxlinelen1 = maxlinelen - 1 + + encoded_body = [] + for line in body.splitlines(): + # break up the line into pieces no longer than maxlinelen - 1 + i, j = 0, maxlinelen1 linelen = len(line) - # Now we need to examine every character to see if it needs to be - # quopri encoded. BAW: again, string concatenation is inefficient. - for j in range(linelen): - c = line[j] - prev = c - if bqre.match(c): - c = quote(c) - elif j+1 == linelen: - # Check for whitespace at end of line; special case - if c not in ' \t': - encoded_line += c - prev = c - continue - # Check to see to see if the line has reached its maximum length - if len(encoded_line) + len(c) >= maxlinelen: - encoded_body += encoded_line + '=' + eol - encoded_line = '' - encoded_line += c - # Now at end of line.. - if prev and prev in ' \t': - # Special case for whitespace at end of file - if lineno + 1 == len(lines): - prev = quote(prev) - if len(encoded_line) + len(prev) > maxlinelen: - encoded_body += encoded_line + '=' + eol + prev - else: - encoded_body += encoded_line + prev - # Just normal whitespace at end of line - else: - encoded_body += encoded_line + prev + '=' + eol - encoded_line = '' - # Now look at the line we just finished and it has a line ending, we - # need to add eol to the end of the line. 
- if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF: - encoded_body += encoded_line + eol - else: - encoded_body += encoded_line - encoded_line = '' - return encoded_body + while j < linelen: + # make sure we don't break up an escape sequence + if line[j-2] == '=': + j = j - 2 + elif line[j - 1] == '=': + j = j - 1 + encoded_body.append(line[i:j] + '=') + i, j = j, j + maxlinelen1 + # handle rest of line, special case if line ends in whitespace + if line and line[-1] in ' \t': + # match Python implementation "bug" + #if i > 0 and linelen == i + 1: + # encoded_body[-1] = encoded_body[-1][:-1] + line[-1] + eq_eol + #else: + # encoded_body.append(line[i:] + eq_eol) + encoded_body.append(line[i:] + eq_eol) + else: + encoded_body.append(line[i:]) + # add back final newline if present + if body[-1] in CRLF: + encoded_body.append('') + + return eol.join(encoded_body) + + # For convenience and backwards compatibility w/ standard base64 module body_encode = encode encodestring = encode