| OLD | NEW |
| 1 # Copyright (C) 2001-2010 Python Software Foundation | 1 # Copyright (C) 2001-2010 Python Software Foundation |
| 2 # Author: Barry Warsaw | 2 # Author: Barry Warsaw |
| 3 # Contact: email-sig@python.org | 3 # Contact: email-sig@python.org |
| 4 | 4 |
| 5 """Classes to generate plain text from a message object tree.""" | 5 """Classes to generate plain text from a message object tree.""" |
| 6 | 6 |
| 7 __all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator'] | 7 __all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator'] |
| 8 | 8 |
| 9 import re | 9 import re |
| 10 import sys | 10 import sys |
| 11 import time | 11 import time |
| 12 import random | 12 import random |
| 13 import warnings | 13 import warnings |
| 14 | 14 |
| 15 from io import StringIO, BytesIO | 15 from io import StringIO, BytesIO |
| 16 from email import policy | 16 from email._policybase import compat32 |
| 17 from email.header import Header | 17 from email.header import Header |
| 18 from email.message import _has_surrogates | 18 from email.utils import _has_surrogates |
| 19 import email.charset as _charset | 19 import email.charset as _charset |
| 20 | 20 |
| 21 UNDERSCORE = '_' | 21 UNDERSCORE = '_' |
| 22 NL = '\n' # XXX: no longer used by the code below. | 22 NL = '\n' # XXX: no longer used by the code below. |
| 23 | 23 |
| 24 fcre = re.compile(r'^From ', re.MULTILINE) | 24 fcre = re.compile(r'^From ', re.MULTILINE) |
| 25 | 25 |
| 26 | 26 |
| 27 | 27 |
| 28 class Generator: | 28 class Generator: |
| 29 """Generates output from a Message object tree. | 29 """Generates output from a Message object tree. |
| 30 | 30 |
| 31 This basic generator writes the message to the given file object as plain | 31 This basic generator writes the message to the given file object as plain |
| 32 text. | 32 text. |
| 33 """ | 33 """ |
| 34 # | 34 # |
| 35 # Public interface | 35 # Public interface |
| 36 # | 36 # |
| 37 | 37 |
| 38 def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *, | 38 def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *, |
| 39 policy=policy.default): | 39 policy=None): |
| 40 """Create the generator for message flattening. | 40 """Create the generator for message flattening. |
| 41 | 41 |
| 42 outfp is the output file-like object for writing the message to. It | 42 outfp is the output file-like object for writing the message to. It |
| 43 must have a write() method. | 43 must have a write() method. |
| 44 | 44 |
| 45 Optional mangle_from_ is a flag that, when True (the default), escapes | 45 Optional mangle_from_ is a flag that, when True (the default), escapes |
| 46 From_ lines in the body of the message by putting a `>' in front of | 46 From_ lines in the body of the message by putting a `>' in front of |
| 47 them. | 47 them. |
| 48 | 48 |
| 49 Optional maxheaderlen specifies the longest length for a non-continued | 49 Optional maxheaderlen specifies the longest length for a non-continued |
| 50 header. When a header line is longer (in characters, with tabs | 50 header. When a header line is longer (in characters, with tabs |
| 51 expanded to 8 spaces) than maxheaderlen, the header will split as | 51 expanded to 8 spaces) than maxheaderlen, the header will split as |
| 52 defined in the Header class. Set maxheaderlen to zero to disable | 52 defined in the Header class. Set maxheaderlen to zero to disable |
| 53 header wrapping. The default is 78, as recommended (but not required) | 53 header wrapping. The default is 78, as recommended (but not required) |
| 54 by RFC 2822. | 54 by RFC 2822. |
| 55 | 55 |
| 56 The policy keyword specifies a policy object that controls a number of | 56 The policy keyword specifies a policy object that controls a number of |
| 57 aspects of the generator's operation. The default policy maintains | 57 aspects of the generator's operation. The default policy maintains |
| 58 backward compatibility. | 58 backward compatibility. |
| 59 | 59 |
| 60 """ | 60 """ |
| 61 self._fp = outfp | 61 self._fp = outfp |
| 62 self._mangle_from_ = mangle_from_ | 62 self._mangle_from_ = mangle_from_ |
| 63 self._maxheaderlen = (maxheaderlen if maxheaderlen is not None else | 63 self.maxheaderlen = maxheaderlen |
| 64 policy.max_line_length) | |
| 65 self.policy = policy | 64 self.policy = policy |
| 66 | 65 |
| 67 def write(self, s): | 66 def write(self, s): |
| 68 # Just delegate to the file object | 67 # Just delegate to the file object |
| 69 self._fp.write(s) | 68 self._fp.write(s) |
| 70 | 69 |
| 71 def flatten(self, msg, unixfrom=False, linesep=None): | 70 def flatten(self, msg, unixfrom=False, linesep=None): |
| 72 r"""Print the message object tree rooted at msg to the output file | 71 r"""Print the message object tree rooted at msg to the output file |
| 73 specified when the Generator instance was created. | 72 specified when the Generator instance was created. |
| 74 | 73 |
| 75 unixfrom is a flag that forces the printing of a Unix From_ delimiter | 74 unixfrom is a flag that forces the printing of a Unix From_ delimiter |
| 76 before the first object in the message tree. If the original message | 75 before the first object in the message tree. If the original message |
| 77 has no From_ delimiter, a `standard' one is crafted. By default, this | 76 has no From_ delimiter, a `standard' one is crafted. By default, this |
| 78 is False to inhibit the printing of any From_ delimiter. | 77 is False to inhibit the printing of any From_ delimiter. |
| 79 | 78 |
| 80 Note that for subobjects, no From_ line is printed. | 79 Note that for subobjects, no From_ line is printed. |
| 81 | 80 |
| 82 linesep specifies the characters used to indicate a new line in | 81 linesep specifies the characters used to indicate a new line in |
| 83 the output. The default value is determined by the policy. | 82 the output. The default value is determined by the policy. |
| 84 | 83 |
| 85 """ | 84 """ |
| 86 # We use the _XXX constants for operating on data that comes directly | 85 # We use the _XXX constants for operating on data that comes directly |
| 87 # from the msg, and _encoded_XXX constants for operating on data that | 86 # from the msg, and _encoded_XXX constants for operating on data that |
| 88 # has already been converted (to bytes in the BytesGenerator) and | 87 # has already been converted (to bytes in the BytesGenerator) and |
| 89 # inserted into a temporary buffer. | 88 # inserted into a temporary buffer. |
| 90 self._NL = linesep if linesep is not None else self.policy.linesep | 89 policy = msg.policy if self.policy is None else self.policy |
| 90 if linesep is not None: |
| 91 policy = policy.clone(linesep=linesep) |
| 92 if self.maxheaderlen is not None: |
| 93 policy = policy.clone(max_line_length=self.maxheaderlen) |
| 94 self._NL = policy.linesep |
| 91 self._encoded_NL = self._encode(self._NL) | 95 self._encoded_NL = self._encode(self._NL) |
| 92 self._EMPTY = '' | 96 self._EMPTY = '' |
| 93 self._encoded_EMTPY = self._encode('') | 97 self._encoded_EMTPY = self._encode('') |
| 94 if unixfrom: | 98 p = self.policy |
| 95 ufrom = msg.get_unixfrom() | 99 try: |
| 96 if not ufrom: | 100 self.policy = policy |
| 97 ufrom = 'From nobody ' + time.ctime(time.time()) | 101 if unixfrom: |
| 98 self.write(ufrom + self._NL) | 102 ufrom = msg.get_unixfrom() |
| 99 self._write(msg) | 103 if not ufrom: |
| 104 ufrom = 'From nobody ' + time.ctime(time.time()) |
| 105 self.write(ufrom + self._NL) |
| 106 self._write(msg) |
| 107 finally: |
| 108 self.policy = p |
| 100 | 109 |
| 101 def clone(self, fp): | 110 def clone(self, fp): |
| 102 """Clone this generator with the exact same options.""" | 111 """Clone this generator with the exact same options.""" |
| 103 return self.__class__(fp, self._mangle_from_, self._maxheaderlen) | 112 return self.__class__(fp, |
| 113 self._mangle_from_, |
| 114 None, # Use policy setting, which we've adjusted |
| 115 policy=self.policy) |
| 104 | 116 |
| 105 # | 117 # |
| 106 # Protected interface - undocumented ;/ | 118 # Protected interface - undocumented ;/ |
| 107 # | 119 # |
| 108 | 120 |
| 109 # Note that we use 'self.write' when what we are writing is coming from | 121 # Note that we use 'self.write' when what we are writing is coming from |
| 110 # the source, and self._fp.write when what we are writing is coming from a | 122 # the source, and self._fp.write when what we are writing is coming from a |
| 111 # buffer (because the Bytes subclass has already had a chance to transform | 123 # buffer (because the Bytes subclass has already had a chance to transform |
| 112 # the data in its write method in that case). This is an entirely | 124 # the data in its write method in that case). This is an entirely |
| 113 # pragmatic split determined by experiment; we could be more general by | 125 # pragmatic split determined by experiment; we could be more general by |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 168 meth = getattr(self, '_handle_' + generic, None) | 180 meth = getattr(self, '_handle_' + generic, None) |
| 169 if meth is None: | 181 if meth is None: |
| 170 meth = self._writeBody | 182 meth = self._writeBody |
| 171 meth(msg) | 183 meth(msg) |
| 172 | 184 |
| 173 # | 185 # |
| 174 # Default handlers | 186 # Default handlers |
| 175 # | 187 # |
| 176 | 188 |
| 177 def _write_headers(self, msg): | 189 def _write_headers(self, msg): |
| 178 for h, v in msg.items(): | 190 for h, v in msg.raw_items(): |
| 179 self.write('%s: ' % h) | 191 self.write(self.policy.fold(h, v)) |
| 180 if isinstance(v, Header): | |
| 181 self.write(v.encode( | |
| 182 maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL) | |
| 183 else: | |
| 184 # Header's got lots of smarts, so use it. | |
| 185 header = Header(v, maxlinelen=self._maxheaderlen, | |
| 186 header_name=h) | |
| 187 self.write(header.encode(linesep=self._NL)+self._NL) | |
| 188 # A blank line always separates headers from body | 192 # A blank line always separates headers from body |
| 189 self.write(self._NL) | 193 self.write(self._NL) |
| 190 | 194 |
| 191 # | 195 # |
| 192 # Handlers for writing types and subtypes | 196 # Handlers for writing types and subtypes |
| 193 # | 197 # |
| 194 | 198 |
| 195 def _handle_text(self, msg): | 199 def _handle_text(self, msg): |
| 196 payload = msg.get_payload() | 200 payload = msg.get_payload() |
| 197 if payload is None: | 201 if payload is None: |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 258 # close-delimiter transport-padding | 262 # close-delimiter transport-padding |
| 259 self.write(self._NL + '--' + boundary + '--') | 263 self.write(self._NL + '--' + boundary + '--') |
| 260 if msg.epilogue is not None: | 264 if msg.epilogue is not None: |
| 261 self.write(self._NL) | 265 self.write(self._NL) |
| 262 self.write(msg.epilogue) | 266 self.write(msg.epilogue) |
| 263 | 267 |
| 264 def _handle_multipart_signed(self, msg): | 268 def _handle_multipart_signed(self, msg): |
| 265 # The contents of signed parts has to stay unmodified in order to keep | 269 # The contents of signed parts has to stay unmodified in order to keep |
| 266 # the signature intact per RFC1847 2.1, so we disable header wrapping. | 270 # the signature intact per RFC1847 2.1, so we disable header wrapping. |
| 267 # RDM: This isn't enough to completely preserve the part, but it helps. | 271 # RDM: This isn't enough to completely preserve the part, but it helps. |
| 268 old_maxheaderlen = self._maxheaderlen | 272 p = self.policy |
| 273 self.policy = p.clone(max_line_length=0) |
| 269 try: | 274 try: |
| 270 self._maxheaderlen = 0 | |
| 271 self._handle_multipart(msg) | 275 self._handle_multipart(msg) |
| 272 finally: | 276 finally: |
| 273 self._maxheaderlen = old_maxheaderlen | 277 self.policy = p |
| 274 | 278 |
| 275 def _handle_message_delivery_status(self, msg): | 279 def _handle_message_delivery_status(self, msg): |
| 276 # We can't just write the headers directly to self's file object | 280 # We can't just write the headers directly to self's file object |
| 277 # because this will leave an extra newline between the last header | 281 # because this will leave an extra newline between the last header |
| 278 # block and the boundary. Sigh. | 282 # block and the boundary. Sigh. |
| 279 blocks = [] | 283 blocks = [] |
| 280 for part in msg.get_payload(): | 284 for part in msg.get_payload(): |
| 281 s = self._new_buffer() | 285 s = self._new_buffer() |
| 282 g = self.clone(s) | 286 g = self.clone(s) |
| 283 g.flatten(part, unixfrom=False, linesep=self._NL) | 287 g.flatten(part, unixfrom=False, linesep=self._NL) |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 340 def _compile_re(cls, s, flags): | 344 def _compile_re(cls, s, flags): |
| 341 return re.compile(s, flags) | 345 return re.compile(s, flags) |
| 342 | 346 |
| 343 | 347 |
| 344 class BytesGenerator(Generator): | 348 class BytesGenerator(Generator): |
| 345 """Generates a bytes version of a Message object tree. | 349 """Generates a bytes version of a Message object tree. |
| 346 | 350 |
| 347 Functionally identical to the base Generator except that the output is | 351 Functionally identical to the base Generator except that the output is |
| 348 bytes and not string. When surrogates were used in the input to encode | 352 bytes and not string. When surrogates were used in the input to encode |
| 349 bytes, these are decoded back to bytes for output. If the policy has | 353 bytes, these are decoded back to bytes for output. If the policy has |
| 350 must_be_7bit set true, then the message is transformed such that the | 354 cte_type set to 7bit, then the message is transformed such that the |
| 351 non-ASCII bytes are properly content transfer encoded, using the | 355 non-ASCII bytes are properly content transfer encoded, using the charset |
| 352 charset unknown-8bit. | 356 unknown-8bit. |
| 353 | 357 |
| 354 The outfp object must accept bytes in its write method. | 358 The outfp object must accept bytes in its write method. |
| 355 """ | 359 """ |
| 356 | 360 |
| 357 # Bytes versions of this constant for use in manipulating data from | 361 # Bytes versions of this constant for use in manipulating data from |
| 358 # the BytesIO buffer. | 362 # the BytesIO buffer. |
| 359 _encoded_EMPTY = b'' | 363 _encoded_EMPTY = b'' |
| 360 | 364 |
| 361 def write(self, s): | 365 def write(self, s): |
| 362 self._fp.write(s.encode('ascii', 'surrogateescape')) | 366 self._fp.write(s.encode('ascii', 'surrogateescape')) |
| 363 | 367 |
| 364 def _new_buffer(self): | 368 def _new_buffer(self): |
| 365 return BytesIO() | 369 return BytesIO() |
| 366 | 370 |
| 367 def _encode(self, s): | 371 def _encode(self, s): |
| 368 return s.encode('ascii') | 372 return s.encode('ascii') |
| 369 | 373 |
| 370 def _write_headers(self, msg): | 374 def _write_headers(self, msg): |
| 371 # This is almost the same as the string version, except for handling | 375 # This is almost the same as the string version, except for handling |
| 372 # strings with 8bit bytes. | 376 # strings with 8bit bytes. |
| 373 for h, v in msg._headers: | 377 for h, v in msg.raw_items(): |
| 374 self.write('%s: ' % h) | 378 self._fp.write(self.policy.fold_binary(h, v)) |
| 375 if isinstance(v, str): | |
| 376 if _has_surrogates(v): | |
| 377 if not self.policy.must_be_7bit: | |
| 378 # If we have raw 8bit data in a byte string, we have no idea | |
| 379 # what the encoding is. There is no safe way to split this | |
| 380 # string. If it's ascii-subset, then we could do a normal | |
| 381 # ascii split, but if it's multibyte then we could break the | |
| 382 # string. There's no way to know so the least harm seems to | |
| 383 # be to not split the string and risk it being too long. | |
| 384 self.write(v+NL) | |
| 385 continue | |
| 386 h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h) | |
| 387 else: | |
| 388 h = Header(v, header_name=h) | |
| 389 else: | |
| 390 # Assume it is a Header-like object. | |
| 391 h = v | |
| 392 self.write(h.encode(linesep=self._NL, | |
| 393 maxlinelen=self._maxheaderlen)+self._NL) | |
| 394 # A blank line always separates headers from body | 379 # A blank line always separates headers from body |
| 395 self.write(self._NL) | 380 self.write(self._NL) |
| 396 | 381 |
| 397 def _handle_text(self, msg): | 382 def _handle_text(self, msg): |
| 398 # If the string has surrogates the original source was bytes, so | 383 # If the string has surrogates the original source was bytes, so |
| 399 # just write it back out. | 384 # just write it back out. |
| 400 if msg._payload is None: | 385 if msg._payload is None: |
| 401 return | 386 return |
| 402 if _has_surrogates(msg._payload) and not self.policy.must_be_7bit: | 387 if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit': |
| 403 self.write(msg._payload) | 388 self.write(msg._payload) |
| 404 else: | 389 else: |
| 405 super(BytesGenerator,self)._handle_text(msg) | 390 super(BytesGenerator,self)._handle_text(msg) |
| 406 | 391 |
| 407 @classmethod | 392 @classmethod |
| 408 def _compile_re(cls, s, flags): | 393 def _compile_re(cls, s, flags): |
| 409 return re.compile(s.encode('ascii'), flags) | 394 return re.compile(s.encode('ascii'), flags) |
| 410 | 395 |
| 411 | 396 |
| 412 | 397 |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 467 }, file=self) | 452 }, file=self) |
| 468 | 453 |
| 469 | 454 |
| 470 | 455 |
| 471 # Helper used by Generator._make_boundary | 456 # Helper used by Generator._make_boundary |
| 472 _width = len(repr(sys.maxsize-1)) | 457 _width = len(repr(sys.maxsize-1)) |
| 473 _fmt = '%%0%dd' % _width | 458 _fmt = '%%0%dd' % _width |
| 474 | 459 |
| 475 # Backward compatibility | 460 # Backward compatibility |
| 476 _make_boundary = Generator._make_boundary | 461 _make_boundary = Generator._make_boundary |
| OLD | NEW |