Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(2308)

Side by Side Diff: Lib/email/generator.py

Issue 14731: Enhance Policy framework in preparation for adding "email6" policy as provisional
Patch Set: Created 1 year ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Lib/email/feedparser.py ('k') | Lib/email/message.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright (C) 2001-2010 Python Software Foundation 1 # Copyright (C) 2001-2010 Python Software Foundation
2 # Author: Barry Warsaw 2 # Author: Barry Warsaw
3 # Contact: email-sig@python.org 3 # Contact: email-sig@python.org
4 4
5 """Classes to generate plain text from a message object tree.""" 5 """Classes to generate plain text from a message object tree."""
6 6
7 __all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator'] 7 __all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
8 8
9 import re 9 import re
10 import sys 10 import sys
11 import time 11 import time
12 import random 12 import random
13 import warnings 13 import warnings
14 14
15 from io import StringIO, BytesIO 15 from io import StringIO, BytesIO
16 from email import policy 16 from email._policybase import compat32
17 from email.header import Header 17 from email.header import Header
18 from email.message import _has_surrogates 18 from email.utils import _has_surrogates
19 import email.charset as _charset 19 import email.charset as _charset
20 20
21 UNDERSCORE = '_' 21 UNDERSCORE = '_'
22 NL = '\n' # XXX: no longer used by the code below. 22 NL = '\n' # XXX: no longer used by the code below.
23 23
24 fcre = re.compile(r'^From ', re.MULTILINE) 24 fcre = re.compile(r'^From ', re.MULTILINE)
25 25
26 26
27 27
28 class Generator: 28 class Generator:
29 """Generates output from a Message object tree. 29 """Generates output from a Message object tree.
30 30
31 This basic generator writes the message to the given file object as plain 31 This basic generator writes the message to the given file object as plain
32 text. 32 text.
33 """ 33 """
34 # 34 #
35 # Public interface 35 # Public interface
36 # 36 #
37 37
38 def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *, 38 def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *,
39 policy=policy.default): 39 policy=None):
40 """Create the generator for message flattening. 40 """Create the generator for message flattening.
41 41
42 outfp is the output file-like object for writing the message to. It 42 outfp is the output file-like object for writing the message to. It
43 must have a write() method. 43 must have a write() method.
44 44
45 Optional mangle_from_ is a flag that, when True (the default), escapes 45 Optional mangle_from_ is a flag that, when True (the default), escapes
46 From_ lines in the body of the message by putting a `>' in front of 46 From_ lines in the body of the message by putting a `>' in front of
47 them. 47 them.
48 48
49 Optional maxheaderlen specifies the longest length for a non-continued 49 Optional maxheaderlen specifies the longest length for a non-continued
50 header. When a header line is longer (in characters, with tabs 50 header. When a header line is longer (in characters, with tabs
51 expanded to 8 spaces) than maxheaderlen, the header will split as 51 expanded to 8 spaces) than maxheaderlen, the header will split as
52 defined in the Header class. Set maxheaderlen to zero to disable 52 defined in the Header class. Set maxheaderlen to zero to disable
53 header wrapping. The default is 78, as recommended (but not required) 53 header wrapping. The default is 78, as recommended (but not required)
54 by RFC 2822. 54 by RFC 2822.
55 55
56 The policy keyword specifies a policy object that controls a number of 56 The policy keyword specifies a policy object that controls a number of
57 aspects of the generator's operation. The default policy maintains 57 aspects of the generator's operation. The default policy maintains
58 backward compatibility. 58 backward compatibility.
59 59
60 """ 60 """
61 self._fp = outfp 61 self._fp = outfp
62 self._mangle_from_ = mangle_from_ 62 self._mangle_from_ = mangle_from_
63 self._maxheaderlen = (maxheaderlen if maxheaderlen is not None else 63 self.maxheaderlen = maxheaderlen
64 policy.max_line_length)
65 self.policy = policy 64 self.policy = policy
66 65
67 def write(self, s): 66 def write(self, s):
68 # Just delegate to the file object 67 # Just delegate to the file object
69 self._fp.write(s) 68 self._fp.write(s)
70 69
71 def flatten(self, msg, unixfrom=False, linesep=None): 70 def flatten(self, msg, unixfrom=False, linesep=None):
72 r"""Print the message object tree rooted at msg to the output file 71 r"""Print the message object tree rooted at msg to the output file
73 specified when the Generator instance was created. 72 specified when the Generator instance was created.
74 73
75 unixfrom is a flag that forces the printing of a Unix From_ delimiter 74 unixfrom is a flag that forces the printing of a Unix From_ delimiter
76 before the first object in the message tree. If the original message 75 before the first object in the message tree. If the original message
77 has no From_ delimiter, a `standard' one is crafted. By default, this 76 has no From_ delimiter, a `standard' one is crafted. By default, this
78 is False to inhibit the printing of any From_ delimiter. 77 is False to inhibit the printing of any From_ delimiter.
79 78
80 Note that for subobjects, no From_ line is printed. 79 Note that for subobjects, no From_ line is printed.
81 80
82 linesep specifies the characters used to indicate a new line in 81 linesep specifies the characters used to indicate a new line in
83 the output. The default value is determined by the policy. 82 the output. The default value is determined by the policy.
84 83
85 """ 84 """
86 # We use the _XXX constants for operating on data that comes directly 85 # We use the _XXX constants for operating on data that comes directly
87 # from the msg, and _encoded_XXX constants for operating on data that 86 # from the msg, and _encoded_XXX constants for operating on data that
88 # has already been converted (to bytes in the BytesGenerator) and 87 # has already been converted (to bytes in the BytesGenerator) and
89 # inserted into a temporary buffer. 88 # inserted into a temporary buffer.
90 self._NL = linesep if linesep is not None else self.policy.linesep 89 policy = msg.policy if self.policy is None else self.policy
90 if linesep is not None:
91 policy = policy.clone(linesep=linesep)
92 if self.maxheaderlen is not None:
93 policy = policy.clone(max_line_length=self.maxheaderlen)
94 self._NL = policy.linesep
91 self._encoded_NL = self._encode(self._NL) 95 self._encoded_NL = self._encode(self._NL)
92 self._EMPTY = '' 96 self._EMPTY = ''
93 self._encoded_EMTPY = self._encode('') 97 self._encoded_EMTPY = self._encode('')
94 if unixfrom: 98 p = self.policy
95 ufrom = msg.get_unixfrom() 99 try:
96 if not ufrom: 100 self.policy = policy
97 ufrom = 'From nobody ' + time.ctime(time.time()) 101 if unixfrom:
98 self.write(ufrom + self._NL) 102 ufrom = msg.get_unixfrom()
99 self._write(msg) 103 if not ufrom:
104 ufrom = 'From nobody ' + time.ctime(time.time())
105 self.write(ufrom + self._NL)
106 self._write(msg)
107 finally:
108 self.policy = p
100 109
101 def clone(self, fp): 110 def clone(self, fp):
102 """Clone this generator with the exact same options.""" 111 """Clone this generator with the exact same options."""
103 return self.__class__(fp, self._mangle_from_, self._maxheaderlen) 112 return self.__class__(fp,
113 self._mangle_from_,
114 None, # Use policy setting, which we've adjusted
115 policy=self.policy)
104 116
105 # 117 #
106 # Protected interface - undocumented ;/ 118 # Protected interface - undocumented ;/
107 # 119 #
108 120
109 # Note that we use 'self.write' when what we are writing is coming from 121 # Note that we use 'self.write' when what we are writing is coming from
110 # the source, and self._fp.write when what we are writing is coming from a 122 # the source, and self._fp.write when what we are writing is coming from a
111 # buffer (because the Bytes subclass has already had a chance to transform 123 # buffer (because the Bytes subclass has already had a chance to transform
112 # the data in its write method in that case). This is an entirely 124 # the data in its write method in that case). This is an entirely
113 # pragmatic split determined by experiment; we could be more general by 125 # pragmatic split determined by experiment; we could be more general by
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
168 meth = getattr(self, '_handle_' + generic, None) 180 meth = getattr(self, '_handle_' + generic, None)
169 if meth is None: 181 if meth is None:
170 meth = self._writeBody 182 meth = self._writeBody
171 meth(msg) 183 meth(msg)
172 184
173 # 185 #
174 # Default handlers 186 # Default handlers
175 # 187 #
176 188
177 def _write_headers(self, msg): 189 def _write_headers(self, msg):
178 for h, v in msg.items(): 190 for h, v in msg.raw_items():
179 self.write('%s: ' % h) 191 self.write(self.policy.fold(h, v))
180 if isinstance(v, Header):
181 self.write(v.encode(
182 maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL)
183 else:
184 # Header's got lots of smarts, so use it.
185 header = Header(v, maxlinelen=self._maxheaderlen,
186 header_name=h)
187 self.write(header.encode(linesep=self._NL)+self._NL)
188 # A blank line always separates headers from body 192 # A blank line always separates headers from body
189 self.write(self._NL) 193 self.write(self._NL)
190 194
191 # 195 #
192 # Handlers for writing types and subtypes 196 # Handlers for writing types and subtypes
193 # 197 #
194 198
195 def _handle_text(self, msg): 199 def _handle_text(self, msg):
196 payload = msg.get_payload() 200 payload = msg.get_payload()
197 if payload is None: 201 if payload is None:
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
258 # close-delimiter transport-padding 262 # close-delimiter transport-padding
259 self.write(self._NL + '--' + boundary + '--') 263 self.write(self._NL + '--' + boundary + '--')
260 if msg.epilogue is not None: 264 if msg.epilogue is not None:
261 self.write(self._NL) 265 self.write(self._NL)
262 self.write(msg.epilogue) 266 self.write(msg.epilogue)
263 267
264 def _handle_multipart_signed(self, msg): 268 def _handle_multipart_signed(self, msg):
265 # The contents of signed parts has to stay unmodified in order to keep 269 # The contents of signed parts has to stay unmodified in order to keep
266 # the signature intact per RFC1847 2.1, so we disable header wrapping. 270 # the signature intact per RFC1847 2.1, so we disable header wrapping.
267 # RDM: This isn't enough to completely preserve the part, but it helps. 271 # RDM: This isn't enough to completely preserve the part, but it helps.
268 old_maxheaderlen = self._maxheaderlen 272 p = self.policy
273 self.policy = p.clone(max_line_length=0)
269 try: 274 try:
270 self._maxheaderlen = 0
271 self._handle_multipart(msg) 275 self._handle_multipart(msg)
272 finally: 276 finally:
273 self._maxheaderlen = old_maxheaderlen 277 self.policy = p
274 278
275 def _handle_message_delivery_status(self, msg): 279 def _handle_message_delivery_status(self, msg):
276 # We can't just write the headers directly to self's file object 280 # We can't just write the headers directly to self's file object
277 # because this will leave an extra newline between the last header 281 # because this will leave an extra newline between the last header
278 # block and the boundary. Sigh. 282 # block and the boundary. Sigh.
279 blocks = [] 283 blocks = []
280 for part in msg.get_payload(): 284 for part in msg.get_payload():
281 s = self._new_buffer() 285 s = self._new_buffer()
282 g = self.clone(s) 286 g = self.clone(s)
283 g.flatten(part, unixfrom=False, linesep=self._NL) 287 g.flatten(part, unixfrom=False, linesep=self._NL)
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
340 def _compile_re(cls, s, flags): 344 def _compile_re(cls, s, flags):
341 return re.compile(s, flags) 345 return re.compile(s, flags)
342 346
343 347
344 class BytesGenerator(Generator): 348 class BytesGenerator(Generator):
345 """Generates a bytes version of a Message object tree. 349 """Generates a bytes version of a Message object tree.
346 350
347 Functionally identical to the base Generator except that the output is 351 Functionally identical to the base Generator except that the output is
348 bytes and not string. When surrogates were used in the input to encode 352 bytes and not string. When surrogates were used in the input to encode
349 bytes, these are decoded back to bytes for output. If the policy has 353 bytes, these are decoded back to bytes for output. If the policy has
350 must_be_7bit set true, then the message is transformed such that the 354 cte_type set to 7bit, then the message is transformed such that the
351 non-ASCII bytes are properly content transfer encoded, using the 355 non-ASCII bytes are properly content transfer encoded, using the charset
352 charset unknown-8bit. 356 unknown-8bit.
353 357
354 The outfp object must accept bytes in its write method. 358 The outfp object must accept bytes in its write method.
355 """ 359 """
356 360
357 # Bytes versions of this constant for use in manipulating data from 361 # Bytes versions of this constant for use in manipulating data from
358 # the BytesIO buffer. 362 # the BytesIO buffer.
359 _encoded_EMPTY = b'' 363 _encoded_EMPTY = b''
360 364
361 def write(self, s): 365 def write(self, s):
362 self._fp.write(s.encode('ascii', 'surrogateescape')) 366 self._fp.write(s.encode('ascii', 'surrogateescape'))
363 367
364 def _new_buffer(self): 368 def _new_buffer(self):
365 return BytesIO() 369 return BytesIO()
366 370
367 def _encode(self, s): 371 def _encode(self, s):
368 return s.encode('ascii') 372 return s.encode('ascii')
369 373
370 def _write_headers(self, msg): 374 def _write_headers(self, msg):
371 # This is almost the same as the string version, except for handling 375 # This is almost the same as the string version, except for handling
372 # strings with 8bit bytes. 376 # strings with 8bit bytes.
373 for h, v in msg._headers: 377 for h, v in msg.raw_items():
374 self.write('%s: ' % h) 378 self._fp.write(self.policy.fold_binary(h, v))
375 if isinstance(v, str):
376 if _has_surrogates(v):
377 if not self.policy.must_be_7bit:
378 # If we have raw 8bit data in a byte string, we have no idea
379 # what the encoding is. There is no safe way to split this
380 # string. If it's ascii-subset, then we could do a normal
381 # ascii split, but if it's multibyte then we could break the
382 # string. There's no way to know so the least harm seems to
383 # be to not split the string and risk it being too long.
384 self.write(v+NL)
385 continue
386 h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h)
387 else:
388 h = Header(v, header_name=h)
389 else:
390 # Assume it is a Header-like object.
391 h = v
392 self.write(h.encode(linesep=self._NL,
393 maxlinelen=self._maxheaderlen)+self._NL)
394 # A blank line always separates headers from body 379 # A blank line always separates headers from body
395 self.write(self._NL) 380 self.write(self._NL)
396 381
397 def _handle_text(self, msg): 382 def _handle_text(self, msg):
398 # If the string has surrogates the original source was bytes, so 383 # If the string has surrogates the original source was bytes, so
399 # just write it back out. 384 # just write it back out.
400 if msg._payload is None: 385 if msg._payload is None:
401 return 386 return
402 if _has_surrogates(msg._payload) and not self.policy.must_be_7bit: 387 if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
403 self.write(msg._payload) 388 self.write(msg._payload)
404 else: 389 else:
405 super(BytesGenerator,self)._handle_text(msg) 390 super(BytesGenerator,self)._handle_text(msg)
406 391
407 @classmethod 392 @classmethod
408 def _compile_re(cls, s, flags): 393 def _compile_re(cls, s, flags):
409 return re.compile(s.encode('ascii'), flags) 394 return re.compile(s.encode('ascii'), flags)
410 395
411 396
412 397
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
467 }, file=self) 452 }, file=self)
468 453
469 454
470 455
471 # Helper used by Generator._make_boundary 456 # Helper used by Generator._make_boundary
472 _width = len(repr(sys.maxsize-1)) 457 _width = len(repr(sys.maxsize-1))
473 _fmt = '%%0%dd' % _width 458 _fmt = '%%0%dd' % _width
474 459
475 # Backward compatibility 460 # Backward compatibility
476 _make_boundary = Generator._make_boundary 461 _make_boundary = Generator._make_boundary
OLDNEW
« no previous file with comments | « Lib/email/feedparser.py ('k') | Lib/email/message.py » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld cbc36f91f3f7