diff -r 7520f1bf0a81 -r b22698463737 .hgtags --- a/.hgtags Sun Jul 17 22:50:12 2011 -0500 +++ b/.hgtags Tue Jul 19 13:24:56 2011 -0400 @@ -91,3 +91,5 @@ cfa9364997c7f2e67b9cbb45c3a5fa3bba4e4999 v3.2.1rc1 5df549718fb4841ff521fe051f6b54f290fad5d8 v3.2.1rc2 ac1f7e5c05104d557d5acd922e95625ba5d1fe10 v3.2.1 +9ea05c98fa413b1f9476167ce5f08786a04923b8 policy_for_review +0526a3fbf8a6e118790578387e7c8e18feb3642f policy_final diff -r 7520f1bf0a81 -r b22698463737 Doc/library/email.errors.rst --- a/Doc/library/email.errors.rst Sun Jul 17 22:50:12 2011 -0500 +++ b/Doc/library/email.errors.rst Tue Jul 19 13:24:56 2011 -0400 @@ -58,32 +58,109 @@ :class:`~email.mime.nonmultipart.MIMENonMultipart` (e.g. :class:`~email.mime.image.MIMEImage`). -Here's the list of the defects that the :class:`~email.mime.parser.FeedParser` -can find while parsing messages. Note that the defects are added to the message -where the problem was found, so for example, if a message nested inside a -:mimetype:`multipart/alternative` had a malformed header, that nested message -object would have a defect, but the containing messages would not. -All defect classes are subclassed from :class:`email.errors.MessageDefect`, but -this class is *not* an exception! +The following exceptions are only raised if +:attr:`~email.policy.Policy.raise_on_defect` is set to ``True``, which it is +not by default. If :attr:`~email.policy.Policy.raise_on_defect` is ``False`` +(the default), then instead the exception instances are added to the +``defects`` attribute of the message or header where the problem was +found (or are otherwise disposed of according to custom :mod:`~email.policy` +settings). -* :class:`NoBoundaryInMultipartDefect` -- A message claimed to be a multipart, - but had no :mimetype:`boundary` parameter. -* :class:`StartBoundaryNotFoundDefect` -- The start boundary claimed in the - :mailheader:`Content-Type` header was never found. 
+Here is the list of the defects that the :class:`~email.parser.FeedParser` +can find while parsing messages: -* :class:`FirstHeaderLineIsContinuationDefect` -- The message had a continuation - line as its first header line. -* :class:`MisplacedEnvelopeHeaderDefect` - A "Unix From" header was found in the - middle of a header block. +.. exception:: MessageDefect -* :class:`MalformedHeaderDefect` -- A header was found that was missing a colon, - or was otherwise malformed. + Base class for all message defect classes. -* :class:`MultipartInvariantViolationDefect` -- A message claimed to be a - :mimetype:`multipart`, but no subparts were found. Note that when a message has - this defect, its :meth:`is_multipart` method may return false even though its - content type claims to be :mimetype:`multipart`. +.. exception:: NoBoundaryInMultipartDefect + + A message claimed to be a multipart, but had no :mimetype:`boundary` + parameter. + + +.. exception:: StartBoundaryNotFoundDefect + + The start boundary claimed in the :mailheader:`Content-Type` header was + never found. + + +.. exception:: FirstHeaderLineIsContinuationDefect + + The message had a continuation line as its first header line. + + +.. exception:: MisplacedEnvelopeHeaderDefect + + A "Unix From" header was found in the middle of a header block. + + +.. exception:: MalformedHeaderDefect + + A header was found that was missing a colon, or was otherwise malformed. + + +.. exception:: MultipartInvariantViolationDefect + + A message claimed to be a :mimetype:`multipart`, but no subparts were found. + Note that when a message has this defect, its :meth:`is_multipart` method + may return false even though its content type claims to be + :mimetype:`multipart`. + + +.. exception:: InvalidMultipartContentTransferEncodingDefect + + An invalid content transfer encoding was set on the multipart itself. + + +.. exception:: DuplicateHeaderDefect + + Multiple instances of a required-to-be-unique header were found. 
+ + +Here is the list of the defects that the parser may find while parsing +individual headers: + + +.. exception:: HeaderDefect + + Base class for all header-specific defects. + + +.. exception:: InvalidHeaderDefect + + Header is not valid; the message gives details. + + +.. exception:: HeaderMissingRequiredValue + + A header that must have a value had none. + + +.. exception:: NonPrintableDefect + + ASCII characters outside the ascii-printable range found. + + +.. exception:: ObsoleteHeaderDefect + + Header uses syntax declared obsolete by :rfc:`5322`. + + +.. exception:: UndecodableBytesDefect + + Header contained bytes that could not be decoded. + + +.. exception:: InvalidBase64PaddingDefect + + Base64 encoded sequence had an incorrect length. + + +.. exception:: InvalidBase64CharactersDefect + + Base64 encoded sequence had characters not in base64 alphabet. diff -r 7520f1bf0a81 -r b22698463737 Doc/library/email.header.rst --- a/Doc/library/email.header.rst Sun Jul 17 22:50:12 2011 -0500 +++ b/Doc/library/email.header.rst Tue Jul 19 13:24:56 2011 -0400 @@ -59,7 +59,7 @@ Optional *charset* serves two purposes: it has the same meaning as the *charset* argument to the :meth:`append` method. It also sets the default character set for all subsequent :meth:`append` calls that omit the *charset* argument. If - *charset* is not provided in the constructor (the default), the ``us-ascii`` + *charset* is not provided in the constructor (the default), the ``utf-8`` character set is used both as *s*'s initial charset and as the default for subsequent :meth:`append` calls. @@ -77,6 +77,8 @@ Optional *errors* is passed straight through to the :meth:`append` method. + .. versionchanged:: 3.3 default charset changed to ``utf-8``. + .. method:: append(s, charset=None, errors='strict') @@ -94,7 +96,8 @@ decoded with that character set. If *s* is an instance of :class:`str`, then *charset* is a hint specifying - the character set of the characters in the string. 
+ the character set of the characters in the string. The default if no + *charset* was specified in the constructor is ``utf-8``. In either case, when producing an :rfc:`2822`\ -compliant header using :rfc:`2047` rules, the string will be encoded using the output codec of @@ -192,3 +195,353 @@ :class:`Header` instance. Optional *maxlinelen*, *header_name*, and *continuation_ws* are as in the :class:`Header` constructor. + +Custom Header Objects +^^^^^^^^^^^^^^^^^^^^^ + +XXX: This section documents the parts of new style headers that have been +implemented so far. When the full implementation is ready, this section will +move to the top and the classes and functions above will be moved to a +"backward compatibility" section at the end. + +Headers are represented by customized subclasses of ``str``. The particular +class used to represent a given header is determined by the +:attr:`~email.policy.Policy.header_factory` of the :mod:`~email.policy` in +effect when the headers are created. This section documents the particular +:class:`~email.header.HeaderFactory` implemented by the email package for +handling :RFC:`2822` compliant email messages. The minimum requirements of the +header API are documented in :ref:`header-factory-api`. + +Under the default :mod:`~email.policy`, all headers produced by +:class:`~email.header.HeaderFactory` have :class:`~email.header.BaseHeader` as +their last base class. Each header class has an additional base class that is +determined by the type of the header. For example, many headers have the class +:class:`~email.header.UnstructuredHeader` as their other base class. This +specialized class for a header is determined by the name of the header, using a +lookup table stored in the :class:`~email.header.HeaderFactory`. All of this +is managed transparently for the typical application program, but interfaces +are provided for modifying the default behavior for use by more complex +applications. 
+ +The sections below will first document the header base classes and their +attributes, followed by the API for modifying the behavior of +:class:`~email.header.HeaderFactory`. + + +.. class:: BaseHeader(name, unparsed, unfolded=None, *, use_decoded=False) + + The base class constructor implements the behavior required by the + :ref:`header-factory-api` and returns the object representing the header. + See that documentation for the meaning of the arguments. + + In Python 3.3 this class implements a backward compatibility heuristic: if + *unfolded* is ``None`` but *unparsed* contains either encoded words or + linesep characters, then it is treated as a "source value" and parsed as if + it had been given as the value of *unfolded*. This is to support old + application programs that set headers to such values using the ``Message`` + header interface. It generates a :exc:`DeprecationWarning`, and will no + longer be supported in Python 3.4. + + This base class takes care of setting the following read-only properties: + + + .. attribute:: name + + The name of the header. This is exactly the value passed in to the + header constructor for *name*; that is, case is preserved. + + + .. attribute:: value + + The string value of the header. If the *unfolded* was passed to the + constructor and it contains non-ASCII characters, these characters will + be replaced by the Unicode ``unknown character`` codepoint in the + ``value`` attribute. Specialized headers may also perform certain + canonicalization operations (for example, the value of a date header is + always in fully RFC compliant form, regardless of the input). + + + .. attribute:: source + + The string version of the value that was obtained from the input source. + May be ``None`` if the header was created by the application program + manipulating the ``Message`` object. Includes any folding whitespace in + the original input. 
This value may be a string decoded from binary using + the ``ascii`` codec and the ``surrogateescape`` error handler, and should + not normally be used by an application program. A + :mod:`~email.generator` may use it to reconstruct the original message, + depending on the policy in effect. + + + .. attribute:: defects + + A tuple of :exc:`~email.errors.HeaderDefect` instances reporting any + RFC compliance problems found during parsing. The email package tries to + be complete about detecting compliance issues. See the :mod:`errors` + module for a discussion of the types of defects that may be reported. + + .. attribute:: max_count + + The maximum number of headers of this type that can have the same + ``name``. A value of ``None`` means unlimited. The ``BaseHeader`` value + for this attribute is ``None``; it is expected that specialized header + classes will override this value as needed. + + + ``BaseHeader`` by itself cannot be used to create a header object. It + defines a protocol that each specialized header cooperates with in order to + produce the header object. Specifically, ``BaseHeader`` requires that + the specialized class provide a ``classmethod`` named ``parse``. This + method is called as follows:: + + parse(string, kwds) + + ``kwds`` is a dictionary containing one pre-initialized key, ``defects``. + ``defects`` is an empty list. The parse method should append any detected + defects to this list. On return, the ``kwds`` dictionary *must* contain + values for at least the keys ``decoded`` and ``defects``. ``decoded`` + should be the string to be assigned to the ``value`` attribute (and as the + string value of the header if :attr:`~email.policy.Policy.use_decoded` is + ``True``). The parse method should assume that *string* may contain + transport encoded parts, but should correctly handle all valid unicode + characters as well so that it can parse un-encoded header values. 
+ + ``BaseHeader`` then creates the header instance, and calls its ``init`` + method. The specialized class only needs to provide an ``init`` method if + it wishes to set additional attributes beyond those provided by + ``BaseHeader`` itself. Such an ``init`` method should look like this:: + + def init(self, *args, **kw): + self._myattr = kw.pop('myattr') + super().init(*args, **kw) + + That is, anything extra that the specialized class puts in to the ``kwds`` + dictionary should be removed and handled, and the remaining contents of + ``kw`` (and ``args``) passed to the ``BaseHeader`` ``init`` method. + + +.. class:: UnstructuredHeader + + An "unstructured" header is the default type of header in :rfc:`2822`. + Any header that does not have a specified syntax is treated as + unstructured. The classic example of an unstructured header is the + :mailheader:`Subject` header. + + In :rfc:`2822`, an unstructured header is a run of arbitrary text. + :rfc:`2047`, however, introduces an :rfc:`2822` compatible mechanism for + encoding non-ASCII text as ASCII characters within a header value. When an + *unfolded* argument is passed to the constructor, the ``UnstructuredHeader`` + parser converts such encoded words back in to the original unicode, + following the :rfc:`2047` rules for unstructured text. The parser uses + heuristics to correctly (hopefully) decode certain non-compliant encoded + words. Defects are registered in such cases, as well as defects for issues + such as invalid characters within the encoded words or the non-encoded text. + + This header type provides no additional attributes. + + +.. class:: DateHeader + + :rfc:`2822` specifies a very specific format for dates within email headers. + The ``DateHeader`` parser recognizes that date format, as well as + recognizing a number of variant forms that are sometimes found "in the + wild". + + When creating a ``DateHeader``, *unparsed* may be a + :class:`~datetime.datetime` instance. 
This means, for example, that + the following code is valid and does what one would expect:: + + msg['Date'] = datetime(2011, 7, 15, 21) + + Since this is a naive ``datetime`` it will be interpreted as a UTC + timestamp, and the resulting value will have a timezone of ``-0000``. Much + more useful is to use the :func:`~email.utils.localtime` function from the + :mod:`~email.utils` module:: + + msg['Date'] = utils.localtime() + + This example sets the date header to the current time and date using + the current timezone offset. + + This header type provides the following additional attributes: + + + .. attribute:: datetime + + If the header value can be recognized as a valid date of one form or + another, this attribute will contain a :class:`~datetime.datetime` + instance representing that date. If the timezone of the input date is + specified as ``-0000`` (indicating it is in UTC but contains no + information about the source timezone), then ``datetime`` will be a naive + :class:`~datetime.datetime`. If a specific timezone offset is found + (including ``+0000``), then ``datetime`` will contain an aware datetime that + uses :class:`datetime.timezone` to record the timezone offset. + + + The ``value`` attribute is set by formatting the ``datetime`` according + to the :rfc:`2822` rules; that is, it is set to:: + + utils.format_datetime(self.datetime) + + +.. class:: AddressHeader + + Address headers are one of the most complex structured header types. + The ``AddressHeader`` class provides a generic interface to any address + header. + + This header type provides the following additional attributes: + + + .. attribute:: groups + + A list of :class:`~email.header.Group` objects encoding the addresses and + groups found in the header value. + + + .. attribute:: addresses + + A list of :class:`~email.header.Address` objects encoding all of the + individual addresses from the header value. 
If the header value contains + any groups, the individual addresses from the group are included in the + list at the point where the group occurs in the value (that is, the list + of addresses is "flattened" into a one dimensional list). + + + Note that since headers are immutable, the returned lists may be freely + modified without affecting the header from which they were obtained. + + +.. class:: SingleAddressHeader + + A subclass of :class:`~email.header.AddressHeader` that adds one + additional attribute: + + + .. attribute:: address + + The single address encoded by the header value. If the header value + actually contains more than one address (which would be a violation of + the RFC under the default :mod:`policy`), accessing this attribute will + result in a :exc:`ValueError`. + + +Each of the above classes also has a ``Unique`` variant (for example, +``UniqueUnstructuredHeader``). The only difference is that in the ``Unique`` +variant, ``max_count`` is set to 1. + + +.. class:: HeaderFactory(base_class=BaseHeader, \ + default_class=UnstructuredHeader, \ + use_default_map=True) + + This is the factory used by the :data:`~policy.default` policy, which uses + the default constructor arguments. ``HeaderFactory`` builds the class used + to create a header instance dynamically, using *base_class* and a + specialized class retrieved from a registry that it holds. When a given + header name does not appear in the registry, the class specified by + *default_class* is used as the specialized class. When *use_default_map* + is ``True`` (the default), the standard mapping of header names to classes + is copied in to the registry during initialization. *base_class* is always + the last class in the generated class's ``__bases__`` list. 
+ + The default mappings are: + + :subject: UniqueUnstructuredHeader + :date: UniqueDateHeader + :resent-date: DateHeader + :orig-date: UniqueDateHeader + :sender: UniqueSingleAddressHeader + :resent-sender: SingleAddressHeader + :to: UniqueAddressHeader + :resent-to: AddressHeader + :cc: UniqueAddressHeader + :resent-cc: AddressHeader + :from: UniqueAddressHeader + :resent-from: AddressHeader + :reply-to: UniqueAddressHeader + + ``HeaderFactory`` has the following methods: + + + .. method:: map_to_type(name, cls) + + *name* is the name of the header to be mapped. It will be converted to + lower case in the registry. *cls* is the specialized class to be used, + along with *base_class*, to create the class used to instantiate headers + that match *name*. + + + .. method:: __getitem__(name) + + Construct and return a class to handle creating a *name* header. + + + .. method:: __call__(name, unparsed, unfolded=None, use_decoded=False) + + Implements the :ref:`header-factory-api` by retrieving the specialized + header associated with *name* from the registry (using *default_class* + if *name* does not appear in the registry) and composing it with + *base_class* to produce a class, and then calling the constructed + class's constructor, passing it the same argument list, and finally + returning the class instance created thereby. + + +.. XXX: These need some refinement. + +.. class:: Group(value, display_name, addresses) + + ``Group`` is a subclass of ``str``. *value* is its string value. + *display_name* initializes the ``name`` attribute and *addresses* + initializes the ``addresses`` attribute. + + .. attribute:: name + + The "display name" of the group. If the group represents a single + address rather than a real group, *name* will be ``None``. + + + .. attribute:: addresses + + A tuple of :class:`~email.header.Address` objects representing + the individual addresses (if any) in the group. + + +.. class:: Address(value, name, username, domain, defects) + + ``Address`` is a subclass of ``str``. 
*value* is its string value. + *name* is the "display name" of the address, if any. *username* + and *domain* are the corresponding parts of the address's + ``addr_spec``. *defects* is a list of defects detected when the + source string was parsed to obtain the address parts. + + .. attribute:: name + + The "display name" portion of the address, unquoted. + + + .. attribute:: username + + The username portion of the ``addr_spec``, unquoted. + + + .. attribute:: domain + + The domain portion of the ``addr_spec``. + + + .. attribute:: defects + + A tuple of :exc:`~email.errors.HeaderDefect` instances indicating + any RFC conformance defects found while parsing the value to + obtain the address elements. + + .. attribute:: addr_spec + + The ``addr_spec`` portion of the address, formatted according to + :rfc:`2822` rules, with minimal quoting. + + + .. attribute:: reformatted + + The address formatted according to :rfc:`2822` rules with the + minimum required quoting. diff -r 7520f1bf0a81 -r b22698463737 Doc/library/email.message.rst --- a/Doc/library/email.message.rst Sun Jul 17 22:50:12 2011 -0500 +++ b/Doc/library/email.message.rst Tue Jul 19 13:24:56 2011 -0400 @@ -8,32 +8,37 @@ The central class in the :mod:`email` package is the :class:`Message` class, imported from the :mod:`email.message` module. It is the base class for the :mod:`email` object model. :class:`Message` provides the core functionality for -setting and querying header fields, and for accessing message bodies. +setting and querying headers, and for accessing message bodies. Conceptually, a :class:`Message` object consists of *headers* and *payloads*. -Headers are :rfc:`2822` style field names and values where the field name and + +Headers are :rfc:`2822` style field names and values where the field name and value are separated by a colon. The colon is not part of either the field name -or the field value. +or the field value. 
Headers are stored and returned in case-preserving form +but are matched case-insensitively. There may also be a single envelope +header, also known as the *Unix-From* header or the ``From_`` header. Headers +are represented by specialized objects, which are subclasses of ``str`` with +additional attributes. See :mod:`~email.header` for more information on the +objects that represent headers. -Headers are stored and returned in case-preserving form but are matched -case-insensitively. There may also be a single envelope header, also known as -the *Unix-From* header or the ``From_`` header. The payload is either a string -in the case of simple message objects or a list of :class:`Message` objects for -MIME container documents (e.g. :mimetype:`multipart/\*` and -:mimetype:`message/rfc822`). +The message payload is either a string in the case of simple message objects or +a list of :class:`Message` objects for MIME container documents (e.g. +:mimetype:`multipart/\*` and :mimetype:`message/rfc822`). :class:`Message` objects provide a mapping style interface for accessing the -message headers, and an explicit interface for accessing both the headers and -the payload. It provides convenience methods for generating a flat text +message headers, and an explicit interface for accessing both the headers +and the payload. It provides convenience methods for generating a flat text representation of the message object tree, for accessing commonly used header parameters, and for recursively walking over the object tree. Here are the methods of the :class:`Message` class: -.. class:: Message() +.. class:: Message(policy=email.policy.default) - The constructor takes no arguments. + The constructor takes a single optional argument, the :mod:`~email.policy` + that should be used when parsing header values. It defaults to the default + policy, which is designed for RFC compliant email messages. .. method:: as_string(unixfrom=False, maxheaderlen=0) @@ -190,9 +195,9 @@ .. 
method:: __contains__(name) - Return true if the message object has a field named *name*. Matching is + Return true if the message object has a header named *name*. Matching is done case-insensitively and *name* should not include the trailing colon. - Used for the ``in`` operator, e.g.:: + Used for the ``in`` operator:: if 'message-id' in myMessage: print('Message-ID:', myMessage['message-id']) @@ -200,24 +205,23 @@ .. method:: __getitem__(name) - Return the value of the named header field. *name* should not include the - colon field separator. If the header is missing, ``None`` is returned; a + Return the header named *name*. *name* should not include the colon + field separator. If there is no such header, ``None`` is returned; a :exc:`KeyError` is never raised. - Note that if the named field appears more than once in the message's - headers, exactly which of those field values will be returned is - undefined. Use the :meth:`get_all` method to get the values of all the - extant named headers. + Note that if there is more than one header with the name *name* in the + message's headers, exactly which one will be returned is undefined. Use + the :meth:`get_all` method to get all of the headers with the given name. .. method:: __setitem__(name, val) - Add a header to the message with field name *name* and value *val*. The - field is appended to the end of the message's existing fields. + Add a header to the message with name *name* and value *val*. The header + is appended to the end of the message's existing headers. Note that this does *not* overwrite or delete any existing header with the same name. If you want to ensure that the new header is the only one present in the - message with field name *name*, delete the field first, e.g.:: + message with the given name, delete all headers by that name first:: del msg['subject'] msg['subject'] = 'Python roolz!' @@ -225,40 +229,44 @@ .. 
method:: __delitem__(name) - Delete all occurrences of the field with name *name* from the message's - headers. No exception is raised if the named field isn't present in the - headers. + Delete all headers with the name *name* from the message's headers. No + exception is raised if there is no header named *name* in the headers. .. method:: keys() - Return a list of all the message's header field names. + Return a list of the names of all the headers in the message's header + block, in the order in which they originally appeared or were added. + Unlike a dictionary, a name may appear in the list more than once. .. method:: values() - Return a list of all the message's field values. + Return a list of the headers from the message's header block, in the same + order as their names are returned by the :meth:`keys` method. .. method:: items() - Return a list of 2-tuples containing all the message's field headers and - values. + Return a list of ``(name, header)`` pairs for all of the message's + headers, in the same order as the names are returned by the :meth:`keys` + method. .. method:: get(name, failobj=None) - Return the value of the named header field. This is identical to - :meth:`__getitem__` except that optional *failobj* is returned if the - named header is missing (defaults to ``None``). + Return the named header. This is identical to :meth:`__getitem__` except + that optional *failobj* is returned if there is no header named *name* + (defaults to ``None``). + Here are some additional useful methods: .. method:: get_all(name, failobj=None) - Return a list of all the values for the field named *name*. If there are - no such named headers in the message, *failobj* is returned (defaults to + Return a list of all the headers named *name*. If there are no headers + with that name in the message, *failobj* is returned (defaults to ``None``). @@ -266,7 +274,7 @@ Extended header setting. 
This method is similar to :meth:`__setitem__` except that additional header parameters can be provided as keyword - arguments. *_name* is the header field to add and *_value* is the + arguments. *_name* is the name of the header to add and *_value* is the *primary* value for the header. For each item in the keyword argument dictionary *_params*, the key is @@ -304,8 +312,8 @@ .. method:: replace_header(_name, _value) Replace a header. Replace the first header found in the message that - matches *_name*, retaining header order and field name case. If no - matching header was found, a :exc:`KeyError` is raised. + matches *_name*, retaining header order and the case of the header name. + If no matching header was found, a :exc:`KeyError` is raised. .. method:: get_content_type() diff -r 7520f1bf0a81 -r b22698463737 Doc/library/email.policy.rst --- a/Doc/library/email.policy.rst Sun Jul 17 22:50:12 2011 -0500 +++ b/Doc/library/email.policy.rst Tue Jul 19 13:24:56 2011 -0400 @@ -127,6 +127,20 @@ :const:`False` (the default), defects will be passed to the :meth:`register_defect` method. + .. attribute:: decoded_headers + + This attribute controls what the parser will set as the string value of a + header during message parsing. If false (the default), the value of a + header will be set equal to the value of its 'source' attribute. If + True, the value of a header will be set equal to the 'value' attribute. + In Python 3.4 the default value of this attribute will become True [#]_. + + .. attribute:: header_factory + + A callable object that will be called during header parsing. See + :ref:`header-factory-api` for more information about this object. + + :mod:`Policy` object also have the following methods: .. method:: handle_defect(obj, defect) @@ -154,6 +168,94 @@ values as the current instance, except where those attributes are given new values by the keyword arguments. + .. 
method:: make_header(name, unparsed, unfolded=None) + + Convenience wrapper for calling the + :attr:`~email.policy.Policy.header_factory`. + + Return a specialized header object representing the header named *name*. + *unparsed* is the value obtained from the source data, including any + linesep characters. *unfolded* is the same value with any folding + whitespace removed. If *unfolded* is ``None``, *unparsed* should be the decoded header + value (that is, it should be a unicode string without encoded words or + linesep characters). + + ``make_header`` calls the :attr:`~policy.Policy.header_factory`, passing + it these three arguments plus the value of + :attr:`~policy.Policy.decoded_headers`. + + See :ref:`header-factory-api` for more details. + + +.. _header-factory-api: + +Header Factory API +^^^^^^^^^^^^^^^^^^ + +Each header found in a message is represented in the message model by a +separate object, a header object. See :mod:`~email.message` for details on how +the header objects are accessed via the message. ``policy`` provides an +application programming interface for controlling how the header objects are +created from the source data. The API consists of two components: the method +:meth:`~email.policy.Policy.make_header`, and the attribute +:attr:`~email.policy.Policy.header_factory`. ``make_header`` is a simple +wrapper around a call to the callable object stored in ``header_factory``. + +``header_factory`` is an arbitrary callable object that takes from two to four +arguments:: + + header = header_factory(name, unparsed, unfolded=None, use_decoded=False) + +The ``header_factory`` is responsible for returning a header object. To +interoperate with the rest of the package, the returned object is required +to be a subclass of ``str`` that provides at a minimum the following +additional attributes: + + :source: The value exactly as obtained from the source, or None. + + :value: The "idealized" string representation of the value. 
+ + :defects: A possibly empty list of :class:`~email.errors.HeaderDefect` + instances. + +The class is free to provide any additional attributes that it wishes based +on the parsed data. Header objects should be immutable. + +The ``header_factory`` itself may provide an additional API beyond being +callable. The standard :class:`~email.header.HeaderFactory` provides such an +API to allow an application control of what specific header objects are +produced for various headers, but the rest of the email package does not depend +on this API. + +The email package will call the ``header_factory`` from two contexts: from a +parser, or during manipulation of a ``Message`` object (that is, during +addition or modification of headers on the ``Message``). + +The defaults shown above are those used by the standard header factory, +:class:`~email.header.HeaderFactory`. + +*name* is the name of the header to be created. + +In the parsing context, *unparsed* will be the value obtained from the source +data, including (for data that comes from a data source rather than being +generated by the application program) any linesep characters, but not including +the "name" portion of the field. The header factory is responsible for +recording this value in the ``source`` attribute of the returned header, and if +``use_decoded`` is ``False`` it should become the string value of the header. +*unfolded* is the same value with just the folding linesep characters removed. +This parameter is separate because a parser may have more knowledge of actual +message line breaks than would be apparent from examining the *unparsed* +string. For example, this would be true in the case of a file containing both +``os.linesep`` strings and individual ``\r`` or ``\n`` characters. (Note that any +such individual characters that are present in *unfolded* will be +reported as RFC compliance defects under the default policy.) 
+ +When called in the context of message modification, ``make_header`` is called +with two arguments, *name* and *unparsed*. In this case the header factory +should use the *unparsed* as the *value*, and set the *source* to ``None``. +That is, a two-argument call is actually providing the unicode string value, +with no encoded words or linesep characters, and there is no *source*. + Policy Instances ^^^^^^^^^^^^^^^^ @@ -180,3 +282,15 @@ .. data:: strict :attr:`raise_on_defect` is set to :const:`True`. + +.. data:: future_defaults + + :attr:`unfold_headers` is set to ``True``. + + +.. rubric:: Footnotes + +.. [#] If desired, the deprecation warning can be suppressed while continuing + to use ``decoded_headers = False`` by explicitly specifying a ``False`` + value for that policy attribute (the actual default value for the + attribute in the default policy is ``None``). diff -r 7520f1bf0a81 -r b22698463737 Doc/library/email.util.rst --- a/Doc/library/email.util.rst Sun Jul 17 22:50:12 2011 -0500 +++ b/Doc/library/email.util.rst Tue Jul 19 13:24:56 2011 -0400 @@ -112,6 +112,32 @@ ``False``. The default is ``False``. +.. function:: format_datetime(dt, usegmt=False) + + Like ``formatdate``, but the input is a :class:`~datetime.datetime` instance. If it is + a naive datetime, it is assumed to be "UTC with no information about the + source timezone", and the conventional ``-0000`` is used for the timezone. + If it is an aware ``datetime``, then the numeric timezone offset is used. + If it is an aware ``datetime`` with offset zero, then *usegmt* may be set to + ``True``, in which case the string ``GMT`` is used instead of the numeric + timezone offset. This provides a way to generate standards-conformant HTTP + date headers. + + +.. function:: localtime(dt=None) + + Return local time as an aware datetime object. If called without + arguments, return current time.
Otherwise the *dt* argument should be a + :class:`~datetime.datetime` instance, and it is converted to the local time + zone according to the system time zone database. If *dt* is naive (that + is, ``dt.tzinfo`` is ``None``), it is assumed to be in local time. In this + case, a positive or zero value for *isdst* causes ``localtime`` to presume + initially that summer time (for example, Daylight Saving Time) is or is not + (respectively) in effect for the specified time. A negative value for + *isdst* causes ``localtime`` to attempt to divine whether summer time + is in effect for the specified time. + + .. function:: make_msgid(idstring=None, domain=None) Returns a string suitable for an :rfc:`2822`\ -compliant diff -r 7520f1bf0a81 -r b22698463737 Lib/email/__init__.py --- a/Lib/email/__init__.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/email/__init__.py Tue Jul 19 13:24:56 2011 -0400 @@ -4,7 +4,7 @@ """A package for parsing, handling, and generating email messages.""" -__version__ = '5.1.0' +__version__ = '6.0.0a1' __all__ = [ 'base64mime', diff -r 7520f1bf0a81 -r b22698463737 Lib/email/_encoded_words.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/email/_encoded_words.py Tue Jul 19 13:24:56 2011 -0400 @@ -0,0 +1,100 @@ +""" Routines for manipulating RFC2047 encoded words. """ + +# An encoded word looks like this: +# +# =?charset?cte?encoded_string?= +# +# for more information about charset see the charset module. Here it is one +# of the preferred MIME charset names (hopefully, you never know when parsing). +# cte, Content Transfer Encoding, is either 'q' or 'b' (ignoring case). In +# theory other letters could be used for other encodings, but in practice this +# (almost?) never happens. (XXX There could be a public API for adding entries +# to the CTE tables, but YAGNI for now.) 'q' is Quoted Printable, 'b' is +# Base64. The meaning of encoded_string should be obvious.
+# +# The general interface for a CTE decoder is that it takes the encoded_string as +# its argument, and returns a tuple (cte_decoded_string, defects). The +# cte_decoded_string is the original binary that was encoded using the +# specified cte. 'defects' is a list of MessageDefect instances indicating any +# problems encountered during conversion. + +import re +import base64 +import binascii +from email import errors + +# +# Quoted Printable +# + +_q_byte_subber = re.compile(br'=([a-fA-F0-9]{2})').sub + +def decode_q(encoded): + encoded = encoded.replace(b'_', b' ') + return _q_byte_subber( + lambda m: bytes([int(m.group(1), 16)]), encoded), [] + +# +# Base64 +# + +def decode_b(encoded): + defects = [] + pad_err = len(encoded) % 4 + if pad_err: + defects.append(errors.InvalidBase64PaddingDefect()) + padded_encoded = encoded + b'==='[:4-pad_err] + else: + padded_encoded = encoded + try: + return base64.b64decode(padded_encoded, validate=True), defects + except binascii.Error: + # Since we had correct padding, this must an invalid char error. + defects = [errors.InvalidBase64CharactersDefect()] + # The non-alphabet characters are ignored as far as padding + # goes, but we don't know how many there are. So we'll just + # try various padding lengths until something works. + for i in 0, 1, 2, 3: + try: + return base64.b64decode(encoded+b'='*i, validate=False), defects + except binascii.Error: + if i==0: + defects.append(errors.InvalidBase64PaddingDefect()) + else: + # This should never happen. + raise AssertionError("unexpected binascii.Error") + +cte_decoders = { + 'q': decode_q, + 'b': decode_b, + } + +def decode(ew): + """Decode encoded word and return (string, defects) tuple. + + An encoded word has the form: + + =?charset?cte?encoded_string?= + + This function expects exactly such a string, and returns the encoded_string + decoded first from its Content Transfer Encoding and then from the + resulting bytes into unicode using the specified charset. 
If the + cte-decoded string does not successfully decode using the specified + character set, a defect is added to the defects list and the unknown octets + are replaced by the unicode 'unknown' character U+FFFD. + + """ + #XXX: We could perhaps do some heuristic recovery here. + _, charset, cte, cte_string, _ = ew.split('?') + cte = cte.lower() + # Recover the original bytes and do CTE decoding. + bstring = cte_string.encode('ascii', 'surrogateescape') + bstring, defects = cte_decoders[cte](bstring) + # Turn the CTE decoded bytes into unicode. + try: + string = bstring.decode(charset) + except UnicodeError: + defects.append(errors.UndecodableBytesDefect()) + string = bstring.decode(charset, 'replace') + # XXX: more code to handle malformed ews? + return string, defects diff -r 7520f1bf0a81 -r b22698463737 Lib/email/_header_value_parser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/email/_header_value_parser.py Tue Jul 19 13:24:56 2011 -0400 @@ -0,0 +1,1564 @@ +"""Header value parser implementing various email-related RFC parsing rules. + +The parsing methods defined in this module implement various email related +parsing rules. Principal among them is RFC 5322, which is the follow-on +to RFC 2822 and primarily a clarification of the former. It also implements +RFC 2047 encoded word decoding. + +RFC 5322 goes to considerable trouble to maintain backward compatibility with +RFC 822 in the parse phase, while cleaning up the structure on the generation +phase. This parser supports correct RFC 5322 generation by tagging white space +as folding white space only when folding is allowed in the non-obsolete rule +sets. Actually, the parser is even more generous when accepting input than RFC +5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages. +Where possible, deviations from the standard are annotated on the 'defects' +attribute of tokens that deviate.
+ +The general structure of the parser follows RFC 5322, and uses its terminology +where there is a direct correspondence. Where the implementation requires a +somewhat different structure than that used by the formal grammar, new terms +that mimic the closest existing terms are used. Thus, it really helps to have +a copy of RFC 5322 handy when studying this code. + +Input to the parser is a string that has already been unfolded according to +RFC 5322 rules. According to the RFC this unfolding is the very first step, and +this parser leaves the unfolding step to a higher level message parser, which +will have already detected the line breaks that need unfolding while +determining the beginning and end of each header. + +The output of the parser is a TokenList object, which is a list subclass. A +TokenList is a recursive data structure. The terminal nodes of the structure +are Terminal objects, which are subclasses of str. These do not correspond +directly to terminal objects in the formal grammar, but are instead more +practical higher level combinations of true terminals. + +All TokenList and Terminal objects have a 'value' attribute, which produces the +semantically meaningful value of that part of the parse subtree. The value of +all whitespace tokens (no matter how many sub-tokens they may contain) is a +single space, as per the RFC rules. This includes 'CFWS', which is herein +included in the general class of whitespace tokens. There is one exception to +the rule that whitespace tokens are collapsed into single spaces in values: in +the value of a 'bare-quoted-string' (a quoted-string with no leading or +trailing whitespace), any whitespace that appeared between the quotation marks +is preserved in the returned value. Note that in all Terminal strings quoted +pairs are turned into their unquoted values. 
+ +All TokenList and Terminal objects also have a string value, which attempts to +be a "canonical" representation of the RFC-compliant form of the substring that +produced the parsed subtree, including minimal use of quoted pair quoting. +Whitespace runs are not collapsed. + +Comment tokens also have a 'content' attribute providing the string found +between the parens (including any nested comments) with whitespace preserved. + +All TokenList and Terminal objects have a 'defects' attribute which is a +possibly empty list of all of the defects found while creating the token. Defects +may appear on any token in the tree, and a composite list of all defects in the +subtree is available through the 'all_defects' attribute of any node. (For +Terminal nodes x.defects == x.all_defects.) + +Each object in a parse tree is called a 'token', and each has a 'token_type' +attribute that gives the name from the RFC 5322 grammar that it represents. +Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that +may be produced: 'ptext'. A 'ptext' is a string of printable ascii characters. +It is returned in place of lists of (ctext/quoted-pair) and +(qtext/quoted-pair). + +XXX: provide complete list of token types.
+""" + +import re +from email import _encoded_words as _ew +from email import errors +from email import utils + +# +# Useful constants and functions +# + +WSP = set(' \t') +CFWS_LEADER = WSP | set('(') +SPECIALS = set(r'()<>@,:;.\"[]') +ATOM_ENDS = SPECIALS | WSP +# '.', '"', and '(' do not end phrases in order to support obs-phrase +PHRASE_ENDS = SPECIALS - set('."(') + +def quote_string(value): + return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' + +# +# TokenList and its subclasses +# + +class TokenList(list): + + token_type = None + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + self.defects = [] + + def __str__(self): + return ''.join(str(x) for x in self) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, + super().__repr__()) + + @property + def value(self): + return ''.join(x.value for x in self if x.value) + + @property + def all_defects(self): + return sum((x.all_defects for x in self), self.defects) + + def pprint(self, indent=''): + print('{}{}/{}('.format( + indent, + self.__class__.__name__, + self.token_type)) + for token in self: + token.pprint(indent+' ') + if self.defects: + extra = ' Defects: {}'.format(self.defects) + else: + extra = '' + print('{}){}'.format(indent, extra)) + + +class WhiteSpaceTokenList(TokenList): + + @property + def value(self): + return ' ' + + @property + def comments(self): + return [x.content for x in self if x.token_type=='comment'] + +class UnstructuredTokenList(TokenList): + + token_type = 'unstructured' + + +class Phrase(TokenList): + + token_type = 'phrase' + + +class Word(TokenList): + + token_type = 'word' + + +class CFWSList(WhiteSpaceTokenList): + + token_type = 'cfws' + + +class Atom(TokenList): + + token_type = 'atom' + + +class QuotedString(TokenList): + + token_type = 'quoted-string' + + @property + def content(self): + for x in self: + if x.token_type == 'bare-quoted-string': + return x.value + + @property + def quoted_value(self): + res = [] + 
for x in self: + if x.token_type == 'bare-quoted-string': + res.append(str(x)) + else: + res.append(x.value) + return ''.join(res) + + +class BareQuotedString(QuotedString): + + token_type = 'bare-quoted-string' + + def __str__(self): + return quote_string(''.join(self)) + + @property + def value(self): + return ''.join(str(x) for x in self) + + +class Comment(WhiteSpaceTokenList): + + token_type = 'comment' + + def __str__(self): + return ''.join(sum([ + ["("], + [self.quote(x) for x in self], + [")"], + ], [])) + + def quote(self, value): + if value.token_type == 'comment': + return str(value) + return str(value).replace('\\', '\\\\').replace( + '(', '\(').replace( + ')', '\)') + + @property + def content(self): + return ''.join(str(x) for x in self) + + @property + def comments(self): + return [self.content] + +class AddressList(TokenList): + + token_type = 'address-list' + + @property + def addresses(self): + return [x for x in self if x.token_type=='address'] + + @property + def mailboxes(self): + return sum((x.mailboxes + for x in self if x.token_type=='address'), []) + + @property + def all_mailboxes(self): + return sum((x.all_mailboxes + for x in self if x.token_type=='address'), []) + + +class Address(TokenList): + + token_type = 'address' + + @property + def display_name(self): + if self[0].token_type == 'group': + return self[0].display_name + + @property + def mailboxes(self): + if self[0].token_type == 'mailbox': + return [self[0]] + elif self[0].token_type == 'invalid-mailbox': + return [] + return self[0].mailboxes + + @property + def all_mailboxes(self): + if self[0].token_type == 'mailbox': + return [self[0]] + elif self[0].token_type == 'invalid-mailbox': + return [self[0]] + return self[0].all_mailboxes + +class MailboxList(TokenList): + + token_type = 'mailbox-list' + + @property + def mailboxes(self): + return [x for x in self if x.token_type=='mailbox'] + + @property + def all_mailboxes(self): + return [x for x in self + if x.token_type in 
('mailbox', 'invalid-mailbox')] + + +class GroupList(TokenList): + + token_type = 'group-list' + + @property + def mailboxes(self): + if not self or self[0].token_type != 'mailbox-list': + return [] + return self[0].mailboxes + + @property + def all_mailboxes(self): + if not self or self[0].token_type != 'mailbox-list': + return [] + return self[0].all_mailboxes + + +class Group(TokenList): + + token_type = "group" + + @property + def mailboxes(self): + if self[2].token_type != 'group-list': + return [] + return self[2].mailboxes + + @property + def all_mailboxes(self): + if self[2].token_type != 'group-list': + return [] + return self[2].all_mailboxes + + @property + def display_name(self): + return self[0].display_name + + +class NameAddr(TokenList): + + token_type = 'name-addr' + + @property + def display_name(self): + if len(self) == 1: + return None + return self[0].display_name + + @property + def local_part(self): + return self[-1].local_part + + @property + def domain(self): + return self[-1].domain + + @property + def route(self): + return self[-1].route + + @property + def addr_spec(self): + return self[-1].addr_spec + + +class AngleAddr(TokenList): + + token_type = 'angle-addr' + + @property + def local_part(self): + for x in self: + if x.token_type == 'addr-spec': + return x.local_part + + @property + def domain(self): + for x in self: + if x.token_type == 'addr-spec': + return x.domain + + @property + def route(self): + for x in self: + if x.token_type == 'obs-route': + return x.domains + + @property + def addr_spec(self): + for x in self: + if x.token_type == 'addr-spec': + return x.addr_spec + + +class ObsRoute(TokenList): + + token_type = 'obs-route' + + @property + def domains(self): + return [x.domain for x in self if x.token_type == 'domain'] + + +class Mailbox(TokenList): + + token_type = 'mailbox' + + @property + def display_name(self): + if self[0].token_type == 'name-addr': + return self[0].display_name + + @property + def local_part(self): + 
return self[0].local_part + + @property + def domain(self): + return self[0].domain + + @property + def route(self): + if self[0].token_type == 'name-addr': + return self[0].route + + @property + def addr_spec(self): + return self[0].addr_spec + + +class InvalidMailbox(TokenList): + + token_type = 'invalid-mailbox' + + @property + def display_name(self): + return None + + local_part = domain = route = addr_spec = display_name + + +class Domain(TokenList): + + token_type = 'domain' + + @property + def domain(self): + return ''.join(super().value.split()) + + +class DotAtom(TokenList): + + token_type = 'dot-atom' + + +class DotAtomText(TokenList): + + token_type = 'dot-atom-text' + + +class AddrSpec(TokenList): + + token_type = 'addr-spec' + + @property + def local_part(self): + return self[0].local_part + + @property + def domain(self): + if len(self) < 3: + return None + return self[-1].domain + + @property + def value(self): + if len(self) < 3: + return self[0].value + return self[0].value.rstrip()+self[1].value+self[2].value.lstrip() + + @property + def addr_spec(self): + nameset = set(self.local_part) + if len(nameset) > len(nameset-ATOM_ENDS): + lp = quote_string(self.local_part) + else: + lp = self.local_part + if self.domain is not None: + return lp + '@' + self.domain + return lp + + +class ObsLocalPart(TokenList): + + token_type = 'obs-local-part' + + +class DisplayName(TokenList): + + token_type = 'display-name' + + @property + def display_name(self): + res = TokenList(self) + if res[0].token_type == 'cfws': + res.pop(0) + else: + if res[0][0].token_type == 'cfws': + res[0] = TokenList(res[0][1:]) + if res[-1].token_type == 'cfws': + res.pop() + else: + if res[-1][-1].token_type == 'cfws': + res[-1] = TokenList(res[-1][:-1]) + return res.value + + @property + def value(self): + quote = False + if self.defects: + quote = True + else: + for x in self: + if x.token_type == 'quoted-string': + quote = True + if quote: + pre = post = '' + if 
self[0].token_type=='cfws' or self[0][0].token_type=='cfws': + pre = ' ' + if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws': + post = ' ' + return pre+quote_string(self.display_name)+post + else: + return super().value + + +class LocalPart(TokenList): + + token_type = 'local-part' + + @property + def value(self): + if self[0].token_type == "quoted-string": + return self[0].quoted_value + else: + return self[0].value + + @property + def local_part(self): + res = TokenList(self[0]) + if res[0].token_type == 'cfws': + res.pop(0) + else: + if res[0][0].token_type == 'cfws': + res[0] = TokenList(res[0][1:]) + if res[-1].token_type == 'cfws': + res.pop() + else: + if res[-1][-1].token_type == 'cfws': + res[-1] = TokenList(res[-1][:-1]) + return res.value + + +class DomainLiteral(TokenList): + + token_type = 'domain-literal' + + @property + def domain(self): + return ''.join(super().value.split()) + + @property + def ip(self): + for x in self: + if x.token_type == 'ptext': + return x.value + + +# +# Terminal classes and instances +# + +class Terminal(str): + + def __new__(cls, value, token_type): + self = super().__new__(cls, value) + self.token_type = token_type + self.defects = [] + return self + + def __repr__(self): + return "{}({})".format(self.__class__.__name__, super().__repr__()) + + @property + def all_defects(self): + return list(self.defects) + + def pprint(self, indent=''): + print("{}{}/{}({})".format( + indent, + self.__class__.__name__, + self.token_type, + super().__repr__())) + + +class WhiteSpaceTerminal(Terminal): + + @property + def value(self): + return ' ' + + +class ValueTerminal(Terminal): + + @property + def value(self): + return self + + +# XXX these need to become classes and used as instances so +# that a program can't change them in a parse tree and screw +# up other parse trees. Maybe should have tests for that, too. 
+DOT = ValueTerminal('.', 'dot') +ListSeparator = ValueTerminal(',', 'list-separator') +RouteComponentMarker = ValueTerminal('@', 'route-component-marker') + +# +# Parser +# + +"""Parse strings according to RFC822/2047/2822/5322 rules. + +This is a stateless parser. Each get_XXX function accepts a string and +returns either a Terminal or a TokenList representing the RFC object named +by the method and a string containing the remaining unparsed characters +from the input. Thus a parser method consumes the next syntactic construct +of a given type and returns a token representing the construct plus the +unparsed remainder of the input string. + +For example, if the first element of a structured header is a 'phrase', +then: + + phrase, value = get_phrase(value) + +returns the complete phrase from the start of the string value, plus any +characters left in the string after the phrase is removed. + +""" + +_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split +_non_atom_end_matcher = re.compile(r"[^{}]+".format( + ''.join(ATOM_ENDS).replace('\\','\\\\').replace(']','\]'))).match +_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall + +def _sanitize(string): + # Turn any escaped bytes into unicode 'unknown' char. + original_bytes = string.encode('ascii', 'surrogateescape') + return original_bytes.decode('ascii', 'replace') + +def _validate_xtext(xtext): + """If input token contains ASCII non-printables, register a defect.""" + + non_printables = _non_printable_finder(xtext) + if non_printables: + xtext.defects.append(errors.NonPrintableDefect(non_printables)) + +def _get_ptext_to_endchars(value, endchars): + """Scan printables/quoted-pairs until endchars and return unquoted ptext. + + This function turns a run of qcontent, ccontent-without-comments, or + dtext-with-quoted-printables into a single string by unquoting any + quoted printables. 
It returns the string, the remaining value, and + a flag that is True iff there were any quoted printables decoded. + + """ + fragment, *remainder = _wsp_splitter(value, 1) + vchars = [] + escape = False + had_qp = False + for pos in range(len(fragment)): + if fragment[pos] == '\\': + if escape: + escape = False + had_qp = True + else: + escape = True + continue + if escape: + escape = False + elif fragment[pos] in endchars: + break + vchars.append(fragment[pos]) + else: + pos = pos + 1 + return ''.join(vchars), ''.join([fragment[pos:]] + remainder), had_qp + +def _decode_ew_run(value): + """ Decode a run of RFC2047 encoded words. + + _decode_ew_run(value) -> (text, value, defects) + + Scans the supplied value for a run of tokens that look like they are RFC + 2047 encoded words, decodes those words into text according to RFC 2047 + rules (whitespace between encoded words is discarded), and returns the text + and the remaining value (including any leading whitespace on the remaining + value), as well as a list of any defects encoutered while decoding. The + input value may not have any leading whitespace. + + """ + res = [] + defects = [] + last_ws = '' + while value: + try: + tok, ws, value = _wsp_splitter(value, 1) + except ValueError: + tok, ws, value = value, '', '' + if not (tok.startswith('=?') and tok.endswith('?=')): + return ''.join(res), last_ws + tok + ws + value, defects + text, new_defects = _ew.decode(tok) + res.append(text) + defects.extend(new_defects) + last_ws = ws + return ''.join(res), last_ws, defects + +def get_unstructured(value): + """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct + obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS) + obs-utext = %d0 / obs-NO-WS-CTL / LF / CR + + obs-NO-WS-CTL is control characters except WSP/CR/LF. + + So, basically, we have printable runs, plus control characters or nulls in + the obsolete syntax, separated by whitespace. 
Since RFC 2047 uses the + obsolete syntax in its specification, but requires whitespace on either + side of the encoded words, I can see no reason to need to separate the + non-printable-non-whitespace from the printable runs if they occur, so we + parse this into xtext tokens separated by WSP tokens. + + Because an 'unstructured' value must by definition constitute the entire + value, this 'get' routine does not return a remaining value, only the + parsed TokenList. + + """ + # XXX: but what about bare CR and LF? They might signal the start or + # end of an encoded word. + + # The dance with the ws is so that we combine leading ws that is encoded + # in an encoded word with trailing whitespace leading up to it. Not + # strictly necessary, but probably what the composer intended. + unstructured = UnstructuredTokenList() + last_ws = '' + while value: + try: + tok, ws, value = _wsp_splitter(value, 1) + except ValueError: + tok, ws, value = value, '', '' + if not tok: + if ws: + last_ws += ws + if not value: + break + continue + if tok.startswith('=?') and tok.endswith('?='): + text, value, defects = _decode_ew_run(tok + ws + value) + value = text + value + unstructured.defects.extend(defects) + else: + if last_ws: + unstructured.append(WhiteSpaceTerminal(last_ws, 'fws')) + if utils._has_surrogates(tok): + tok = _sanitize(tok) + vtext = ValueTerminal(tok, 'vtext') + _validate_xtext(vtext) + unstructured.append(vtext) + last_ws = ws + if last_ws: + unstructured.append(WhiteSpaceTerminal(last_ws, 'fws')) + return unstructured + +def get_qp_ctext(value): + """ctext = + + This is not the RFC ctext, since we are handling nested comments in comment + and unquoting quoted-pairs here. We allow anything except the '()' + characters, but if we find any ASCII other than the RFC defined printable + ASCII an NonPrintableDefect is added to the token's defects list. Since + quoted pairs are converted to their unquoted values, what is returned is + a 'ptext' token. 
In this case it is a WhiteSpaceTerminal, so it's value + is ' '. + + """ + ptext, value, _ = _get_ptext_to_endchars(value, '()') + ptext = WhiteSpaceTerminal(ptext, 'ptext') + _validate_xtext(ptext) + return ptext, value + +def get_qcontent(value): + """qcontent = qtext / quoted-pair + + We allow anything except the DQUOTE character, but if we find any ASCII + other than the RFC defined printable ASCII an NonPrintableDefect is + added to the token's defects list. Any quoted pairs are converted to their + unquoted values, so what is returned is a 'ptext' token. In this case it + is a ValueTerminal. + + """ + ptext, value, _ = _get_ptext_to_endchars(value, '"') + ptext = ValueTerminal(ptext, 'ptext') + _validate_xtext(ptext) + return ptext, value + +def get_atext(value): + """atext = + + We allow any non-ATOM_ENDS in atext, but add an InvalidATextDefect to + the token's defects list if we find non-atext characters. + """ + m = _non_atom_end_matcher(value) + if not m: + raise errors.HeaderParseError( + "expected atext but found '{}'".format(value)) + atext = m.group() + value = value[len(atext):] + atext = ValueTerminal(atext, 'atext') + _validate_xtext(atext) + return atext, value + +def get_fws(value): + """FWS = 1*WSP + + This isn't the RFC definition. We're using fws to represent tokens where + folding can be done, but when we are parsing the *un*folding has already + been done so we don't need to watch out for CRLF. + + """ + newvalue = value.lstrip() + fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws') + return fws, newvalue + +def get_bare_quoted_string(value): + """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE + + A quoted-string without the leading or trailing white space. Its + value is the text between the quote marks, with whitespace + preserved and quoted pairs decoded. 
+ """ + if value[0] != '"': + raise errors.HeaderParseError( + "expected '\"' but found '{}'".format(value)) + bare_quoted_string = BareQuotedString() + value = value[1:] + while value and value[0] != '"': + if value[0] in WSP: + token, value = get_fws(value) + else: + token, value = get_qcontent(value) + bare_quoted_string.append(token) + if not value: + bare_quoted_string.defects.append(errors.InvalidHeaderDefect( + "end of header inside quoted string")) + return bare_quoted_string, value + return bare_quoted_string, value[1:] + +def get_comment(value): + """comment = "(" *([FWS] ccontent) [FWS] ")" + ccontent = ctext / quoted-pair / comment + + We handle nested comments here, and quoted-pair in our qp-ctext routine. + """ + if value and value[0] != '(': + raise errors.HeaderParseError( + "expected '(' but found '{}'".format(value)) + comment = Comment() + value = value[1:] + while value and value[0] != ")": + if value[0] in WSP: + token, value = get_fws(value) + elif value[0] == '(': + token, value = get_comment(value) + else: + token, value = get_qp_ctext(value) + comment.append(token) + if not value: + comment.defects.append(errors.InvalidHeaderDefect( + "end of header inside comment")) + return comment, value + return comment, value[1:] + +def get_cfws(value): + """CFWS = (1*([FWS] comment) [FWS]) / FWS + + """ + cfws = CFWSList() + while value and value[0] in CFWS_LEADER: + if value[0] in WSP: + token, value = get_fws(value) + else: + token, value = get_comment(value) + cfws.append(token) + return cfws, value + +def get_quoted_string(value): + """quoted-string = [CFWS] [CFWS] + + 'bare-quoted-string' is an intermediate class defined by this + parser and not by the RFC grammar. It is the quoted string + without any attached CFWS. 
+ """ + quoted_string = QuotedString() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + quoted_string.append(token) + token, value = get_bare_quoted_string(value) + quoted_string.append(token) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + quoted_string.append(token) + return quoted_string, value + +def get_atom(value): + """atom = [CFWS] 1*atext [CFWS] + + """ + atom = Atom() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + atom.append(token) + if value and value[0] in ATOM_ENDS: + raise errors.HeaderParseError( + "expected atom but found '{}'".format(value)) + token, value = get_atext(value) + atom.append(token) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + atom.append(token) + return atom, value + +def get_dot_atom_text(value): + """ dot-text = 1*atext *("." 1*atext) + + """ + dot_atom_text = DotAtomText() + if not value or value[0] in ATOM_ENDS: + raise errors.HeaderParseError("expected atom at a start of " + "dot-atom-text but found '{}'".format(value)) + while value and value[0] not in ATOM_ENDS: + token, value = get_atext(value) + dot_atom_text.append(token) + if value and value[0] == '.': + dot_atom_text.append(DOT) + value = value[1:] + if dot_atom_text[-1] is DOT: + raise errors.HeaderParseError("expected atom at end of dot-atom-text " + "but found '{}'".format('.'+value)) + return dot_atom_text, value + +def get_dot_atom(value): + """ dot-atom = [CFWS] dot-atom-text [CFWS] + + """ + dot_atom = DotAtom() + if value[0] in CFWS_LEADER: + token, value = get_cfws(value) + dot_atom.append(token) + token, value = get_dot_atom_text(value) + dot_atom.append(token) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + dot_atom.append(token) + return dot_atom, value + +def get_word(value): + """word = atom / quoted-string + + Either atom or quoted-string may start with CFWS. 
def get_phrase(value):
    """ phrase = 1*word / obs-phrase
        obs-phrase = word *(word / "." / CFWS)

    Collect words, periods and bare CFWS from the front of value until a
    character in PHRASE_ENDS (or the end of the input) is reached, and return
    a (Phrase, remaining_value) tuple.

    Each period, and each run of CFWS that is not attached to a word, adds an
    ObsoleteHeaderDefect to the phrase.  A phrase that does not begin with a
    word is still accepted but gets an InvalidHeaderDefect, since not even
    the obsolete grammar allows it.  Any other HeaderParseError raised by
    get_word inside the loop is passed on unchanged.

    """
    phrase = Phrase()
    try:
        first_word, value = get_word(value)
    except errors.HeaderParseError:
        phrase.defects.append(errors.InvalidHeaderDefect(
            "phrase does not start with word"))
    else:
        phrase.append(first_word)
    while value and value[0] not in PHRASE_ENDS:
        if value[0] == '.':
            phrase.append(DOT)
            phrase.defects.append(errors.ObsoleteHeaderDefect(
                "period in 'phrase'"))
            value = value[1:]
            continue
        try:
            part, value = get_word(value)
        except errors.HeaderParseError:
            # Only CFWS standing on its own is tolerated here; anything else
            # is a genuine parse failure for the caller to handle.
            if value[0] not in CFWS_LEADER:
                raise
            part, value = get_cfws(value)
            phrase.defects.append(errors.ObsoleteHeaderDefect(
                "comment found without atom"))
        phrase.append(part)
    return phrase, value
def _check_for_early_dl_end(value, domain_literal):
    """Handle running out of input in the middle of a domain-literal.

    If value is non-empty nothing is changed and False is returned.
    Otherwise an InvalidHeaderDefect is recorded on domain_literal, the
    closing ']' terminal that the input lacked is appended, and True is
    returned so the caller can stop parsing.

    """
    if value:
        return False
    # Record the problem in the defects list, as the other parsers here do.
    # Appending the exception to the token list itself would mix a non-token
    # in with the parse tree.
    domain_literal.defects.append(errors.InvalidHeaderDefect(
        "end of input inside domain-literal"))
    domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
    return True
def get_domain(value):
    """ domain = dot-atom / domain-literal / obs-domain
        obs-domain = atom *("." atom)

    Parse a domain from the front of value and return a
    (Domain, remaining_value) tuple.  Leading CFWS is spliced into the first
    sub-token.  If the first token is followed by a '.', the obsolete form is
    assumed: an ObsoleteHeaderDefect is recorded and the following
    '.'-separated atoms are added directly to the Domain.

    Raises HeaderParseError if nothing is left to parse, including when value
    is empty to begin with; errors from the sub-parsers propagate.

    """
    domain = Domain()
    leader = None
    # value may legitimately be empty here (get_addr_spec passes value[1:]
    # after the '@'), so check before indexing; the empty case is reported
    # just below as a HeaderParseError.
    if value and value[0] in CFWS_LEADER:
        leader, value = get_cfws(value)
    if not value:
        raise errors.HeaderParseError(
            "expected domain but found '{}'".format(value))
    if value[0] == '[':
        token, value = get_domain_literal(value)
        if leader is not None:
            token[:0] = [leader]
        domain.append(token)
        return domain, value
    try:
        token, value = get_dot_atom(value)
    except errors.HeaderParseError:
        token, value = get_atom(value)
    if leader is not None:
        token[:0] = [leader]
    domain.append(token)
    if value and value[0] == '.':
        domain.defects.append(errors.ObsoleteHeaderDefect(
            "domain is not a dot-atom (contains CFWS)"))
        # Flatten an initial dot-atom so that its pieces sit alongside the
        # atoms and dots appended below.
        if domain[0].token_type == 'dot-atom':
            domain[:] = domain[0]
        while value and value[0] == '.':
            domain.append(DOT)
            token, value = get_atom(value[1:])
            domain.append(token)
    return domain, value
def get_obs_route(value):
    """ obs-route = obs-domain-list ":"
        obs-domain-list = *(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])

    Returns an obs-route token with the appropriate sub-tokens (that is,
    there is no obs-domain-list in the parse tree), together with the
    unparsed remainder of value.

    Raises HeaderParseError if the first route domain is missing, if the
    input ends before the terminating ':', or if something other than ':'
    follows the domain list.

    """
    obs_route = ObsRoute()
    while value and (value[0]==',' or value[0] in CFWS_LEADER):
        if value[0] in CFWS_LEADER:
            token, value = get_cfws(value)
            obs_route.append(token)
        elif value[0] == ',':
            obs_route.append(ListSeparator)
            value = value[1:]
    if not value or value[0] != '@':
        raise errors.HeaderParseError(
            "expected obs-route domain but found '{}'".format(value))
    obs_route.append(RouteComponentMarker)
    token, value = get_domain(value[1:])
    obs_route.append(token)
    while value and value[0]==',':
        obs_route.append(ListSeparator)
        value = value[1:]
        if not value:
            break
        if value[0] in CFWS_LEADER:
            token, value = get_cfws(value)
            obs_route.append(token)
        # The CFWS may have been the last thing in the input; stop here so
        # the end-of-header error below is raised rather than an IndexError.
        if not value:
            break
        if value[0] == '@':
            obs_route.append(RouteComponentMarker)
            token, value = get_domain(value[1:])
            obs_route.append(token)
    if not value:
        raise errors.HeaderParseError("end of header while parsing obs-route")
    if value[0] != ':':
        raise errors.HeaderParseError( "expected ':' marking end of "
            "obs-route but found '{}'".format(value))
    obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker'))
    return obs_route, value[1:]
def get_display_name(value):
    """ display-name = phrase

    display-name is only another name for phrase, so the parse tree does not
    get a display-name node wrapping a phrase node.  Instead the phrase is
    parsed and its sub-tokens and defects are copied into a DisplayName
    token.  Returns a (DisplayName, remaining_value) tuple.

    """
    phrase, remainder = get_phrase(value)
    display_name = DisplayName()
    display_name.extend(phrase[:])
    display_name.defects = phrase.defects[:]
    return display_name, remainder
def get_mailbox(value):
    """ mailbox = name-addr / addr-spec

    The two forms cannot be told apart up front, so name-addr is attempted
    first and addr-spec second.  If both fail, a HeaderParseError describing
    the unparsed value is raised.  Returns a (Mailbox, remaining_value)
    tuple; the Mailbox is retyped as 'invalid-mailbox' when any defect found
    in the parsed token is an InvalidHeaderDefect.

    """
    mailbox = Mailbox()
    try:
        parsed, value = get_name_addr(value)
    except errors.HeaderParseError:
        try:
            parsed, value = get_addr_spec(value)
        except errors.HeaderParseError:
            raise errors.HeaderParseError(
                "expected mailbox but found '{}'".format(value))
    for defect in parsed.all_defects:
        if isinstance(defect, errors.InvalidHeaderDefect):
            mailbox.token_type = 'invalid-mailbox'
            break
    mailbox.append(parsed)
    return mailbox, value
+ + """ + invalid_mailbox = InvalidMailbox() + while value and value[0] not in endchars: + if value[0] in PHRASE_ENDS: + invalid_mailbox.append(ValueTerminal(value[0], + 'misplaced-special')) + value = value[1:] + else: + token, value = get_phrase(value) + invalid_mailbox.append(token) + return invalid_mailbox, value + +def get_mailbox_list(value): + """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list + obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS]) + + For this routine we go outside the formal grammar in order to improve error + handling. We recognize the end of the mailbox list only at the end of the + value or at a ';' (the group terminator). This is so that we can turn + invalid mailboxes into InvalidMailbox tokens and continue parsing any + remaining valid mailboxes. We also allow all mailbox entries to be null, + and this condition is handled appropriately at a higher level. + + """ + mailbox_list = MailboxList() + while value and value[0] != ';': + try: + token, value = get_mailbox(value) + mailbox_list.append(token) + except errors.HeaderParseError: + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value or value[0] in ',;': + mailbox_list.append(leader) + mailbox_list.defects.append(errors.ObsoleteHeaderDefect( + "empty element in mailbox-list")) + else: + token, value = get_invalid_mailbox(value, ',;') + if leader is not None: + token[:0] = [leader] + mailbox_list.append(token) + mailbox_list.defects.append(errors.InvalidHeaderDefect( + "invalid mailbox in mailbox-list")) + elif value[0] == ',': + mailbox_list.defects.append(errors.ObsoleteHeaderDefect( + "empty element in mailbox-list")) + else: + token, value = get_invalid_mailbox(value, ',;') + if leader is not None: + token[:0] = [leader] + mailbox_list.append(token) + mailbox_list.defects.append(errors.InvalidHeaderDefect( + "invalid mailbox in mailbox-list")) + if value and value[0] not in ',;': + # Crap after mailbox; treat it as an 
def get_group(value):
    """ group = display-name ":" [group-list] ";" [CFWS]

    Parse a group from the front of value and return a
    (Group, remaining_value) tuple.

    Raises HeaderParseError if the display name is not followed by ':' or if
    something other than ';' follows the group list.  If the input simply
    ends before the ';', an InvalidHeaderDefect is recorded on the group and
    the missing terminator is supplied.

    """
    group = Group()
    token, value = get_display_name(value)
    if not value or value[0] != ':':
        raise errors.HeaderParseError("expected ':' at end of group "
            "display name but found '{}'".format(value))
    group.append(token)
    group.append(ValueTerminal(':', 'group-display-name-terminator'))
    value = value[1:]
    if value and value[0] == ';':
        group.append(ValueTerminal(';', 'group-terminator'))
        return group, value[1:]
    token, value = get_group_list(value)
    group.append(token)
    if not value:
        group.defects.append(errors.InvalidHeaderDefect(
            "end of header in group"))
    # Must be 'elif': once the end of input has been recorded as a defect
    # there is no character left to inspect.
    elif value[0] != ';':
        raise errors.HeaderParseError(
            "expected ';' at end of group but found {}".format(value))
    group.append(ValueTerminal(';', 'group-terminator'))
    value = value[1:]
    if value and value[0] in CFWS_LEADER:
        token, value = get_cfws(value)
        group.append(token)
    return group, value
+ address = Address() + try: + token, value = get_group(value) + except errors.HeaderParseError: + try: + token, value = get_mailbox(value) + except errors.HeaderParseError: + raise errors.HeaderParseError( + "expected address but found '{}'".format(value)) + address.append(token) + return address, value + +def get_address_list(value): + """ address_list = (address *("," address)) / obs-addr-list + obs-addr-list = *([CFWS] ",") address *("," [address / CFWS]) + + We depart from the formal grammar here by continuing to parse until the end + of the input, assuming the input to be entirely composed of an + address-list. This is always true in email parsing, and allows us + to skip invalid addresses to parse additional valid ones. + + """ + address_list = AddressList() + while value: + try: + token, value = get_address(value) + address_list.append(token) + except errors.HeaderParseError as err: + leader = None + if value[0] in CFWS_LEADER: + leader, value = get_cfws(value) + if not value or value[0] == ',': + address_list.append(leader) + address_list.defects.append(errors.ObsoleteHeaderDefect( + "address-list entry with no content")) + else: + token, value = get_invalid_mailbox(value, ',') + if leader is not None: + token[:0] = [leader] + address_list.append(Address([token])) + address_list.defects.append(errors.InvalidHeaderDefect( + "invalid address in address-list")) + elif value[0] == ',': + address_list.defects.append(errors.ObsoleteHeaderDefect( + "empty element in address-list")) + else: + token, value = get_invalid_mailbox(value, ',') + if leader is not None: + token[:0] = [leader] + address_list.append(Address([token])) + address_list.defects.append(errors.InvalidHeaderDefect( + "invalid address in address-list")) + if value and value[0] != ',': + # Crap after address; treat it as an invalid mailbox. + # The mailbox info will still be available. 
+ mailbox = address_list[-1][0] + mailbox.token_type = 'invalid-mailbox' + token, value = get_invalid_mailbox(value, ',') + mailbox.extend(token) + address_list.defects.append(errors.InvalidHeaderDefect( + "invalid address in address-list")) + if value: # Must be a , at this point. + address_list.append(ValueTerminal(',', 'list-separator')) + value = value[1:] + return address_list, value diff -r 7520f1bf0a81 -r b22698463737 Lib/email/charset.py --- a/Lib/email/charset.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/email/charset.py Tue Jul 19 13:24:56 2011 -0400 @@ -11,11 +11,10 @@ from functools import partial -import email.base64mime -import email.quoprimime - from email import errors -from email.encoders import encode_7or8bit +from email import encoders +from email import base64mime +from email import quoprimime @@ -271,7 +270,7 @@ elif self.body_encoding == BASE64: return 'base64' else: - return encode_7or8bit + return encoders.encode_7or8bit def get_output_charset(self): """Return the output character set. 
class DuplicateHeaderDefect(MessageDefect):
    """Multiple instances of a required-to-be-unique header were found"""

    def __init__(self, header_name):
        # The name is handed to the base class as given; a lower-cased copy
        # is kept in header_name.
        super().__init__(header_name)
        self.header_name = header_name.lower()

    def __str__(self):
        # The exception arguments live in 'args'; there is no 'arg'
        # attribute, so the previous spelling raised AttributeError whenever
        # the defect was converted to a string.
        return ("there may only be one '{}' header per "
                "message").format(self.args[0])
class NonPrintableDefect(HeaderDefect):
    """ASCII characters outside the ascii-printable range found"""

    def __init__(self, non_printables):
        super().__init__(non_printables)
        # Kept as given so that __str__ can report exactly what was passed.
        self.non_printables = non_printables

    def __str__(self):
        template = "the following ASCII non-printables found in header: {}"
        return template.format(self.non_printables)
if lastheader: - # XXX reconsider the joining of folded lines - self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n') + self._append_header_to_cur(lastheader, lastvalue) + + def _append_header_to_cur(self, name, linelist): + value = EMPTYSTRING.join(linelist).rstrip('\r\n') + unfolded = EMPTYSTRING.join([x.rstrip('\r\n') for x in linelist]) + header = self.policy.make_header(name, value, unfolded) + if header.max_count == 1 and name in self._cur: + self.policy.handle_defect(self._cur, + errors.DuplicateHeaderDefect(name)) + self._cur.append(header) class BytesFeedParser(FeedParser): diff -r 7520f1bf0a81 -r b22698463737 Lib/email/generator.py --- a/Lib/email/generator.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/email/generator.py Tue Jul 19 13:24:56 2011 -0400 @@ -16,7 +16,7 @@ from email import policy from email.header import Header from email.message import _has_surrogates -import email.charset as _charset +from email import charset as _charset UNDERSCORE = '_' NL = '\n' # XXX: no longer used by the code below. @@ -181,6 +181,7 @@ self.write(v.encode( maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL) else: + v = v.source if v.source is not None else v # Header's got lots of smarts, so use it. header = Header(v, maxlinelen=self._maxheaderlen, header_name=h) @@ -370,9 +371,10 @@ def _write_headers(self, msg): # This is almost the same as the string version, except for handling # strings with 8bit bytes. - for h, v in msg._headers: - self.write('%s: ' % h) - if isinstance(v, str): + for h in msg._headers: + self.write('%s: ' % h.name) + if isinstance(h, str): + v = h.source if h.source is not None else h if _has_surrogates(v): if not self.policy.must_be_7bit: # If we have raw 8bit data in a byte string, we have no idea @@ -383,9 +385,9 @@ # be to not split the string and risk it being too long. 
self.write(v+NL) continue - h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h) + h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h.name) else: - h = Header(v, header_name=h) + h = Header(v, header_name=h.name) self.write(h.encode(linesep=self._NL, maxlinelen=self._maxheaderlen)+self._NL) # A blank line always separates headers from body diff -r 7520f1bf0a81 -r b22698463737 Lib/email/header.py --- a/Lib/email/header.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/email/header.py Tue Jul 19 13:24:56 2011 -0400 @@ -12,12 +12,15 @@ import re import binascii +import warnings +import datetime -import email.quoprimime -import email.base64mime - -from email.errors import HeaderParseError +from email import utils +from email import errors from email import charset as _charset +from email import quoprimime +from email import base64mime +from email import _header_value_parser as parser Charset = _charset.Charset NL = '\n' @@ -55,7 +58,7 @@ # Helpers -_max_append = email.quoprimime._max_append +_max_append = quoprimime._max_append @@ -104,16 +107,16 @@ # This is an unencoded word. decoded_words.append((encoded_string, charset)) elif encoding == 'q': - word = email.quoprimime.header_decode(encoded_string) + word = quoprimime.header_decode(encoded_string) decoded_words.append((word, charset)) elif encoding == 'b': paderr = len(encoded_string) % 4 # Postel's law: add missing padding if paderr: encoded_string += '==='[:4 - paderr] try: - word = email.base64mime.decode(encoded_string) + word = base64mime.decode(encoded_string) except binascii.Error: - raise HeaderParseError('Base64 decoding error') + raise errors.HeaderParseError('Base64 decoding error') else: decoded_words.append((word, charset)) else: @@ -194,9 +197,7 @@ errors is passed through to the .append() call. 
""" - if charset is None: - charset = USASCII - elif not isinstance(charset, Charset): + if charset is not None and not isinstance(charset, Charset): charset = Charset(charset) self._charset = charset self._continuation_ws = continuation_ws @@ -211,6 +212,7 @@ else: # Take the separating colon and space into account. self._headerlen = len(header_name) + 2 + self.name = header_name def __str__(self): """Return the string value of the header.""" @@ -274,12 +276,22 @@ elif not isinstance(charset, Charset): charset = Charset(charset) if not isinstance(s, str): + if charset is None: + charset = USASCII input_charset = charset.input_codec or 'us-ascii' if input_charset == _charset.UNKNOWN8BIT: s = s.decode('us-ascii', 'surrogateescape') else: s = s.decode(input_charset, errors) - # Ensure that the bytes we're storing can be decoded to the output + elif charset is None: + # If there are non-ASCII characters and no specified encoding, + # default to utf-8. + try: + s.encode('us-ascii') + charset = USASCII + except UnicodeEncodeError: + charset = UTF8 + # Ensure that the bytes we're storing can be encoded to the output # character set, otherwise an early error is thrown. 
class Address(str):

    """A str holding one address, with its parsed parts as attributes.

    The string value is whatever is passed as value.  name, username and
    domain are stored with None replaced by the empty string, and defects is
    stored as a tuple.  All of them are exposed read-only.

    """

    def __new__(cls, value, name, username, domain, defects):
        self = str.__new__(cls, value)
        self._name = '' if name is None else name
        self._username = '' if username is None else username
        self._domain = '' if domain is None else domain
        self._defects = tuple(defects)
        return self

    @property
    def name(self):
        return self._name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def defects(self):
        return self._defects

    @property
    def addr_spec(self):
        # The local part is quoted when it contains any character that would
        # end an atom.
        if set(self.username) & parser.ATOM_ENDS:
            local_part = parser.quote_string(self.username)
        else:
            local_part = self.username
        if not self.domain:
            return local_part
        return local_part + '@' + self.domain

    @property
    def reformatted(self):
        # The display name is quoted when it contains any special character.
        if set(self.name) & parser.SPECIALS:
            display = parser.quote_string(self.name)
        else:
            display = self.name
        if display:
            return "{} <{}>".format(display, self.addr_spec)
        return self.addr_spec or ''

    def __getnewargs__(self):
        return (str(self), self.name, self.username, self.domain, self.defects)
self._addresses + + def __getnewargs__(self): + return (str(self), self.name, self.addresses) + + +# Header Classes # + +class BaseHeader(str): + + """Base class for message headers. + + Implements generic behavior and provides tools for subclasses. + + A subclass must define a classmethod named 'parse' that takes an unfolded + value string and a dictionary as its arguments. The dictionary will + contain one key, 'defects', initialized to an empty list. After the call + the dictionary must contain an additional key, 'decoded', set to the string + value of the idealized representation of the data from the value. (That + is, encoded words are decoded, and values that have canonical + representations are so represented.) + + The defects key is intended to collect parsing defects, which the message + parser will subsequently dispose of as appropriate. The parser should not, + insofar as practical, raise any errors. Defects should be added to the + list instead. The standard header parsers register defects for RFC + compliance issues, for obsolete RFC syntax, and for unrecoverable parsing + errors. + + The parse method may add additional keys to the dictionary. In this case + the subclass must define an 'init' method, which will be passed the + dictionary as its keyword arguments. The method should use (usually by + setting them as the value of similarly named attributes) and remove all the + extra keys added by its parse method, and then use super to call its parent + class with the remaining arguments and keywords. + + The subclass should also make sure that a 'max_count' attribute is defined + that is either None or 1. XXX: need to better define this API. + + """ + + def __new__(cls, name, unparsed, unfolded=None, *, use_decoded=False): + # If unfolded is None then 'unparsed' comes from an application + # program, if it is not None then unfolded comes from a parse of some + # sort of source data, and we save unparsed as the source value. 
When the + # unparsed value comes from an application program it may not be a string. + # + # XXX: Currently we can have up to three copies of the header value. + # We can reduce this to two copies plus some metadata, at a slight cost + # in speed, but to do so now would be a premature optimization. + if isinstance(unparsed, str): + lines = unparsed.splitlines() + has_ec = ecre.search(unparsed) + source = unparsed + if len(lines) > 1 or has_ec: + # Deprecation plan: in 3.4 direct assignment of a folded/encoded + # value becomes an error, so the only way to set one as the value + # is to specify the unfolded arg. In 3.4 the value being the decoded + # header will be the default, and has_ec can go away here. + if unfolded is None: + source = None + warnings.warn("Direct assignment of header values containing " + "linesep characters or encoded words is deprecated.", + DeprecationWarning, 5) + unfolded = ''.join(lines) + elif unfolded is None: + source = None + unfolded = unparsed + else: + has_ec = False + use_decoded = True + source = None + unfolded = unparsed + kwds = {'defects': []} + cls.parse(unfolded, kwds) + self = str.__new__(cls, kwds['decoded'] if use_decoded else unparsed) + self.init(name, source=source, **kwds) + return self + + def init(self, name, *, source, decoded, defects): + self._name = name + self._source = source + self._value = decoded + self._defects = defects + + @property + def name(self): + return self._name + + @property + def source(self): + return self._source + + @property + def value(self): + return self._value + + @property + def defects(self): + return tuple(self._defects) + + def __reduce__(self): + return ( + _reconstruct_header, + ( + self.__class__.__name__, + self.__class__.__bases__, + str(self), + ), + self.__dict__) + + @classmethod + def _reconstruct(cls, value): + return str.__new__(cls, value) + + +def _reconstruct_header(cls_name, bases, value): + return type(cls_name, bases, {})._reconstruct(value) + + +class 
UnstructuredHeader: + + max_count = None + + @classmethod + def parse(cls, value, kwds): + kwds['decoded'] = str(parser.get_unstructured(value)) + + +class UniqueUnstructuredHeader(UnstructuredHeader): + + max_count = 1 + + +class DateHeader: + + """Header whose value consists of a single timestamp. + + Provides an additional attribute, datetime, which is either an aware + datetime using a timezone, or a naive datetime if the timezone + in the input string is -0000. Also accepts a datetime as input. + The 'value' attribute is the normalized form of the timestamp, + which means it is the output of format_datetime on the datetime. + """ + + max_count = None + + @classmethod + def parse(cls, value, kwds): + if not value: + kwds['defects'].append(errors.HeaderMissingRequiredValue()) + kwds['datetime'] = None + kwds['decoded'] = '' + return + if isinstance(value, str): + *dtuple, tz = utils.parsedate_tz(value) + value = datetime.datetime(*dtuple[:6], + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + kwds['datetime'] = value + kwds['decoded'] = utils.format_datetime(kwds['datetime']) + + def init(self, *args, **kw): + self._datetime = kw.pop('datetime') + super().init(*args, **kw) + + @property + def datetime(self): + return self._datetime + + +class UniqueDateHeader(DateHeader): + + max_count = 1 + + +class AddressHeader: + + max_count = None + + @classmethod + def parse(cls, value, kwds): + # We are translating here from the RFC language (address/mailbox) + # to our API language (group/address). 
+ address_list, value = parser.get_address_list(value) + assert not value + #self.structured_ew_decode(address_list) + groups = [] + for addr in address_list.addresses: + groups.append(Group(str(addr).lstrip(), + addr.display_name, + [Address(str(mb).lstrip(), + mb.display_name, + mb.local_part, + mb.domain, + mb.all_defects) + for mb in addr.all_mailboxes])) + kwds['groups'] = groups + # The 'Address' object generates no defects, so all defects are on the + # mailbox objects except those on address_list itself. + kwds['defects'] = list(address_list.all_defects) + kwds['decoded'] = ', '.join([str(item) for item in groups]) + + def init(self, *args, **kw): + self._groups = kw.pop('groups') + self._flattened = tuple(self._flatten()) + super().init(*args, **kw) + + @property + def groups(self): + return tuple(self._groups) + + @property + def addresses(self): + return tuple(self._flattened) + + def _flatten(self): + for group in self._groups: + for address in group.addresses: + yield address + + +class UniqueAddressHeader(AddressHeader): + + max_count = 1 + + +class SingleAddressHeader(AddressHeader): + + @property + def address(self): + if len(self.addresses)!=1: + raise ValueError(("value of single address header {} is not " + "a single address").format(self.name)) + return self.addresses[0] + + +class UniqueSingleAddressHeader(SingleAddressHeader): + + max_count = 1 + + +# The header factory # + +_default_header_map = { + 'subject': UniqueUnstructuredHeader, + 'date': UniqueDateHeader, + 'resent-date': DateHeader, + 'orig-date': UniqueDateHeader, + 'sender': UniqueSingleAddressHeader, + 'resent-sender': SingleAddressHeader, + 'to': UniqueAddressHeader, + 'resent-to': AddressHeader, + 'cc': UniqueAddressHeader, + 'resent-cc': AddressHeader, + 'bcc': UniqueAddressHeader, + 'resent-bcc': AddressHeader, + 'from': UniqueAddressHeader, + 'resent-from': AddressHeader, + 'reply-to': UniqueAddressHeader, + } + +class HeaderFactory: + + """A header_factory and header 
registry.""" + + def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader, + use_default_map=True): + """Create a header_factory that works with the Policy API. + + base_class is the class that will be the last class in the created + header class's __bases__ list. default_class is the class that will be + used if "name" (see __call__) does not appear in the registry. + use_default_map controls whether or not the default mapping of names to + specialized classes is copied in to the registry when the factory is + created. The default is True. + + """ + self.registry = {} + self.base_class = base_class + self.default_class = default_class + if use_default_map: + self.registry.update(_default_header_map) + + def map_to_type(self, name, cls): + """Register cls as the specialized class for handling "name" headers. + + """ + self.registry[name.lower()] = cls + + def __getitem__(self, name): + cls = self.registry.get(name.lower(), self.default_class) + return type('_'+cls.__name__, (cls, self.base_class), {}) + + def __call__(self, name, unparsed, unfolded=None, use_decoded=False): + """Create a header instance for header "name". + + Creates a header instance by creating a specialized class for parsing + and representing the specified header by combining the factory + base_class with a specialized class from the registry or the + default_class, and passing the name, unparsed, unfolded, and + use_decoded arguments to the constructed class's constructor. 
+ + """ + return self[name](name, unparsed, unfolded, use_decoded=use_decoded) diff -r 7520f1bf0a81 -r b22698463737 Lib/email/message.py --- a/Lib/email/message.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/email/message.py Tue Jul 19 13:24:56 2011 -0400 @@ -17,6 +17,8 @@ from email import utils from email import errors from email import header +from email import policy +from email.utils import _has_surrogates from email import charset as _charset Charset = _charset.Charset @@ -26,12 +28,6 @@ # existence of which force quoting of the parameter value. tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') -# How to figure out if we are processing strings that come from a byte -# source with undecodable characters. -_has_surrogates = re.compile( - '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search - - # Helper functions def _sanitize_header(name, value): # If the header value contains surrogates, return a Header using @@ -120,6 +116,94 @@ return utils.unquote(value) +class _HeaderList(list): + """Manage a list of headers as a pseudo-dictionary. + + Headers have names and values. The name makes an obvious lookup key, + but some headers can appear more than once (eg: Received headers). We + also need to preserve the order of headers in order to be fully RFC + compliant. This class implements portions of the mapping interface + on top of a list of headers, with additional methods for manipulating + all headers having the same name. + """ + + def __getitem__(self, name): + """Get a header. + + Return None if the header is missing instead of raising an exception. + + Note that if the header appeared multiple times, exactly which + occurrence gets returned is undefined. Use get_all() to get all + the headers matching a given header field name. + """ + return self.get(name) + + def __delitem__(self, name): + """Delete all occurrences of a header, if present. + + Does not raise an exception if the header is missing. 
+ """ + name = name.lower() + self[:] = [h for h in self if h.name.lower() != name] + + def __contains__(self, name): + return name.lower() in (h.name.lower() for h in self) + + def keys(self): + """Return a list of all the message's header field names. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return [h.name for h in self] + + def values(self): + """Return a list of all the message's headers. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return [_sanitize_header(h.name, h) for h in self] + + def items(self): + """Return (name, header) tuples for all the headers in the message. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return [(h.name, _sanitize_header(h.name, h)) for h in self] + + def get(self, name, failobj=None): + """Get a header. + + Like __getitem__() but return failobj instead of None when the field + is missing. + """ + name = name.lower() + for h in self: + if h.name.lower() == name: + return _sanitize_header(h.name, h) + return failobj + + def get_all(self, name, failobj=None): + """Return a list of all the headers named 'name'. + + These will be sorted in the order they appeared in the original + message. Any fields deleted and re-inserted are always appended to the + header list. + + If no such headers exist, failobj is returned (defaults to None). + """ + name = name.lower() + return [_sanitize_header(h.name, h) + for h in self if h.name.lower()==name] or failobj + class Message: """Basic message object. 
@@ -136,8 +220,13 @@ you must use the explicit API to set or get all the headers. Not all of the mapping methods are implemented. """ - def __init__(self): - self._headers = [] + + # Protect against infinite recursion in __getattr__ during unpickling. + _headers = None + + def __init__(self, policy=policy.default): + self._policy = policy + self._headers = _HeaderList() self._unixfrom = None self._payload = None self._charset = None @@ -341,20 +430,11 @@ # # MAPPING INTERFACE (partial) # - def __len__(self): - """Return the total number of headers, including duplicates.""" - return len(self._headers) - def __getitem__(self, name): - """Get a header value. - - Return None if the header is missing instead of raising an exception. - - Note that if the header appeared multiple times, exactly which - occurrence gets returned is undefined. Use get_all() to get all - the values matching a header field name. - """ - return self.get(name) + def __iter__(self): + """For backward compatibility, we return only keys on iter""" + for h in self._headers: + yield h.name def __setitem__(self, name, val): """Set the value of a header. @@ -362,90 +442,58 @@ Note: this does not overwrite an existing header with the same field name. Use __delitem__() first to delete any existing headers. """ - self._headers.append((name, val)) + if isinstance(val, header.Header): + # Ugly backward compatibility hack + val.name = name + val._headerlen = len(name) + 2 + self._headers.append(val) + else: + h = self._policy.make_header(name, val) + if h.max_count == 1 and name in self._headers: + raise ValueError( + "A message may have at most one {} header".format(name)) + self._headers.append(h) - def __delitem__(self, name): - """Delete all occurrences of a header, if present. + # Delegate other methods to _headers. Unfortunately we have to do + # the special methods explicitly. - Does not raise an exception if the header is missing. 
- """ - name = name.lower() - newheaders = [] - for k, v in self._headers: - if k.lower() != name: - newheaders.append((k, v)) - self._headers = newheaders + def __getattr__(self, key): + try: + return getattr(self._headers, key) + except AttributeError: + raise AttributeError("'{}' object has no attribute '{}'".format( + self.__class__.__name__, key)) - def __contains__(self, name): - return name.lower() in [k.lower() for k, v in self._headers] + def __contains__(self, *args, **kw): + return self._headers.__contains__(*args, **kw) - def __iter__(self): - for field, value in self._headers: - yield field + def __getitem__(self, *args, **kw): + return self._headers.__getitem__(*args, **kw) - def keys(self): - """Return a list of all the message's header field names. + def __delitem__(self, *args, **kw): + return self._headers.__delitem__(*args, **kw) - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return [k for k, v in self._headers] - - def values(self): - """Return a list of all the message's header values. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return [_sanitize_header(k, v) for k, v in self._headers] - - def items(self): - """Get all the message's header fields and values. - - These will be sorted in the order they appeared in the original - message, or were added to the message, and may contain duplicates. - Any fields deleted and re-inserted are always appended to the header - list. - """ - return [(k, _sanitize_header(k, v)) for k, v in self._headers] - - def get(self, name, failobj=None): - """Get a header value. - - Like __getitem__() but return failobj instead of None when the field - is missing. 
- """ - name = name.lower() - for k, v in self._headers: - if k.lower() == name: - return _sanitize_header(k, v) - return failobj + def __len__(self, *args, **kw): + return self._headers.__len__(*args, **kw) # # Additional useful stuff # - def get_all(self, name, failobj=None): - """Return a list of all the values for the named field. + def replace_header(self, name, value): + """Replace a header. - These will be sorted in the order they appeared in the original - message, and may contain duplicates. Any fields deleted and - re-inserted are always appended to the header list. - - If no such fields exist, failobj is returned (defaults to None). + Replace the first matching header found in the message, retaining + header order and case. If no matching header was found, a KeyError is + raised. """ - values = [] name = name.lower() - for k, v in self._headers: - if k.lower() == name: - values.append(_sanitize_header(k, v)) - if not values: - return failobj - return values + for i, h in enumerate(self._headers): + if h.name.lower() == name: + self._headers[i] = self._policy.make_header(h.name, value) + break + else: + raise KeyError(name) def add_header(self, _name, _value, **_params): """Extended header setting. @@ -475,26 +523,7 @@ parts.append(_formatparam(k.replace('_', '-'), v)) if _value is not None: parts.insert(0, _value) - self._headers.append((_name, SEMISPACE.join(parts))) - - def replace_header(self, _name, _value): - """Replace a header. - - Replace the first matching header found in the message, retaining - header order and case. If no matching header was found, a KeyError is - raised. - """ - _name = _name.lower() - for i, (k, v) in zip(range(len(self._headers)), self._headers): - if k.lower() == _name: - self._headers[i] = (k, _value) - break - else: - raise KeyError(_name) - - # - # Use these three methods instead of the three above. - # + self[_name] = SEMISPACE.join(parts) def get_content_type(self): """Return the message's content type. 
@@ -796,8 +825,8 @@ # instead??? newparams.append(('boundary', '"%s"' % boundary)) # Replace the existing Content-Type header with the new value - newheaders = [] - for h, v in self._headers: + newheaders = _HeaderList() + for h, v in self.items(): if h.lower() == 'content-type': parts = [] for k, v in newparams: @@ -805,10 +834,10 @@ parts.append(k) else: parts.append('%s=%s' % (k, v)) - newheaders.append((h, SEMISPACE.join(parts))) - + newheaders.append( + self._policy.make_header(h, SEMISPACE.join(parts))) else: - newheaders.append((h, v)) + newheaders.append(v) self._headers = newheaders def get_content_charset(self, failobj=None): diff -r 7520f1bf0a81 -r b22698463737 Lib/email/policy.py --- a/Lib/email/policy.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/email/policy.py Tue Jul 19 13:24:56 2011 -0400 @@ -3,6 +3,9 @@ Allows fine grained feature control of how the package parses and emits data. """ +import warnings +from email import header + __all__ = [ 'Policy', 'default', @@ -43,7 +46,7 @@ """ for name, value in kw.items(): if hasattr(self, name): - super(_PolicyBase,self).__setattr__(name, value) + object.__setattr__(self, name, value) else: raise TypeError( "{!r} is an invalid keyword argument for {}".format( @@ -61,10 +64,16 @@ except for the changes passed in as keyword arguments. """ + newpolicy = self.__class__.__new__(self.__class__) for attr, value in self.__dict__.items(): - if attr not in kw: - kw[attr] = value - return self.__class__(**kw) + object.__setattr__(newpolicy, attr, value) + for attr, value in kw.items(): + if not hasattr(self, attr): + raise TypeError( + "{!r} is an invalid keyword argument for {}".format( + attr, self.__class__.__name__)) + object.__setattr__(newpolicy, attr, value) + return newpolicy def __setattr__(self, name, value): if hasattr(self, name): @@ -105,18 +114,28 @@ Settable attributes: - raise_on_defect -- If true, then defects should be raised - as errors. Default False. 
+ raise_on_defect -- If true, then defects should be raised as errors. + Default False. - linesep -- string containing the value to use as - separation between output lines. Default '\n'. + linesep -- string containing the value to use as separation + between output lines. Default '\n'. must_be_7bit -- output must contain only 7bit clean data. Default False. max_line_length -- maximum length of lines, excluding 'linesep', - during serialization. None means no line - wrapping is done. Default is 78. + during serialization. None means no line wrapping + is done. Default is 78. + + decoded_headers -- if false (the default), the value of a header is + equal to the value of its 'source_value' attribute. + If True, the value of a header is equal to the value + of its 'decoded' attribute. In Python 3.4 the + default value of this attribute will become True. + + header_factory -- a callable that can be used to create a new header + object given a name and a value. See the header + documentation for details on the expected API. Methods: @@ -129,12 +148,25 @@ raise_on_defect is True, defect is raised as an error, otherwise register_defect is called. + make_header(name, source_value, unfolded_value) + intended to be called by parser code that identifies a header to be + attached to a message object. source_value is the value exactly as + obtained from the source, unfolded_value is the header with just + the folding linesep characters removed. + """ raise_on_defect = False linesep = '\n' must_be_7bit = False max_line_length = 78 + decoded_headers = None + header_factory = header.HeaderFactory() + + def __init__(self, **kw): + if 'header_factory' not in kw: + object.__setattr__(self, 'header_factory', header.HeaderFactory()) + _PolicyBase.__init__(self, **kw) def handle_defect(self, obj, defect): """Based on policy, either raise defect or call register_defect.
@@ -167,8 +199,35 @@ """ obj.defects.append(defect) + def make_header(self, name, unparsed, unfolded=None): + """Return a header object containing parsed data from the value. + + unparsed is the value obtained from the source data, including any + linesep characters. unfolded is the same value with just the folding + linesep characters removed. This parameter is separate because a + parser may have more knowledge of actual message line breaks than would + be apparent from examining the unparsed string, specifically in the + instance of a file containing both os.linesep strings and individual + \\r or \\n characters. Note that any such individual characters that + are present in unfolded will be reported as RFC defects under the + default policy. If unfolded is None, unparsed should be a unicode + string that does not contain encoded words or linesep characters. + + header_factory will be called with these three parameters, plus the value + of the decoded_headers flag passed as the fourth parameter, use_decoded. + + """ + if unfolded is not None and self.decoded_headers is None: + warnings.warn("The default for the decoded_headers policy option " + "will change to True in Python 3.4", DeprecationWarning, 2) + return self.header_factory(name, unparsed, unfolded, + self.decoded_headers) default = Policy() +# Make the default Policy use the class default.
+del default.header_factory strict = default.clone(raise_on_defect=True) SMTP = default.clone(linesep='\r\n') HTTP = default.clone(linesep='\r\n', max_line_length=None) +email5_defaults = default.clone(decoded_headers=False) +email6_defaults = default.clone(decoded_headers=True) diff -r 7520f1bf0a81 -r b22698463737 Lib/email/utils.py --- a/Lib/email/utils.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/email/utils.py Tue Jul 19 13:24:56 2011 -0400 @@ -11,6 +11,7 @@ 'encode_rfc2231', 'formataddr', 'formatdate', + 'format_datetime', 'getaddresses', 'make_msgid', 'mktime_tz', @@ -26,6 +27,7 @@ import base64 import random import socket +import datetime import urllib.parse import warnings from io import StringIO @@ -53,6 +55,10 @@ specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[][\\()"]') +# How to figure out if we are processing strings that come from a byte +# source with undecodable characters. +_has_surrogates = re.compile( + '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search # Helpers @@ -110,6 +116,14 @@ ''', re.VERBOSE | re.IGNORECASE) +def _format_timetuple_and_zone(timetuple, zone): + return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( + ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]], + timetuple[2], + ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1], + timetuple[0], timetuple[3], timetuple[4], timetuple[5], + zone) def formatdate(timeval=None, localtime=False, usegmt=False): """Returns a date string as specified by RFC 2822, e.g.: @@ -154,14 +168,25 @@ zone = 'GMT' else: zone = '-0000' - return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( - ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]], - now[2], - ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1], - now[0], now[3], now[4], now[5], - zone) + return _format_timetuple_and_zone(now, zone) +def format_datetime(dt, usegmt=False): + """Turn a datetime into 
a date string as specified in RFC 2822. + + If usegmt is True, dt must be an aware datetime with an offset of zero. In + this case 'GMT' will be rendered instead of the normal +0000 required by + RFC2822. This is to support HTTP headers involving date stamps. + """ + now = dt.timetuple() + if usegmt: + if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc: + raise ValueError("usegmt option requires a UTC datetime") + zone = 'GMT' + elif dt.tzinfo is None: + zone = '-0000' + else: + zone = dt.strftime("%z") + return _format_timetuple_and_zone(now, zone) def make_msgid(idstring=None, domain=None): @@ -320,3 +345,54 @@ except LookupError: # charset is not a known codec. return unquote(text) + +# +# datetime doesn't provide a localtime function yet, so provide one. +# Code adapted from the patch in issue 9527. +# + +def localtime(dt=None, isdst=-1): + """Return local time as an aware datetime object. + + If called without arguments, return current time. Otherwise *dt* + argument should be a datetime instance, and it is converted to the + local time zone according to the system time zone database. If *dt* is + naive (that is, dt.tzinfo is None), it is assumed to be in local time. + In this case, a positive or zero value for *isdst* causes localtime to + presume initially that summer time (for example, Daylight Saving Time) + is or is not (respectively) in effect for the specified time. A + negative value for *isdst* causes the localtime() function to attempt + to divine whether summer time is in effect for the specified time. + + """ + if dt is None: + seconds = time.time() + else: + if dt.tzinfo is None: + # A naive datetime is given. Convert to a (localtime) + # timetuple and pass to system mktime together with + # the isdst hint. System mktime will return seconds + # since epoch. + tm = dt.timetuple()[:-1] + (isdst,) + seconds = time.mktime(tm) + else: + # An aware datetime is given. Use aware datetime + # arithmetics to find seconds since epoch.
+ delta = dt - datetime.datetime(1970, 1, 1, + tzinfo=datetime.timezone.utc) + seconds = delta.total_seconds() + tm = time.localtime(seconds) + + # XXX: The following logic may not work correctly if UTC + # offset has changed since time provided in dt. This will be + # corrected in C implementation for platforms that support + # tm_gmtoff. + if time.daylight: + if tm.tm_isdst: + offset = time.altzone + tzname = time.tzname[1] + else: + offset = time.timezone + tzname = time.tzname[0] + tz = datetime.timezone(datetime.timedelta(seconds=-offset), tzname) + return datetime.datetime.fromtimestamp(seconds, tz) diff -r 7520f1bf0a81 -r b22698463737 Lib/mailbox.py --- a/Lib/mailbox.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/mailbox.py Tue Jul 19 13:24:56 2011 -0400 @@ -1468,7 +1468,8 @@ def _become_message(self, message): """Assume the non-format-specific state of message.""" for name in ('_headers', '_unixfrom', '_payload', '_charset', - 'preamble', 'epilogue', 'defects', '_default_type'): + 'preamble', 'epilogue', 'defects', '_default_type', + '_policy'): self.__dict__[name] = message.__dict__[name] def _explain_to(self, message): diff -r 7520f1bf0a81 -r b22698463737 Lib/smtplib.py --- a/Lib/smtplib.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/smtplib.py Tue Jul 19 13:24:56 2011 -0400 @@ -797,7 +797,7 @@ msg[header_prefix+'Cc']) if f is not None] to_addrs = [a[1] for a in email.utils.getaddresses(addr_fields)] # Make a local copy so we can delete the bcc headers. 
- msg_copy = copy.copy(msg) + msg_copy = copy.deepcopy(msg) del msg_copy['Bcc'] del msg_copy['Resent-Bcc'] with io.BytesIO() as bytesmsg: diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/__init__.py --- a/Lib/test/test_email/__init__.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/test/test_email/__init__.py Tue Jul 19 13:24:56 2011 -0400 @@ -1,8 +1,10 @@ import os import sys import unittest +import warnings import test.support -import email +from email import message_from_file +from email import policy from test.test_email import __file__ as landmark # used by regrtest and __main__. @@ -26,6 +28,10 @@ class TestEmailBase(unittest.TestCase): maxDiff = None + # We put these here so we can see what happens to the tests if + # we change some defaults. + email5_policy = policy.email5_defaults + email6_policy = policy.email6_defaults def __init__(self, *args, **kw): super().__init__(*args, **kw) @@ -35,7 +41,7 @@ def _msgobj(self, filename): with openfile(filename) as fp: - return email.message_from_file(fp) + return message_from_file(fp, policy=self.email5_policy) def _bytes_repr(self, b): return [repr(x) for x in b.splitlines(True)] @@ -43,3 +49,9 @@ def assertBytesEqual(self, first, second, msg): """Our byte strings are really encoded strings; improve diff output""" self.assertEqual(self._bytes_repr(first), self._bytes_repr(second)) + + def assertDefectsEqual(self, actual, expected): + self.assertEqual(len(actual), len(expected), actual) + for i in range(len(actual)): + self.assertIsInstance(actual[i], expected[i], + 'item {}'.format(i)) diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/test__encoded_words.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_email/test__encoded_words.py Tue Jul 19 13:24:56 2011 -0400 @@ -0,0 +1,107 @@ +import unittest +from email import _encoded_words as _ew +from email import errors +from test.test_email import TestEmailBase + + +class TestDecodeQ(TestEmailBase): + + def _test(self, source, ex_result, 
ex_defects=[]): + result, defects = _ew.decode_q(source) + self.assertEqual(result, ex_result) + self.assertDefectsEqual(defects, ex_defects) + + def test_no_encoded(self): + self._test(b'foobar', b'foobar') + + def test_spaces(self): + self._test(b'foo=20bar=20', b'foo bar ') + + def test_run_of_encoded(self): + self._test(b'foo=20=20=21bar', b'foo !bar') + + +class TestDecodeB(TestEmailBase): + + def _test(self, source, ex_result, ex_defects=[]): + result, defects = _ew.decode_b(source) + self.assertEqual(result, ex_result) + self.assertDefectsEqual(defects, ex_defects) + + def test_simple(self): + self._test(b'Zm9v', b'foo') + + def test_missing_padding(self): + self._test(b'dmk', b'vi', [errors.InvalidBase64PaddingDefect]) + + def test_invalid_character(self): + self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect]) + + def test_invalid_character_and_bad_padding(self): + self._test(b'dm\x01k', b'vi', [errors.InvalidBase64CharactersDefect, + errors.InvalidBase64PaddingDefect]) + + +class TestDecode(TestEmailBase): + + def test_wrong_format_input_raises(self): + with self.assertRaises(ValueError): + _ew.decode('=?badone?=') + with self.assertRaises(ValueError): + _ew.decode('=?') + with self.assertRaises(ValueError): + _ew.decode('') + + def _test(self, source, ex_result, ex_defects=[]): + result, defects = _ew.decode(source) + self.assertEqual(result, ex_result) + self.assertDefectsEqual(defects, ex_defects) + + def test_simple_q(self): + self._test('=?us-ascii?q?foo?=', 'foo') + + def test_simple_b(self): + self._test('=?us-ascii?b?dmk=?=', 'vi') + + def test_q_case_ignored(self): + self._test('=?us-ascii?Q?foo?=', 'foo') + + def test_b_case_ignored(self): + self._test('=?us-ascii?B?dmk=?=', 'vi') + + def test_non_trivial_q(self): + self._test('=?latin-1?q?=20F=fcr=20Elise=20?=', ' Für Elise ') + + def test_q_undecodable_bytes_transformed(self): + self._test(b'=?us-ascii?q?=20\xACfoo?='.decode('us-ascii', + 'surrogateescape'), + ' \uFFFDfoo', 
+ [errors.UndecodableBytesDefect]) + + def test_b_undecodable_bytes_ignored_with_defect(self): + self._test(b'=?us-ascii?b?dm\xACk?='.decode('us-ascii', + 'surrogateescape'), + 'vi', + [errors.InvalidBase64CharactersDefect, + errors.InvalidBase64PaddingDefect]) + + def test_b_invalid_bytes_ignored_with_defect(self): + self._test('=?us-ascii?b?dm\x01k===?=', + 'vi', + [errors.InvalidBase64CharactersDefect]) + + def test_b_invalid_bytes_incorrect_padding(self): + self._test('=?us-ascii?b?dm\x01k?=', + 'vi', + [errors.InvalidBase64CharactersDefect, + errors.InvalidBase64PaddingDefect]) + + def test_b_padding_defect(self): + self._test('=?us-ascii?b?dmk?=', + 'vi', + [errors.InvalidBase64PaddingDefect]) + + + +if __name__ == '__main__': + unittest.main() diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/test__header_value_parser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_email/test__header_value_parser.py Tue Jul 19 13:24:56 2011 -0400 @@ -0,0 +1,2135 @@ +import string +import unittest +from email import _header_value_parser as parser +from email import errors +from test.test_email import TestEmailBase + +class TestParser(TestEmailBase): + + # _wsp_splitter + + rfc_printable_ascii = bytes(range(33, 127)).decode('ascii') + rfc_atext_chars = (string.ascii_letters + string.digits + + "!#$%&\'*+-/=?^_`{}|~") + rfc_dtext_chars = rfc_printable_ascii.translate(str.maketrans('','',r'\[]')) + + def test__wsp_splitter_one_word(self): + self.assertEqual(parser._wsp_splitter('foo', 1), ['foo']) + + def test__wsp_splitter_two_words(self): + self.assertEqual(parser._wsp_splitter('foo def', 1), + ['foo', ' ', 'def']) + + def test__wsp_splitter_ws_runs(self): + self.assertEqual(parser._wsp_splitter('foo \t def jik', 1), + ['foo', ' \t ', 'def jik']) + + # _decode_ew_run + + def _test_ew_run(self, value, ex_text, ex_value, ex_defects=[]): + text, value, defects = parser._decode_ew_run(value) + self.assertEqual(text, ex_text) + self.assertEqual(value, 
ex_value) + self.assertDefectsEqual(defects, ex_defects) + + def test__decode_ew_run_invalid_ew(self): + self._test_ew_run('=?test val', '', '=?test val') + + def test__decode_ew_run_one_valid_ew_trailing_text(self): + self._test_ew_run('=?us-ascii?q?test?= val', 'test', ' val') + + def test__decode_ew_run_one_valid_ew_trailing_ws(self): + self._test_ew_run('=?us-ascii?q?test?= ', 'test', ' ') + + def test__decode_ew_run_one_valid_ew_no_trailing_ws(self): + self._test_ew_run('=?us-ascii?q?test?=', 'test', '') + + def test__decode_ew_run_two_ew_with_ws_run(self): + self._test_ew_run( + '=?us-ascii?q?test?= =?us-ascii?q?foo?= val', + 'testfoo', + ' val') + + def test__decode_ew_run_ew_with_internal_leading_ws(self): + self._test_ew_run( + '=?us-ascii?q?=20test?= =?us-ascii?q?=20foo?= val', + ' test foo', + ' val') + + def test__decode_ew_run_undecodable_bytes(self): + self._test_ew_run( + (b'=?us-ascii?q?=20test?= =?us-ascii?q?=20\xACfoo?=' + b' val').decode('ascii', 'surrogateescape'), + ' test \uFFFDfoo', + ' val', + [errors.UndecodableBytesDefect]) + + def test__decode_ew_run_missing_base64_padding(self): + self._test_ew_run('=?utf-8?b?dmk?=', + 'vi', + '', + [errors.InvalidBase64PaddingDefect]) + + def test__decode_ew_run_invalid_base64_character(self): + self._test_ew_run('=?utf-8?b?dm\x01k===?=', + 'vi', + '', + [errors.InvalidBase64CharactersDefect]) + + def test__decode_ew_run_invalid_base64_character_and_bad_padding(self): + self._test_ew_run('=?utf-8?b?dm\x01k?=', + 'vi', + '', + [errors.InvalidBase64CharactersDefect, errors.InvalidBase64PaddingDefect]) + + # get_unstructured + + def test_get_unstructured_null(self): + u = parser.get_unstructured('') + self.assertEqual(str(u), '') + self.assertEqual(len(u), 0) + + def test_get_unstructured_one_word(self): + u = parser.get_unstructured('foo') + self.assertEqual(str(u), 'foo') + + def test_get_unstructured_normal_phrase(self): + u = parser.get_unstructured('foo bar bird') + self.assertEqual(str(u), 'foo bar 
bird') + + def test_get_unstructured_normal_phrase_with_whitespace(self): + u = parser.get_unstructured('foo \t bar bird ') + self.assertEqual(str(u), 'foo \t bar bird ') + + def test_get_unstructured_leading_whitespace(self): + u = parser.get_unstructured(' foo bar') + self.assertEqual(str(u), ' foo bar') + + def test_get_unstructured_leading_and_trailing_whitespace(self): + u = parser.get_unstructured(' foo bar ') + self.assertEqual(str(u), ' foo bar ') + + def test_get_unstructured_leading_and_trailing_whitespace(self): + u = parser.get_unstructured(' foo bar ') + self.assertEqual(str(u), ' foo bar ') + + def test_get_unstructured_phrase_with_ew(self): + u = parser.get_unstructured('foo =?us-ascii?q?bar?= bird') + self.assertEqual(str(u), 'foo bar bird') + + def test_get_unstructured_phrase_with_two_ew(self): + u = parser.get_unstructured( + 'foo =?us-ascii?q?bar?= =?us-ascii?q?bird?=') + self.assertEqual(str(u), 'foo barbird') + + def test_get_unstructured_phrase_with_two_ew_trailing_ws(self): + u = parser.get_unstructured( + 'foo =?us-ascii?q?bar?= =?us-ascii?q?bird?= ') + self.assertEqual(str(u), 'foo barbird ') + + def test_get_unstructured_phrase_with_ew_with_leading_ws(self): + u = parser.get_unstructured(' =?us-ascii?q?bar?=') + self.assertEqual(str(u), ' bar') + + def test_get_unstructured_phrase_with_two_ew_extra_ws(self): + u = parser.get_unstructured( + 'foo =?us-ascii?q?bar?= \t =?us-ascii?q?bird?=') + self.assertEqual(str(u), 'foo barbird') + + def test_get_unstructured_ew_with_internal_ws(self): + u = parser.get_unstructured('=?iso-8859-1?q?hello=20world?=') + self.assertEqual(str(u), 'hello world') + self.assertEqual(len(u), 3) + + # test harness + + def _test_get_x(self, method, input, string, value, defects, + remainder, comments=None): + token, rest = method(input) + self.assertEqual(str(token), string) + self.assertEqual(token.value, value) + self.assertDefectsEqual(token.all_defects, defects) + self.assertEqual(rest, remainder) + if comments 
is not None: + self.assertEqual(token.comments, comments) + return token + + # get_qp_ctext + + def test_get_qp_ctext_only(self): + ptext = self._test_get_x(parser.get_qp_ctext, + 'foobar', 'foobar', ' ', [], '') + self.assertEqual(ptext.token_type, 'ptext') + + def test_get_qp_ctext_all_printables(self): + with_qp = self.rfc_printable_ascii.replace('\\', '\\\\') + with_qp = with_qp. replace('(', r'\(') + with_qp = with_qp.replace(')', r'\)') + ptext = self._test_get_x(parser.get_qp_ctext, + with_qp, self.rfc_printable_ascii, ' ', [], '') + + def test_get_qp_ctext_two_words_gets_first(self): + self._test_get_x(parser.get_qp_ctext, + 'foo de', 'foo', ' ', [], ' de') + + def test_get_qp_ctext_following_wsp_preserved(self): + self._test_get_x(parser.get_qp_ctext, + 'foo \t\tde', 'foo', ' ', [], ' \t\tde') + + def test_get_qp_ctext_up_to_close_paren_only(self): + self._test_get_x(parser.get_qp_ctext, + 'foo)', 'foo', ' ', [], ')') + + def test_get_qp_ctext_wsp_before_close_paren_preserved(self): + self._test_get_x(parser.get_qp_ctext, + 'foo )', 'foo', ' ', [], ' )') + + def test_get_qp_ctext_close_paren_mid_word(self): + self._test_get_x(parser.get_qp_ctext, + 'foo)bar', 'foo', ' ', [], ')bar') + + def test_get_qp_ctext_up_to_open_paren_only(self): + self._test_get_x(parser.get_qp_ctext, + 'foo(', 'foo', ' ', [], '(') + + def test_get_qp_ctext_wsp_before_open_paren_preserved(self): + self._test_get_x(parser.get_qp_ctext, + 'foo (', 'foo', ' ', [], ' (') + + def test_get_qp_ctext_open_paren_mid_word(self): + self._test_get_x(parser.get_qp_ctext, + 'foo(bar', 'foo', ' ', [], '(bar') + + def test_get_qp_ctext_non_printables(self): + ptext = self._test_get_x(parser.get_qp_ctext, + 'foo\x00bar)', 'foo\x00bar', ' ', + [errors.NonPrintableDefect], ')') + self.assertEqual(ptext.defects[0].non_printables[0], '\x00') + + # get_qcontent + + def test_get_qcontent_only(self): + ptext = self._test_get_x(parser.get_qcontent, + 'foobar', 'foobar', 'foobar', [], '') + 
self.assertEqual(ptext.token_type, 'ptext') + + def test_get_qcontent_all_printables(self): + with_qp = self.rfc_printable_ascii.replace('\\', '\\\\') + with_qp = with_qp. replace('"', r'\"') + ptext = self._test_get_x(parser.get_qcontent, with_qp, + self.rfc_printable_ascii, + self.rfc_printable_ascii, [], '') + + def test_get_qcontent_two_words_gets_first(self): + self._test_get_x(parser.get_qcontent, + 'foo de', 'foo', 'foo', [], ' de') + + def test_get_qcontent_following_wsp_preserved(self): + self._test_get_x(parser.get_qcontent, + 'foo \t\tde', 'foo', 'foo', [], ' \t\tde') + + def test_get_qcontent_up_to_dquote_only(self): + self._test_get_x(parser.get_qcontent, + 'foo"', 'foo', 'foo', [], '"') + + def test_get_qcontent_wsp_before_close_paren_preserved(self): + self._test_get_x(parser.get_qcontent, + 'foo "', 'foo', 'foo', [], ' "') + + def test_get_qcontent_close_paren_mid_word(self): + self._test_get_x(parser.get_qcontent, + 'foo"bar', 'foo', 'foo', [], '"bar') + + def test_get_qcontent_non_printables(self): + ptext = self._test_get_x(parser.get_qcontent, + 'foo\x00fg"', 'foo\x00fg', 'foo\x00fg', + [errors.NonPrintableDefect], '"') + self.assertEqual(ptext.defects[0].non_printables[0], '\x00') + + # get_atext + + def test_get_atext_only(self): + atext = self._test_get_x(parser.get_atext, + 'foobar', 'foobar', 'foobar', [], '') + self.assertEqual(atext.token_type, 'atext') + + def test_get_atext_all_atext(self): + atext = self._test_get_x(parser.get_atext, self.rfc_atext_chars, + self.rfc_atext_chars, + self.rfc_atext_chars, [], '') + + def test_get_atext_two_words_gets_first(self): + self._test_get_x(parser.get_atext, + 'foo bar', 'foo', 'foo', [], ' bar') + + def test_get_atext_following_wsp_preserved(self): + self._test_get_x(parser.get_atext, + 'foo \t\tbar', 'foo', 'foo', [], ' \t\tbar') + + def test_get_atext_up_to_special(self): + self._test_get_x(parser.get_atext, + 'foo@bar', 'foo', 'foo', [], '@bar') + + def test_get_atext_non_printables(self): + 
atext = self._test_get_x(parser.get_atext, + 'foo\x00bar(', 'foo\x00bar', 'foo\x00bar', + [errors.NonPrintableDefect], '(') + self.assertEqual(atext.defects[0].non_printables[0], '\x00') + + # get_fws + + def test_get_fws_only(self): + fws = self._test_get_x(parser.get_fws, ' \t ', ' \t ', ' ', [], '') + self.assertEqual(fws.token_type, 'fws') + + def test_get_fws_space(self): + self._test_get_x(parser.get_fws, ' foo', ' ', ' ', [], 'foo') + + def test_get_fws_ws_run(self): + self._test_get_x(parser.get_fws, ' \t foo ', ' \t ', ' ', [], 'foo ') + + # get_bare_quoted_string + + def test_get_bare_quoted_string_only(self): + bqs = self._test_get_x(parser.get_bare_quoted_string, + '"foo"', '"foo"', 'foo', [], '') + self.assertEqual(bqs.token_type, 'bare-quoted-string') + + def test_get_bare_quoted_string_must_start_with_dquote(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_bare_quoted_string('foo"') + with self.assertRaises(errors.HeaderParseError): + parser.get_bare_quoted_string(' "foo"') + + def test_get_bare_quoted_string_following_wsp_preserved(self): + self._test_get_x(parser.get_bare_quoted_string, + '"foo"\t bar', '"foo"', 'foo', [], '\t bar') + + def test_get_bare_quoted_string_multiple_words(self): + self._test_get_x(parser.get_bare_quoted_string, + '"foo bar moo"', '"foo bar moo"', 'foo bar moo', [], '') + + def test_get_bare_quoted_string_multiple_words_wsp_preserved(self): + self._test_get_x(parser.get_bare_quoted_string, + '" foo moo\t"', '" foo moo\t"', ' foo moo\t', [], '') + + def test_get_bare_quoted_string_end_dquote_mid_word(self): + self._test_get_x(parser.get_bare_quoted_string, + '"foo"bar', '"foo"', 'foo', [], 'bar') + + def test_get_bare_quoted_string_quoted_dquote(self): + self._test_get_x(parser.get_bare_quoted_string, + r'"foo\"in"a', r'"foo\"in"', 'foo"in', [], 'a') + + def test_get_bare_quoted_string_non_printables(self): + self._test_get_x(parser.get_bare_quoted_string, + '"a\x01a"', '"a\x01a"', 'a\x01a', + 
[errors.NonPrintableDefect], '') + + def test_get_bare_quoted_string_no_end_dquote(self): + self._test_get_x(parser.get_bare_quoted_string, + '"foo', '"foo"', 'foo', + [errors.InvalidHeaderDefect], '') + self._test_get_x(parser.get_bare_quoted_string, + '"foo ', '"foo "', 'foo ', + [errors.InvalidHeaderDefect], '') + + def test_get_bare_quoted_string_empty_quotes(self): + self._test_get_x(parser.get_bare_quoted_string, + '""', '""', '', [], '') + + # get_comment + + def test_get_comment_only(self): + comment = self._test_get_x(parser.get_comment, + '(comment)', '(comment)', ' ', [], '', ['comment']) + self.assertEqual(comment.token_type, 'comment') + + def test_get_comment_must_start_with_paren(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_comment('foo"') + with self.assertRaises(errors.HeaderParseError): + parser.get_comment(' (foo"') + + def test_get_comment_following_wsp_preserved(self): + self._test_get_x(parser.get_comment, + '(comment) \t', '(comment)', ' ', [], ' \t', ['comment']) + + def test_get_comment_multiple_words(self): + self._test_get_x(parser.get_comment, + '(foo bar) \t', '(foo bar)', ' ', [], ' \t', ['foo bar']) + + def test_get_comment_multiple_words_wsp_preserved(self): + self._test_get_x(parser.get_comment, + '( foo bar\t ) \t', '( foo bar\t )', ' ', [], ' \t', + [' foo bar\t ']) + + def test_get_comment_end_paren_mid_word(self): + self._test_get_x(parser.get_comment, + '(foo)bar', '(foo)', ' ', [], 'bar', ['foo']) + + def test_get_comment_quoted_parens(self): + self._test_get_x(parser.get_comment, + '(foo\) \(\)bar)', '(foo\) \(\)bar)', ' ', [], '', ['foo) ()bar']) + + def test_get_comment_non_printable(self): + self._test_get_x(parser.get_comment, + '(foo\x7Fbar)', '(foo\x7Fbar)', ' ', + [errors.NonPrintableDefect], '', ['foo\x7Fbar']) + + def test_get_comment_no_end_paren(self): + self._test_get_x(parser.get_comment, + '(foo bar', '(foo bar)', ' ', + [errors.InvalidHeaderDefect], '', ['foo bar']) + 
self._test_get_x(parser.get_comment, + '(foo bar ', '(foo bar )', ' ', + [errors.InvalidHeaderDefect], '', ['foo bar ']) + + def test_get_comment_nested_comment(self): + comment = self._test_get_x(parser.get_comment, + '(foo(bar))', '(foo(bar))', ' ', [], '', ['foo(bar)']) + self.assertEqual(comment[1].content, 'bar') + + def test_get_comment_nested_comment_wsp(self): + comment = self._test_get_x(parser.get_comment, + '(foo ( bar ) )', '(foo ( bar ) )', ' ', [], '', ['foo ( bar ) ']) + self.assertEqual(comment[2].content, ' bar ') + + def test_get_comment_empty_comment(self): + self._test_get_x(parser.get_comment, + '()', '()', ' ', [], '', ['']) + + def test_get_comment_multiple_nesting(self): + comment = self._test_get_x(parser.get_comment, + '(((((foo)))))', '(((((foo)))))', ' ', [], '', ['((((foo))))']) + for i in range(4, 0, -1): + self.assertEqual(comment[0].content, '('*(i-1)+'foo'+')'*(i-1)) + comment = comment[0] + self.assertEqual(comment.content, 'foo') + + def test_get_comment_missing_end_of_nesting(self): + self._test_get_x(parser.get_comment, + '(((((foo)))', '(((((foo)))))', ' ', + [errors.InvalidHeaderDefect]*2, '', ['((((foo))))']) + + def test_get_comment_qs_in_nested_comment(self): + comment = self._test_get_x(parser.get_comment, + '(foo (b\)))', '(foo (b\)))', ' ', [], '', ['foo (b\))']) + self.assertEqual(comment[2].content, 'b)') + + # get_cfws + + def test_get_cfws_only_ws(self): + cfws = self._test_get_x(parser.get_cfws, + ' \t \t', ' \t \t', ' ', [], '', []) + self.assertEqual(cfws.token_type, 'cfws') + + def test_get_cfws_only_comment(self): + cfws = self._test_get_x(parser.get_cfws, + '(foo)', '(foo)', ' ', [], '', ['foo']) + self.assertEqual(cfws[0].content, 'foo') + + def test_get_cfws_only_mixed(self): + cfws = self._test_get_x(parser.get_cfws, + ' (foo ) ( bar) ', ' (foo ) ( bar) ', ' ', [], '', + ['foo ', ' bar']) + self.assertEqual(cfws[1].content, 'foo ') + self.assertEqual(cfws[3].content, ' bar') + + def 
test_get_cfws_ends_at_non_leader(self): + cfws = self._test_get_x(parser.get_cfws, + '(foo) bar', '(foo) ', ' ', [], 'bar', ['foo']) + self.assertEqual(cfws[0].content, 'foo') + + def test_get_cfws_ends_at_non_printable(self): + cfws = self._test_get_x(parser.get_cfws, + '(foo) \x07', '(foo) ', ' ', [], '\x07', ['foo']) + self.assertEqual(cfws[0].content, 'foo') + + def test_get_cfws_non_printable_in_comment(self): + cfws = self._test_get_x(parser.get_cfws, + '(foo \x07) "test"', '(foo \x07) ', ' ', + [errors.NonPrintableDefect], '"test"', ['foo \x07']) + self.assertEqual(cfws[0].content, 'foo \x07') + + def test_get_cfws_header_ends_in_comment(self): + cfws = self._test_get_x(parser.get_cfws, + ' (foo ', ' (foo )', ' ', + [errors.InvalidHeaderDefect], '', ['foo ']) + self.assertEqual(cfws[1].content, 'foo ') + + def test_get_cfws_multiple_nested_comments(self): + cfws = self._test_get_x(parser.get_cfws, + '(foo (bar)) ((a)(a))', '(foo (bar)) ((a)(a))', ' ', [], + '', ['foo (bar)', '(a)(a)']) + self.assertEqual(cfws[0].comments, ['foo (bar)']) + self.assertEqual(cfws[2].comments, ['(a)(a)']) + + # get_quoted_string + + def test_get_quoted_string_only(self): + qs = self._test_get_x(parser.get_quoted_string, + '"bob"', '"bob"', 'bob', [], '') + self.assertEqual(qs.token_type, 'quoted-string') + self.assertEqual(qs.quoted_value, '"bob"') + self.assertEqual(qs.content, 'bob') + + def test_get_quoted_string_with_wsp(self): + qs = self._test_get_x(parser.get_quoted_string, + '\t "bob" ', '\t "bob" ', ' bob ', [], '') + self.assertEqual(qs.quoted_value, ' "bob" ') + self.assertEqual(qs.content, 'bob') + + def test_get_quoted_string_with_comments_and_wsp(self): + qs = self._test_get_x(parser.get_quoted_string, + ' (foo) "bob"(bar)', ' (foo) "bob"(bar)', ' bob ', [], '') + self.assertEqual(qs[0][1].content, 'foo') + self.assertEqual(qs[2][0].content, 'bar') + self.assertEqual(qs.content, 'bob') + self.assertEqual(qs.quoted_value, ' "bob" ') + + def 
test_get_quoted_string_with_multiple_comments(self): + qs = self._test_get_x(parser.get_quoted_string, + ' (foo) (bar) "bob"(bird)', ' (foo) (bar) "bob"(bird)', ' bob ', + [], '') + self.assertEqual(qs[0].comments, ['foo', 'bar']) + self.assertEqual(qs[2].comments, ['bird']) + self.assertEqual(qs.content, 'bob') + self.assertEqual(qs.quoted_value, ' "bob" ') + + def test_get_quoted_string_non_printable_in_comment(self): + qs = self._test_get_x(parser.get_quoted_string, + ' (\x0A) "bob"', ' (\x0A) "bob"', ' bob', + [errors.NonPrintableDefect], '') + self.assertEqual(qs[0].comments, ['\x0A']) + self.assertEqual(qs.content, 'bob') + self.assertEqual(qs.quoted_value, ' "bob"') + + def test_get_quoted_string_non_printable_in_qcontent(self): + qs = self._test_get_x(parser.get_quoted_string, + ' (a) "a\x0B"', ' (a) "a\x0B"', ' a\x0B', + [errors.NonPrintableDefect], '') + self.assertEqual(qs[0].comments, ['a']) + self.assertEqual(qs.content, 'a\x0B') + self.assertEqual(qs.quoted_value, ' "a\x0B"') + + def test_get_quoted_string_internal_ws(self): + qs = self._test_get_x(parser.get_quoted_string, + ' (a) "foo bar "', ' (a) "foo bar "', ' foo bar ', + [], '') + self.assertEqual(qs[0].comments, ['a']) + self.assertEqual(qs.content, 'foo bar ') + self.assertEqual(qs.quoted_value, ' "foo bar "') + + def test_get_quoted_string_header_ends_in_comment(self): + qs = self._test_get_x(parser.get_quoted_string, + ' (a) "bob" (a', ' (a) "bob" (a)', ' bob ', + [errors.InvalidHeaderDefect], '') + self.assertEqual(qs[0].comments, ['a']) + self.assertEqual(qs[2].comments, ['a']) + self.assertEqual(qs.content, 'bob') + self.assertEqual(qs.quoted_value, ' "bob" ') + + def test_get_quoted_string_header_ends_in_qcontent(self): + qs = self._test_get_x(parser.get_quoted_string, + ' (a) "bob', ' (a) "bob"', ' bob', + [errors.InvalidHeaderDefect], '') + self.assertEqual(qs[0].comments, ['a']) + self.assertEqual(qs.content, 'bob') + self.assertEqual(qs.quoted_value, ' "bob"') + + def 
test_get_quoted_string_no_quoted_string(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_quoted_string(' (ab) xyz') + + def test_get_quoted_string_qs_ends_at_noncfws(self): + qs = self._test_get_x(parser.get_quoted_string, + '\t "bob" fee', '\t "bob" ', ' bob ', [], 'fee') + self.assertEqual(qs.content, 'bob') + self.assertEqual(qs.quoted_value, ' "bob" ') + + # get_atom + + def test_get_atom_only(self): + atom = self._test_get_x(parser.get_atom, + 'bob', 'bob', 'bob', [], '') + self.assertEqual(atom.token_type, 'atom') + + def test_get_atom_with_wsp(self): + self._test_get_x(parser.get_atom, + '\t bob ', '\t bob ', ' bob ', [], '') + + def test_get_atom_with_comments_and_wsp(self): + atom = self._test_get_x(parser.get_atom, + ' (foo) bob(bar)', ' (foo) bob(bar)', ' bob ', [], '') + self.assertEqual(atom[0][1].content, 'foo') + self.assertEqual(atom[2][0].content, 'bar') + + def test_get_atom_with_multiple_comments(self): + atom = self._test_get_x(parser.get_atom, + ' (foo) (bar) bob(bird)', ' (foo) (bar) bob(bird)', ' bob ', + [], '') + self.assertEqual(atom[0].comments, ['foo', 'bar']) + self.assertEqual(atom[2].comments, ['bird']) + + def test_get_atom_non_printable_in_comment(self): + atom = self._test_get_x(parser.get_atom, + ' (\x0A) bob', ' (\x0A) bob', ' bob', + [errors.NonPrintableDefect], '') + self.assertEqual(atom[0].comments, ['\x0A']) + + def test_get_atom_non_printable_in_atext(self): + atom = self._test_get_x(parser.get_atom, + ' (a) a\x0B', ' (a) a\x0B', ' a\x0B', + [errors.NonPrintableDefect], '') + self.assertEqual(atom[0].comments, ['a']) + + def test_get_atom_header_ends_in_comment(self): + atom = self._test_get_x(parser.get_atom, + ' (a) bob (a', ' (a) bob (a)', ' bob ', + [errors.InvalidHeaderDefect], '') + self.assertEqual(atom[0].comments, ['a']) + self.assertEqual(atom[2].comments, ['a']) + + def test_get_atom_no_atom(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_atom(' (ab) ') + + def 
test_get_atom_no_atom_before_special(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_atom(' (ab) @') + + def test_get_atom_atom_ends_at_special(self): + atom = self._test_get_x(parser.get_atom, + ' (foo) bob(bar) @bang', ' (foo) bob(bar) ', ' bob ', [], '@bang') + self.assertEqual(atom[0].comments, ['foo']) + self.assertEqual(atom[2].comments, ['bar']) + + def test_get_atom_atom_ends_at_noncfws(self): + atom = self._test_get_x(parser.get_atom, + 'bob fred', 'bob ', 'bob ', [], 'fred') + + # get_dot_atom_text + + def test_get_dot_atom_text(self): + dot_atom_text = self._test_get_x(parser.get_dot_atom_text, + 'foo.bar.bang', 'foo.bar.bang', 'foo.bar.bang', [], '') + self.assertEqual(dot_atom_text.token_type, 'dot-atom-text') + self.assertEqual(len(dot_atom_text), 5) + + def test_get_dot_atom_text_lone_atom_is_valid(self): + dot_atom_text = self._test_get_x(parser.get_dot_atom_text, + 'foo', 'foo', 'foo', [], '') + + def test_get_dot_atom_text_raises_on_leading_dot(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_dot_atom_text('.foo.bar') + + def test_get_dot_atom_text_raises_on_trailing_dot(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_dot_atom_text('foo.bar.') + + def test_get_dot_atom_text_raises_on_leading_non_atext(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_dot_atom_text(' foo.bar') + with self.assertRaises(errors.HeaderParseError): + parser.get_dot_atom_text('@foo.bar') + with self.assertRaises(errors.HeaderParseError): + parser.get_dot_atom_text('"foo.bar"') + + def test_get_dot_atom_text_trailing_text_preserved(self): + dot_atom_text = self._test_get_x(parser.get_dot_atom_text, + 'foo@bar', 'foo', 'foo', [], '@bar') + + def test_get_dot_atom_text_trailing_ws_preserved(self): + dot_atom_text = self._test_get_x(parser.get_dot_atom_text, + 'foo .bar', 'foo', 'foo', [], ' .bar') + + # get_dot_atom + + def test_get_dot_atom_only(self): + dot_atom = 
self._test_get_x(parser.get_dot_atom, + 'foo.bar.bing', 'foo.bar.bing', 'foo.bar.bing', [], '') + self.assertEqual(dot_atom.token_type, 'dot-atom') + self.assertEqual(len(dot_atom), 1) + + def test_get_dot_atom_with_wsp(self): + self._test_get_x(parser.get_dot_atom, + '\t foo.bar.bing ', '\t foo.bar.bing ', ' foo.bar.bing ', [], '') + + def test_get_dot_atom_with_comments_and_wsp(self): + self._test_get_x(parser.get_dot_atom, + ' (sing) foo.bar.bing (here) ', ' (sing) foo.bar.bing (here) ', + ' foo.bar.bing ', [], '') + + def test_get_dot_atom_space_ends_dot_atom(self): + self._test_get_x(parser.get_dot_atom, + ' (sing) foo.bar .bing (here) ', ' (sing) foo.bar ', + ' foo.bar ', [], '.bing (here) ') + + def test_get_dot_atom_no_atom_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_dot_atom(' (foo) ') + + def test_get_dot_atom_leading_dot_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_dot_atom(' (foo) .bar') + + def test_get_dot_atom_two_dots_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_dot_atom('bar..bang') + + def test_get_dot_atom_trailing_dot_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_dot_atom(' (foo) bar.bang. 
foo') + + # get_word (if this were black box we'd repeat all the qs/atom tests) + + def test_get_word_atom_yields_atom(self): + word = self._test_get_x(parser.get_word, + ' (foo) bar (bang) :ah', ' (foo) bar (bang) ', ' bar ', [], ':ah') + self.assertEqual(word.token_type, 'atom') + self.assertEqual(word[0].token_type, 'cfws') + + def test_get_word_qs_yields_qs(self): + word = self._test_get_x(parser.get_word, + '"bar " (bang) ah', '"bar " (bang) ', 'bar ', [], 'ah') + self.assertEqual(word.token_type, 'quoted-string') + self.assertEqual(word[0].token_type, 'bare-quoted-string') + self.assertEqual(word[0].value, 'bar ') + self.assertEqual(word.content, 'bar ') + + def test_get_word_ends_at_dot(self): + self._test_get_x(parser.get_word, + 'foo.', 'foo', 'foo', [], '.') + + # get_phrase + + def test_get_phrase_simple(self): + phrase = self._test_get_x(parser.get_phrase, + '"Fred A. Johnson" is his name, oh.', + '"Fred A. Johnson" is his name', + 'Fred A. Johnson is his name', + [], + ', oh.') + self.assertEqual(phrase.token_type, 'phrase') + + def test_get_phrase_complex(self): + phrase = self._test_get_x(parser.get_phrase, + ' (A) bird (in (my|your)) "hand " is messy\t<>\t', + ' (A) bird (in (my|your)) "hand " is messy\t', + ' bird hand is messy ', + [], + '<>\t') + self.assertEqual(phrase[0][0].comments, ['A']) + self.assertEqual(phrase[0][2].comments, ['in (my|your)']) + + def test_get_phrase_obsolete(self): + phrase = self._test_get_x(parser.get_phrase, + 'Fred A.(weird).O Johnson', + 'Fred A.(weird).O Johnson', + 'Fred A. 
.O Johnson', + [errors.ObsoleteHeaderDefect]*3, + '') + self.assertEqual(len(phrase), 7) + self.assertEqual(phrase[3].comments, ['weird']) + + def test_get_phrase_phrase_must_start_with_word(self): + phrase = self._test_get_x(parser.get_phrase, + '(even weirder).name', + '(even weirder).name', + ' .name', + [errors.InvalidHeaderDefect] + [errors.ObsoleteHeaderDefect]*2, + '') + self.assertEqual(len(phrase), 3) + self.assertEqual(phrase[0].comments, ['even weirder']) + + def test_get_phrase_ending_with_obsolete(self): + phrase = self._test_get_x(parser.get_phrase, + 'simple phrase.(with trailing comment):boo', + 'simple phrase.(with trailing comment)', + 'simple phrase. ', + [errors.ObsoleteHeaderDefect]*2, + ':boo') + self.assertEqual(len(phrase), 4) + self.assertEqual(phrase[3].comments, ['with trailing comment']) + + def test_get_phrase_cfws_only_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_phrase(' (foo) ') + + # get_local_part + + def test_get_local_part_simple(self): + local_part = self._test_get_x(parser.get_local_part, + 'dinsdale@python.org', 'dinsdale', 'dinsdale', [], '@python.org') + self.assertEqual(local_part.token_type, 'local-part') + self.assertEqual(local_part.local_part, 'dinsdale') + + def test_get_local_part_with_dot(self): + local_part = self._test_get_x(parser.get_local_part, + 'Fred.A.Johnson@python.org', + 'Fred.A.Johnson', + 'Fred.A.Johnson', + [], + '@python.org') + self.assertEqual(local_part.local_part, 'Fred.A.Johnson') + + def test_get_local_part_with_whitespace(self): + local_part = self._test_get_x(parser.get_local_part, + ' Fred.A.Johnson @python.org', + ' Fred.A.Johnson ', + ' Fred.A.Johnson ', + [], + '@python.org') + self.assertEqual(local_part.local_part, 'Fred.A.Johnson') + + def test_get_local_part_with_cfws(self): + local_part = self._test_get_x(parser.get_local_part, + ' (foo) Fred.A.Johnson (bar (bird)) @python.org', + ' (foo) Fred.A.Johnson (bar (bird)) ', + ' Fred.A.Johnson ', + [], + 
'@python.org') + self.assertEqual(local_part.local_part, 'Fred.A.Johnson') + self.assertEqual(local_part[0][0].comments, ['foo']) + self.assertEqual(local_part[0][2].comments, ['bar (bird)']) + + def test_get_local_part_simple_quoted(self): + local_part = self._test_get_x(parser.get_local_part, + '"dinsdale"@python.org', '"dinsdale"', '"dinsdale"', [], '@python.org') + self.assertEqual(local_part.token_type, 'local-part') + self.assertEqual(local_part.local_part, 'dinsdale') + + def test_get_local_part_with_quoted_dot(self): + local_part = self._test_get_x(parser.get_local_part, + '"Fred.A.Johnson"@python.org', + '"Fred.A.Johnson"', + '"Fred.A.Johnson"', + [], + '@python.org') + self.assertEqual(local_part.local_part, 'Fred.A.Johnson') + + def test_get_local_part_quoted_with_whitespace(self): + local_part = self._test_get_x(parser.get_local_part, + ' "Fred A. Johnson" @python.org', + ' "Fred A. Johnson" ', + ' "Fred A. Johnson" ', + [], + '@python.org') + self.assertEqual(local_part.local_part, 'Fred A. Johnson') + + def test_get_local_part_quoted_with_cfws(self): + local_part = self._test_get_x(parser.get_local_part, + ' (foo) " Fred A. Johnson " (bar (bird)) @python.org', + ' (foo) " Fred A. Johnson " (bar (bird)) ', + ' " Fred A. Johnson " ', + [], + '@python.org') + self.assertEqual(local_part.local_part, ' Fred A. Johnson ') + self.assertEqual(local_part[0][0].comments, ['foo']) + self.assertEqual(local_part[0][2].comments, ['bar (bird)']) + + + def test_get_local_part_simple_obsolete(self): + local_part = self._test_get_x(parser.get_local_part, + 'Fred. A.Johnson@python.org', + 'Fred. A.Johnson', + 'Fred. A.Johnson', + [errors.ObsoleteHeaderDefect], + '@python.org') + self.assertEqual(local_part.local_part, 'Fred. 
A.Johnson') + + def test_get_local_part_complex_obsolete_1(self): + local_part = self._test_get_x(parser.get_local_part, + ' (foo )Fred (bar).(bird) A.(sheep)Johnson."and dogs "@python.org', + ' (foo )Fred (bar).(bird) A.(sheep)Johnson."and dogs "', + ' Fred . A. Johnson.and dogs ', + [errors.ObsoleteHeaderDefect], + '@python.org') + self.assertEqual(local_part.local_part, 'Fred . A. Johnson.and dogs ') + + def test_get_local_part_complex_obsolete_invalid(self): + local_part = self._test_get_x(parser.get_local_part, + ' (foo )Fred (bar).(bird) A.(sheep)Johnson "and dogs"@python.org', + ' (foo )Fred (bar).(bird) A.(sheep)Johnson "and dogs"', + ' Fred . A. Johnson and dogs', + [errors.InvalidHeaderDefect]*2, + '@python.org') + self.assertEqual(local_part.local_part, 'Fred . A. Johnson and dogs') + + def test_get_local_part_no_part_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_local_part(' (foo) ') + + def test_get_local_part_special_instead_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_local_part(' (foo) @python.org') + + # XXX Should these three be errors or defects? 
+ + def test_get_local_part_leading_dot_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_local_part(' .borris@python.org') + + def test_get_local_part_trailing_dot_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_local_part(' borris.@python.org') + + def test_get_local_part_double_dot_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_local_part(' borris.(foo).natasha@python.org') + + def test_get_local_part_quoted_strings_in_atom_list(self): + local_part = self._test_get_x(parser.get_local_part, + '""example" example"@example.com', + '""example" example"', + 'example example', + [errors.InvalidHeaderDefect]*3, + '@example.com') + self.assertEqual(local_part.local_part, 'example example') + + def test_get_local_part_valid_and_invalid_qp_in_atom_list(self): + local_part = self._test_get_x(parser.get_local_part, + r'"\\"example\\" example"@example.com', + r'"\\"example\\" example"', + r'\example\\ example', + [errors.InvalidHeaderDefect]*5, + '@example.com') + self.assertEqual(local_part.local_part, r'\example\\ example') + + # get_dtext + + def test_get_dtext_only(self): + dtext = self._test_get_x(parser.get_dtext, + 'foobar', 'foobar', 'foobar', [], '') + self.assertEqual(dtext.token_type, 'ptext') + + def test_get_dtext_all_dtext(self): + dtext = self._test_get_x(parser.get_dtext, self.rfc_dtext_chars, + self.rfc_dtext_chars, + self.rfc_dtext_chars, [], '') + + def test_get_dtext_two_words_gets_first(self): + self._test_get_x(parser.get_dtext, + 'foo bar', 'foo', 'foo', [], ' bar') + + def test_get_dtext_following_wsp_preserved(self): + self._test_get_x(parser.get_dtext, + 'foo \t\tbar', 'foo', 'foo', [], ' \t\tbar') + + def test_get_dtext_non_printables(self): + dtext = self._test_get_x(parser.get_dtext, + 'foo\x00bar]', 'foo\x00bar', 'foo\x00bar', + [errors.NonPrintableDefect], ']') + self.assertEqual(dtext.defects[0].non_printables[0], '\x00') + + def 
test_get_dtext_with_qp(self): + ptext = self._test_get_x(parser.get_dtext, + r'foo\]\[\\bar\b\e\l\l', + r'foo][\barbell', + r'foo][\barbell', + [errors.ObsoleteHeaderDefect], + '') + + def test_get_dtext_up_to_close_bracket_only(self): + self._test_get_x(parser.get_dtext, + 'foo]', 'foo', 'foo', [], ']') + + def test_get_dtext_wsp_before_close_bracket_preserved(self): + self._test_get_x(parser.get_dtext, + 'foo ]', 'foo', 'foo', [], ' ]') + + def test_get_dtext_close_bracket_mid_word(self): + self._test_get_x(parser.get_dtext, + 'foo]bar', 'foo', 'foo', [], ']bar') + + def test_get_dtext_up_to_open_bracket_only(self): + self._test_get_x(parser.get_dtext, + 'foo[', 'foo', 'foo', [], '[') + + def test_get_dtext_wsp_before_open_bracket_preserved(self): + self._test_get_x(parser.get_dtext, + 'foo [', 'foo', 'foo', [], ' [') + + def test_get_dtext_open_bracket_mid_word(self): + self._test_get_x(parser.get_dtext, + 'foo[bar', 'foo', 'foo', [], '[bar') + + # get_domain_literal + + def test_get_domain_literal_only(self): + domain_literal = self._test_get_x(parser.get_domain_literal, + '[127.0.0.1]', + '[127.0.0.1]', + '[127.0.0.1]', + [], + '') + self.assertEqual(domain_literal.token_type, 'domain-literal') + self.assertEqual(domain_literal.domain, '[127.0.0.1]') + self.assertEqual(domain_literal.ip, '127.0.0.1') + + def test_get_domain_literal_with_internal_ws(self): + domain_literal = self._test_get_x(parser.get_domain_literal, + '[ 127.0.0.1\t ]', + '[ 127.0.0.1\t ]', + '[ 127.0.0.1 ]', + [], + '') + self.assertEqual(domain_literal.domain, '[127.0.0.1]') + self.assertEqual(domain_literal.ip, '127.0.0.1') + + def test_get_domain_literal_with_surrounding_cfws(self): + domain_literal = self._test_get_x(parser.get_domain_literal, + '(foo)[ 127.0.0.1] (bar)', + '(foo)[ 127.0.0.1] (bar)', + ' [ 127.0.0.1] ', + [], + '') + self.assertEqual(domain_literal.domain, '[127.0.0.1]') + self.assertEqual(domain_literal.ip, '127.0.0.1') + + def 
test_get_domain_literal_no_start_char_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_domain_literal('(foo) ') + + def test_get_domain_literal_no_start_char_before_special_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_domain_literal('(foo) @') + + def test_get_domain_literal_bad_dtext_char_before_special_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_domain_literal('(foo) [abc[@') + + # get_domain + + def test_get_domain_regular_domain_only(self): + domain = self._test_get_x(parser.get_domain, + 'example.com', + 'example.com', + 'example.com', + [], + '') + self.assertEqual(domain.token_type, 'domain') + self.assertEqual(domain.domain, 'example.com') + + def test_get_domain_domain_literal_only(self): + domain = self._test_get_x(parser.get_domain, + '[127.0.0.1]', + '[127.0.0.1]', + '[127.0.0.1]', + [], + '') + self.assertEqual(domain.token_type, 'domain') + self.assertEqual(domain.domain, '[127.0.0.1]') + + def test_get_domain_with_cfws(self): + domain = self._test_get_x(parser.get_domain, + '(foo) example.com(bar)\t', + '(foo) example.com(bar)\t', + ' example.com ', + [], + '') + self.assertEqual(domain.domain, 'example.com') + + def test_get_domain_domain_literal_with_cfws(self): + domain = self._test_get_x(parser.get_domain, + '(foo)[127.0.0.1]\t(bar)', + '(foo)[127.0.0.1]\t(bar)', + ' [127.0.0.1] ', + [], + '') + self.assertEqual(domain.domain, '[127.0.0.1]') + + def test_get_domain_domain_with_cfws_ends_at_special(self): + domain = self._test_get_x(parser.get_domain, + '(foo)example.com\t(bar), next', + '(foo)example.com\t(bar)', + ' example.com ', + [], + ', next') + self.assertEqual(domain.domain, 'example.com') + + def test_get_domain_domain_literal_with_cfws_ends_at_special(self): + domain = self._test_get_x(parser.get_domain, + '(foo)[127.0.0.1]\t(bar), next', + '(foo)[127.0.0.1]\t(bar)', + ' [127.0.0.1] ', + [], + ', next') + self.assertEqual(domain.domain, 
'[127.0.0.1]') + + def test_get_domain_obsolete(self): + domain = self._test_get_x(parser.get_domain, + '(foo) example . (bird)com(bar)\t', + '(foo) example . (bird)com(bar)\t', + ' example . com ', + [errors.ObsoleteHeaderDefect], + '') + self.assertEqual(domain.domain, 'example.com') + + def test_get_domain_no_non_cfws_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_domain(" (foo)\t") + + def test_get_domain_no_atom_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_domain(" (foo)\t, broken") + + + # get_addr_spec + + def test_get_addr_spec_normal(self): + addr_spec = self._test_get_x(parser.get_addr_spec, + 'dinsdale@example.com', + 'dinsdale@example.com', + 'dinsdale@example.com', + [], + '') + self.assertEqual(addr_spec.token_type, 'addr-spec') + self.assertEqual(addr_spec.local_part, 'dinsdale') + self.assertEqual(addr_spec.domain, 'example.com') + self.assertEqual(addr_spec.addr_spec, 'dinsdale@example.com') + + def test_get_addr_spec_with_doamin_literal(self): + addr_spec = self._test_get_x(parser.get_addr_spec, + 'dinsdale@[127.0.0.1]', + 'dinsdale@[127.0.0.1]', + 'dinsdale@[127.0.0.1]', + [], + '') + self.assertEqual(addr_spec.local_part, 'dinsdale') + self.assertEqual(addr_spec.domain, '[127.0.0.1]') + self.assertEqual(addr_spec.addr_spec, 'dinsdale@[127.0.0.1]') + + def test_get_addr_spec_with_cfws(self): + addr_spec = self._test_get_x(parser.get_addr_spec, + '(foo) dinsdale(bar)@ (bird) example.com (bog)', + '(foo) dinsdale(bar)@ (bird) example.com (bog)', + ' dinsdale@example.com ', + [], + '') + self.assertEqual(addr_spec.local_part, 'dinsdale') + self.assertEqual(addr_spec.domain, 'example.com') + self.assertEqual(addr_spec.addr_spec, 'dinsdale@example.com') + + def test_get_addr_spec_with_qouoted_string_and_cfws(self): + addr_spec = self._test_get_x(parser.get_addr_spec, + '(foo) "roy a bug"(bar)@ (bird) example.com (bog)', + '(foo) "roy a bug"(bar)@ (bird) example.com (bog)', + ' "roy a 
bug"@example.com ', + [], + '') + self.assertEqual(addr_spec.local_part, 'roy a bug') + self.assertEqual(addr_spec.domain, 'example.com') + self.assertEqual(addr_spec.addr_spec, '"roy a bug"@example.com') + + def test_get_addr_spec_ends_at_special(self): + addr_spec = self._test_get_x(parser.get_addr_spec, + '(foo) "roy a bug"(bar)@ (bird) example.com (bog) , next', + '(foo) "roy a bug"(bar)@ (bird) example.com (bog) ', + ' "roy a bug"@example.com ', + [], + ', next') + self.assertEqual(addr_spec.local_part, 'roy a bug') + self.assertEqual(addr_spec.domain, 'example.com') + self.assertEqual(addr_spec.addr_spec, '"roy a bug"@example.com') + + def test_get_addr_spec_quoted_strings_in_atom_list(self): + addr_spec = self._test_get_x(parser.get_addr_spec, + '""example" example"@example.com', + '""example" example"@example.com', + 'example example@example.com', + [errors.InvalidHeaderDefect]*3, + '') + self.assertEqual(addr_spec.local_part, 'example example') + self.assertEqual(addr_spec.domain, 'example.com') + self.assertEqual(addr_spec.addr_spec, '"example example"@example.com') + + # get_obs_route + + def test_get_obs_route_simple(self): + obs_route = self._test_get_x(parser.get_obs_route, + '@example.com, @two.example.com:', + '@example.com, @two.example.com:', + '@example.com, @two.example.com:', + [], + '') + self.assertEqual(obs_route.token_type, 'obs-route') + self.assertEqual(obs_route.domains, ['example.com', 'two.example.com']) + + def test_get_obs_route_complex(self): + obs_route = self._test_get_x(parser.get_obs_route, + '(foo),, (blue)@example.com (bar),@two.(foo) example.com (bird):', + '(foo),, (blue)@example.com (bar),@two.(foo) example.com (bird):', + ' ,, @example.com ,@two. 
example.com :', + [errors.ObsoleteHeaderDefect], # This is the obs-domain + '') + self.assertEqual(obs_route.token_type, 'obs-route') + self.assertEqual(obs_route.domains, ['example.com', 'two.example.com']) + + def test_get_obs_route_no_route_before_end_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_obs_route('(foo) @example.com,') + + def test_get_obs_route_no_route_before_special_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_obs_route('(foo) [abc],') + + def test_get_obs_route_no_route_before_special_raises2(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_obs_route('(foo) @example.com [abc],') + + # get_angle_addr + + def test_get_angle_addr_simple(self): + angle_addr = self._test_get_x(parser.get_angle_addr, + '', + '', + '', + [], + '') + self.assertEqual(angle_addr.token_type, 'angle-addr') + self.assertEqual(angle_addr.local_part, 'dinsdale') + self.assertEqual(angle_addr.domain, 'example.com') + self.assertIsNone(angle_addr.route) + self.assertEqual(angle_addr.addr_spec, 'dinsdale@example.com') + + def test_get_angle_addr_with_cfws(self): + angle_addr = self._test_get_x(parser.get_angle_addr, + ' (foo) (bar)', + ' (foo) (bar)', + ' ', + [], + '') + self.assertEqual(angle_addr.token_type, 'angle-addr') + self.assertEqual(angle_addr.local_part, 'dinsdale') + self.assertEqual(angle_addr.domain, 'example.com') + self.assertIsNone(angle_addr.route) + self.assertEqual(angle_addr.addr_spec, 'dinsdale@example.com') + + def test_get_angle_addr_qs_and_domain_literal(self): + angle_addr = self._test_get_x(parser.get_angle_addr, + '<"Fred Perfect"@[127.0.0.1]>', + '<"Fred Perfect"@[127.0.0.1]>', + '<"Fred Perfect"@[127.0.0.1]>', + [], + '') + self.assertEqual(angle_addr.local_part, 'Fred Perfect') + self.assertEqual(angle_addr.domain, '[127.0.0.1]') + self.assertIsNone(angle_addr.route) + self.assertEqual(angle_addr.addr_spec, '"Fred Perfect"@[127.0.0.1]') + + def 
test_get_angle_addr_internal_cfws(self): + angle_addr = self._test_get_x(parser.get_angle_addr, + '<(foo) dinsdale@example.com(bar)>', + '<(foo) dinsdale@example.com(bar)>', + '< dinsdale@example.com >', + [], + '') + self.assertEqual(angle_addr.local_part, 'dinsdale') + self.assertEqual(angle_addr.domain, 'example.com') + self.assertIsNone(angle_addr.route) + self.assertEqual(angle_addr.addr_spec, 'dinsdale@example.com') + + def test_get_angle_addr_obs_route(self): + angle_addr = self._test_get_x(parser.get_angle_addr, + '(foo)<@example.com, (bird) @two.example.com: dinsdale@example.com> (bar) ', + '(foo)<@example.com, (bird) @two.example.com: dinsdale@example.com> (bar) ', + ' <@example.com, @two.example.com: dinsdale@example.com> ', + [errors.ObsoleteHeaderDefect], + '') + self.assertEqual(angle_addr.local_part, 'dinsdale') + self.assertEqual(angle_addr.domain, 'example.com') + self.assertEqual(angle_addr.route, ['example.com', 'two.example.com']) + self.assertEqual(angle_addr.addr_spec, 'dinsdale@example.com') + + def test_get_angle_addr_missing_closing_angle(self): + angle_addr = self._test_get_x(parser.get_angle_addr, + '', + '', + [errors.InvalidHeaderDefect], + '') + self.assertEqual(angle_addr.local_part, 'dinsdale') + self.assertEqual(angle_addr.domain, 'example.com') + self.assertIsNone(angle_addr.route) + self.assertEqual(angle_addr.addr_spec, 'dinsdale@example.com') + + def test_get_angle_addr_missing_closing_angle_with_cfws(self): + angle_addr = self._test_get_x(parser.get_angle_addr, + '', + '', + [errors.InvalidHeaderDefect], + '') + self.assertEqual(angle_addr.local_part, 'dinsdale') + self.assertEqual(angle_addr.domain, 'example.com') + self.assertIsNone(angle_addr.route) + self.assertEqual(angle_addr.addr_spec, 'dinsdale@example.com') + + def test_get_angle_addr_ends_at_special(self): + angle_addr = self._test_get_x(parser.get_angle_addr, + ' (foo), next', + ' (foo)', + ' ', + [], + ', next') + self.assertEqual(angle_addr.local_part, 'dinsdale') 
+ self.assertEqual(angle_addr.domain, 'example.com') + self.assertIsNone(angle_addr.route) + self.assertEqual(angle_addr.addr_spec, 'dinsdale@example.com') + + def test_get_angle_addr_no_angle_raise(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_angle_addr('(foo) ') + + def test_get_angle_addr_no_angle_before_special_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_angle_addr('(foo) , next') + + def test_get_angle_addr_no_angle_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_angle_addr('bar') + + def test_get_angle_addr_special_after_angle_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_angle_addr('(foo) <, bar') + + # get_display_name This is phrase but with a different value. + + def test_get_display_name_simple(self): + display_name = self._test_get_x(parser.get_display_name, + 'Fred A Johnson', + 'Fred A Johnson', + 'Fred A Johnson', + [], + '') + self.assertEqual(display_name.token_type, 'display-name') + self.assertEqual(display_name.display_name, 'Fred A Johnson') + + def test_get_display_name_complex1(self): + display_name = self._test_get_x(parser.get_display_name, + '"Fred A. Johnson" is his name, oh.', + '"Fred A. Johnson" is his name', + '"Fred A. Johnson is his name"', + [], + ', oh.') + self.assertEqual(display_name.token_type, 'display-name') + self.assertEqual(display_name.display_name, 'Fred A. 
Johnson is his name') + + def test_get_display_name_complex2(self): + display_name = self._test_get_x(parser.get_display_name, + ' (A) bird (in (my|your)) "hand " is messy\t<>\t', + ' (A) bird (in (my|your)) "hand " is messy\t', + ' "bird hand is messy" ', + [], + '<>\t') + self.assertEqual(display_name[0][0].comments, ['A']) + self.assertEqual(display_name[0][2].comments, ['in (my|your)']) + self.assertEqual(display_name.display_name, 'bird hand is messy') + + def test_get_display_name_obsolete(self): + display_name = self._test_get_x(parser.get_display_name, + 'Fred A.(weird).O Johnson', + 'Fred A.(weird).O Johnson', + '"Fred A. .O Johnson"', + [errors.ObsoleteHeaderDefect]*3, + '') + self.assertEqual(len(display_name), 7) + self.assertEqual(display_name[3].comments, ['weird']) + self.assertEqual(display_name.display_name, 'Fred A. .O Johnson') + + def test_get_display_name_pharse_must_start_with_word(self): + display_name = self._test_get_x(parser.get_display_name, + '(even weirder).name', + '(even weirder).name', + ' ".name"', + [errors.InvalidHeaderDefect] + [errors.ObsoleteHeaderDefect]*2, + '') + self.assertEqual(len(display_name), 3) + self.assertEqual(display_name[0].comments, ['even weirder']) + self.assertEqual(display_name.display_name, '.name') + + def test_get_display_name_ending_with_obsolete(self): + display_name = self._test_get_x(parser.get_display_name, + 'simple phrase.(with trailing comment):boo', + 'simple phrase.(with trailing comment)', + '"simple phrase." 
', + [errors.ObsoleteHeaderDefect]*2, + ':boo') + self.assertEqual(len(display_name), 4) + self.assertEqual(display_name[3].comments, ['with trailing comment']) + self.assertEqual(display_name.display_name, 'simple phrase.') + + # get_name_addr + + def test_get_name_addr_angle_addr_only(self): + name_addr = self._test_get_x(parser.get_name_addr, + '', + '', + '', + [], + '') + self.assertEqual(name_addr.token_type, 'name-addr') + self.assertIsNone(name_addr.display_name) + self.assertEqual(name_addr.local_part, 'dinsdale') + self.assertEqual(name_addr.domain, 'example.com') + self.assertIsNone(name_addr.route) + self.assertEqual(name_addr.addr_spec, 'dinsdale@example.com') + + def test_get_name_addr_atom_name(self): + name_addr = self._test_get_x(parser.get_name_addr, + 'Dinsdale ', + 'Dinsdale ', + 'Dinsdale ', + [], + '') + self.assertEqual(name_addr.token_type, 'name-addr') + self.assertEqual(name_addr.display_name, 'Dinsdale') + self.assertEqual(name_addr.local_part, 'dinsdale') + self.assertEqual(name_addr.domain, 'example.com') + self.assertIsNone(name_addr.route) + self.assertEqual(name_addr.addr_spec, 'dinsdale@example.com') + + def test_get_name_addr_atom_name_with_cfws(self): + name_addr = self._test_get_x(parser.get_name_addr, + '(foo) Dinsdale (bar) (bird)', + '(foo) Dinsdale (bar) (bird)', + ' Dinsdale ', + [], + '') + self.assertEqual(name_addr.display_name, 'Dinsdale') + self.assertEqual(name_addr.local_part, 'dinsdale') + self.assertEqual(name_addr.domain, 'example.com') + self.assertIsNone(name_addr.route) + self.assertEqual(name_addr.addr_spec, 'dinsdale@example.com') + + def test_get_name_addr_name_with_cfws_and_dots(self): + name_addr = self._test_get_x(parser.get_name_addr, + '(foo) Roy.A.Bear (bar) (bird)', + '(foo) Roy.A.Bear (bar) (bird)', + ' "Roy.A.Bear" ', + [errors.ObsoleteHeaderDefect]*2, + '') + self.assertEqual(name_addr.display_name, 'Roy.A.Bear') + self.assertEqual(name_addr.local_part, 'dinsdale') + 
self.assertEqual(name_addr.domain, 'example.com') + self.assertIsNone(name_addr.route) + self.assertEqual(name_addr.addr_spec, 'dinsdale@example.com') + + def test_get_name_addr_qs_name(self): + name_addr = self._test_get_x(parser.get_name_addr, + '"Roy.A.Bear" ', + '"Roy.A.Bear" ', + '"Roy.A.Bear" ', + [], + '') + self.assertEqual(name_addr.display_name, 'Roy.A.Bear') + self.assertEqual(name_addr.local_part, 'dinsdale') + self.assertEqual(name_addr.domain, 'example.com') + self.assertIsNone(name_addr.route) + self.assertEqual(name_addr.addr_spec, 'dinsdale@example.com') + + def test_get_name_addr_with_route(self): + name_addr = self._test_get_x(parser.get_name_addr, + '"Roy.A.Bear" <@two.example.com: dinsdale@example.com>', + '"Roy.A.Bear" <@two.example.com: dinsdale@example.com>', + '"Roy.A.Bear" <@two.example.com: dinsdale@example.com>', + [errors.ObsoleteHeaderDefect], + '') + self.assertEqual(name_addr.display_name, 'Roy.A.Bear') + self.assertEqual(name_addr.local_part, 'dinsdale') + self.assertEqual(name_addr.domain, 'example.com') + self.assertEqual(name_addr.route, ['two.example.com']) + self.assertEqual(name_addr.addr_spec, 'dinsdale@example.com') + + def test_get_name_addr_ends_at_special(self): + name_addr = self._test_get_x(parser.get_name_addr, + '"Roy.A.Bear" , next', + '"Roy.A.Bear" ', + '"Roy.A.Bear" ', + [], + ', next') + self.assertEqual(name_addr.display_name, 'Roy.A.Bear') + self.assertEqual(name_addr.local_part, 'dinsdale') + self.assertEqual(name_addr.domain, 'example.com') + self.assertIsNone(name_addr.route) + self.assertEqual(name_addr.addr_spec, 'dinsdale@example.com') + + def test_get_name_addr_no_content_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_name_addr(' (foo) ') + + def test_get_name_addr_no_content_before_special_raises(self): + with self.assertRaises(errors.HeaderParseError): + parser.get_name_addr(' (foo) ,') + + def test_get_name_addr_no_angle_after_display_name_raises(self): + with 
self.assertRaises(errors.HeaderParseError): + parser.get_name_addr('foo bar') + + # get_mailbox + + def test_get_mailbox_addr_spec_only(self): + mailbox = self._test_get_x(parser.get_mailbox, + 'dinsdale@example.com', + 'dinsdale@example.com', + 'dinsdale@example.com', + [], + '') + self.assertEqual(mailbox.token_type, 'mailbox') + self.assertIsNone(mailbox.display_name) + self.assertEqual(mailbox.local_part, 'dinsdale') + self.assertEqual(mailbox.domain, 'example.com') + self.assertIsNone(mailbox.route) + self.assertEqual(mailbox.addr_spec, 'dinsdale@example.com') + + def test_get_mailbox_angle_addr_only(self): + mailbox = self._test_get_x(parser.get_mailbox, + '', + '', + '', + [], + '') + self.assertEqual(mailbox.token_type, 'mailbox') + self.assertIsNone(mailbox.display_name) + self.assertEqual(mailbox.local_part, 'dinsdale') + self.assertEqual(mailbox.domain, 'example.com') + self.assertIsNone(mailbox.route) + self.assertEqual(mailbox.addr_spec, 'dinsdale@example.com') + + def test_get_mailbox_name_addr(self): + mailbox = self._test_get_x(parser.get_mailbox, + '"Roy A. Bear" ', + '"Roy A. Bear" ', + '"Roy A. Bear" ', + [], + '') + self.assertEqual(mailbox.token_type, 'mailbox') + self.assertEqual(mailbox.display_name, 'Roy A. Bear') + self.assertEqual(mailbox.local_part, 'dinsdale') + self.assertEqual(mailbox.domain, 'example.com') + self.assertIsNone(mailbox.route) + self.assertEqual(mailbox.addr_spec, 'dinsdale@example.com') + + def test_get_mailbox_ends_at_special(self): + mailbox = self._test_get_x(parser.get_mailbox, + '"Roy A. Bear" , rest', + '"Roy A. Bear" ', + '"Roy A. Bear" ', + [], + ', rest') + self.assertEqual(mailbox.token_type, 'mailbox') + self.assertEqual(mailbox.display_name, 'Roy A. 
Bear') + self.assertEqual(mailbox.local_part, 'dinsdale') + self.assertEqual(mailbox.domain, 'example.com') + self.assertIsNone(mailbox.route) + self.assertEqual(mailbox.addr_spec, 'dinsdale@example.com') + + def test_get_mailbox_quoted_strings_in_atom_list(self): + mailbox = self._test_get_x(parser.get_mailbox, + '""example" example"@example.com', + '""example" example"@example.com', + 'example example@example.com', + [errors.InvalidHeaderDefect]*3, + '') + self.assertEqual(mailbox.local_part, 'example example') + self.assertEqual(mailbox.domain, 'example.com') + self.assertEqual(mailbox.addr_spec, '"example example"@example.com') + + # get_mailbox_list + + def test_get_mailbox_list_single_addr(self): + mailbox_list = self._test_get_x(parser.get_mailbox_list, + 'dinsdale@example.com', + 'dinsdale@example.com', + 'dinsdale@example.com', + [], + '') + self.assertEqual(mailbox_list.token_type, 'mailbox-list') + self.assertEqual(len(mailbox_list.mailboxes), 1) + mailbox = mailbox_list.mailboxes[0] + self.assertIsNone(mailbox.display_name) + self.assertEqual(mailbox.local_part, 'dinsdale') + self.assertEqual(mailbox.domain, 'example.com') + self.assertIsNone(mailbox.route) + self.assertEqual(mailbox.addr_spec, 'dinsdale@example.com') + self.assertEqual(mailbox_list.mailboxes, + mailbox_list.all_mailboxes) + + def test_get_mailbox_list_two_simple_addr(self): + mailbox_list = self._test_get_x(parser.get_mailbox_list, + 'dinsdale@example.com, dinsdale@test.example.com', + 'dinsdale@example.com, dinsdale@test.example.com', + 'dinsdale@example.com, dinsdale@test.example.com', + [], + '') + self.assertEqual(mailbox_list.token_type, 'mailbox-list') + self.assertEqual(len(mailbox_list.mailboxes), 2) + self.assertEqual(mailbox_list.mailboxes[0].addr_spec, + 'dinsdale@example.com') + self.assertEqual(mailbox_list.mailboxes[1].addr_spec, + 'dinsdale@test.example.com') + self.assertEqual(mailbox_list.mailboxes, + mailbox_list.all_mailboxes) + + def 
test_get_mailbox_list_two_name_addr(self): + mailbox_list = self._test_get_x(parser.get_mailbox_list, + ('"Roy A. Bear" ,' + ' "Fred Flintstone" '), + ('"Roy A. Bear" ,' + ' "Fred Flintstone" '), + ('"Roy A. Bear" ,' + ' "Fred Flintstone" '), + [], + '') + self.assertEqual(len(mailbox_list.mailboxes), 2) + self.assertEqual(mailbox_list.mailboxes[0].addr_spec, + 'dinsdale@example.com') + self.assertEqual(mailbox_list.mailboxes[0].display_name, + 'Roy A. Bear') + self.assertEqual(mailbox_list.mailboxes[1].addr_spec, + 'dinsdale@test.example.com') + self.assertEqual(mailbox_list.mailboxes[1].display_name, + 'Fred Flintstone') + self.assertEqual(mailbox_list.mailboxes, + mailbox_list.all_mailboxes) + + def test_get_mailbox_list_two_complex(self): + mailbox_list = self._test_get_x(parser.get_mailbox_list, + ('(foo) "Roy A. Bear" (bar),' + ' "Fred Flintstone" '), + ('(foo) "Roy A. Bear" (bar),' + ' "Fred Flintstone" '), + (' "Roy A. Bear" ,' + ' "Fred Flintstone" '), + [errors.ObsoleteHeaderDefect], + '') + self.assertEqual(len(mailbox_list.mailboxes), 2) + self.assertEqual(mailbox_list.mailboxes[0].addr_spec, + 'dinsdale@example.com') + self.assertEqual(mailbox_list.mailboxes[0].display_name, + 'Roy A. Bear') + self.assertEqual(mailbox_list.mailboxes[1].addr_spec, + 'dinsdale@test.example.com') + self.assertEqual(mailbox_list.mailboxes[1].display_name, + 'Fred Flintstone') + self.assertEqual(mailbox_list.mailboxes, + mailbox_list.all_mailboxes) + + def test_get_mailbox_list_unparseable_mailbox_null(self): + mailbox_list = self._test_get_x(parser.get_mailbox_list, + ('"Roy A. Bear"[] dinsdale@example.com,' + ' "Fred Flintstone" '), + ('"Roy A. Bear"[] dinsdale@example.com,' + ' "Fred Flintstone" '), + ('"Roy A. 
Bear"[] dinsdale@example.com,' + ' "Fred Flintstone" '), + [errors.InvalidHeaderDefect, # the 'extra' text after the local part + errors.InvalidHeaderDefect, # the local part with no angle-addr + errors.ObsoleteHeaderDefect, # period in extra text (example.com) + errors.ObsoleteHeaderDefect], # (bird) in valid address. + '') + self.assertEqual(len(mailbox_list.mailboxes), 1) + self.assertEqual(len(mailbox_list.all_mailboxes), 2) + self.assertEqual(mailbox_list.all_mailboxes[0].token_type, + 'invalid-mailbox') + self.assertIsNone(mailbox_list.all_mailboxes[0].display_name) + self.assertEqual(mailbox_list.all_mailboxes[0].local_part, + 'Roy A. Bear') + self.assertIsNone(mailbox_list.all_mailboxes[0].domain) + self.assertEqual(mailbox_list.all_mailboxes[0].addr_spec, + '"Roy A. Bear"') + self.assertIs(mailbox_list.all_mailboxes[1], + mailbox_list.mailboxes[0]) + self.assertEqual(mailbox_list.mailboxes[0].addr_spec, + 'dinsdale@test.example.com') + self.assertEqual(mailbox_list.mailboxes[0].display_name, + 'Fred Flintstone') + + def test_get_mailbox_list_junk_after_valid_address(self): + mailbox_list = self._test_get_x(parser.get_mailbox_list, + ('"Roy A. Bear" @@,' + ' "Fred Flintstone" '), + ('"Roy A. Bear" @@,' + ' "Fred Flintstone" '), + ('"Roy A. Bear" @@,' + ' "Fred Flintstone" '), + [errors.InvalidHeaderDefect], + '') + self.assertEqual(len(mailbox_list.mailboxes), 1) + self.assertEqual(len(mailbox_list.all_mailboxes), 2) + self.assertEqual(mailbox_list.all_mailboxes[0].addr_spec, + 'dinsdale@example.com') + self.assertEqual(mailbox_list.all_mailboxes[0].display_name, + 'Roy A. 
Bear') + self.assertEqual(mailbox_list.all_mailboxes[0].token_type, + 'invalid-mailbox') + self.assertIs(mailbox_list.all_mailboxes[1], + mailbox_list.mailboxes[0]) + self.assertEqual(mailbox_list.mailboxes[0].addr_spec, + 'dinsdale@test.example.com') + self.assertEqual(mailbox_list.mailboxes[0].display_name, + 'Fred Flintstone') + + def test_get_mailbox_list_empty_list_element(self): + mailbox_list = self._test_get_x(parser.get_mailbox_list, + ('"Roy A. Bear" , (bird),,' + ' "Fred Flintstone" '), + ('"Roy A. Bear" , (bird),,' + ' "Fred Flintstone" '), + ('"Roy A. Bear" , ,,' + ' "Fred Flintstone" '), + [errors.ObsoleteHeaderDefect]*2, + '') + self.assertEqual(len(mailbox_list.mailboxes), 2) + self.assertEqual(mailbox_list.all_mailboxes, + mailbox_list.mailboxes) + self.assertEqual(mailbox_list.all_mailboxes[0].addr_spec, + 'dinsdale@example.com') + self.assertEqual(mailbox_list.all_mailboxes[0].display_name, + 'Roy A. Bear') + self.assertEqual(mailbox_list.mailboxes[1].addr_spec, + 'dinsdale@test.example.com') + self.assertEqual(mailbox_list.mailboxes[1].display_name, + 'Fred Flintstone') + + def test_get_mailbox_list_only_empty_elements(self): + mailbox_list = self._test_get_x(parser.get_mailbox_list, + '(foo),, (bar)', + '(foo),, (bar)', + ' ,, ', + [errors.ObsoleteHeaderDefect]*3, + '') + self.assertEqual(len(mailbox_list.mailboxes), 0) + self.assertEqual(mailbox_list.all_mailboxes, + mailbox_list.mailboxes) + + # get_group_list + + def test_get_group_list_cfws_only(self): + group_list = self._test_get_x(parser.get_group_list, + '(hidden);', + '(hidden)', + ' ', + [], + ';') + self.assertEqual(group_list.token_type, 'group-list') + self.assertEqual(len(group_list.mailboxes), 0) + self.assertEqual(group_list.mailboxes, + group_list.all_mailboxes) + + def test_get_group_list_mailbox_list(self): + group_list = self._test_get_x(parser.get_group_list, + 'dinsdale@example.org, "Fred A. Bear" ', + 'dinsdale@example.org, "Fred A. 
Bear" ', + 'dinsdale@example.org, "Fred A. Bear" ', + [], + '') + self.assertEqual(group_list.token_type, 'group-list') + self.assertEqual(len(group_list.mailboxes), 2) + self.assertEqual(group_list.mailboxes, + group_list.all_mailboxes) + self.assertEqual(group_list.mailboxes[1].display_name, + 'Fred A. Bear') + + def test_get_group_list_obs_group_list(self): + group_list = self._test_get_x(parser.get_group_list, + ', (foo),,(bar)', + ', (foo),,(bar)', + ', ,, ', + [errors.ObsoleteHeaderDefect], + '') + self.assertEqual(group_list.token_type, 'group-list') + self.assertEqual(len(group_list.mailboxes), 0) + self.assertEqual(group_list.mailboxes, + group_list.all_mailboxes) + + def test_get_group_list_comment_only_invalid(self): + group_list = self._test_get_x(parser.get_group_list, + '(bar)', + '(bar)', + ' ', + [errors.InvalidHeaderDefect], + '') + self.assertEqual(group_list.token_type, 'group-list') + self.assertEqual(len(group_list.mailboxes), 0) + self.assertEqual(group_list.mailboxes, + group_list.all_mailboxes) + + # get_group + + def test_get_group_empty(self): + group = self._test_get_x(parser.get_group, + 'Monty Python:;', + 'Monty Python:;', + 'Monty Python:;', + [], + '') + self.assertEqual(group.token_type, 'group') + self.assertEqual(group.display_name, 'Monty Python') + self.assertEqual(len(group.mailboxes), 0) + self.assertEqual(group.mailboxes, + group.all_mailboxes) + + def test_get_group_cfws_only(self): + group = self._test_get_x(parser.get_group, + 'Monty Python: (hidden);', + 'Monty Python: (hidden);', + 'Monty Python: ;', + [], + '') + self.assertEqual(group.token_type, 'group') + self.assertEqual(group.display_name, 'Monty Python') + self.assertEqual(len(group.mailboxes), 0) + self.assertEqual(group.mailboxes, + group.all_mailboxes) + + def test_get_group_single_mailbox(self): + group = self._test_get_x(parser.get_group, + 'Monty Python: "Fred A. Bear" ;', + 'Monty Python: "Fred A. Bear" ;', + 'Monty Python: "Fred A. 
Bear" ;', + [], + '') + self.assertEqual(group.token_type, 'group') + self.assertEqual(group.display_name, 'Monty Python') + self.assertEqual(len(group.mailboxes), 1) + self.assertEqual(group.mailboxes, + group.all_mailboxes) + self.assertEqual(group.mailboxes[0].addr_spec, + 'dinsdale@example.com') + + def test_get_group_mixed_list(self): + group = self._test_get_x(parser.get_group, + ('Monty Python: "Fred A. Bear" ,' + '(foo) Roger , x@test.example.com;'), + ('Monty Python: "Fred A. Bear" ,' + '(foo) Roger , x@test.example.com;'), + ('Monty Python: "Fred A. Bear" ,' + ' Roger , x@test.example.com;'), + [], + '') + self.assertEqual(group.token_type, 'group') + self.assertEqual(group.display_name, 'Monty Python') + self.assertEqual(len(group.mailboxes), 3) + self.assertEqual(group.mailboxes, + group.all_mailboxes) + self.assertEqual(group.mailboxes[0].display_name, + 'Fred A. Bear') + self.assertEqual(group.mailboxes[1].display_name, + 'Roger') + self.assertEqual(group.mailboxes[2].local_part, 'x') + + def test_get_group_one_invalid(self): + group = self._test_get_x(parser.get_group, + ('Monty Python: "Fred A. Bear" ,' + '(foo) Roger ping@exampele.com, x@test.example.com;'), + ('Monty Python: "Fred A. Bear" ,' + '(foo) Roger ping@exampele.com, x@test.example.com;'), + ('Monty Python: "Fred A. Bear" ,' + ' Roger ping@exampele.com, x@test.example.com;'), + [errors.InvalidHeaderDefect, # non-angle addr makes local part invalid + errors.InvalidHeaderDefect], # and its not obs-local either: no dots. + '') + self.assertEqual(group.token_type, 'group') + self.assertEqual(group.display_name, 'Monty Python') + self.assertEqual(len(group.mailboxes), 2) + self.assertEqual(len(group.all_mailboxes), 3) + self.assertEqual(group.mailboxes[0].display_name, + 'Fred A. 
Bear') + self.assertEqual(group.mailboxes[1].local_part, 'x') + self.assertIsNone(group.all_mailboxes[1].display_name) + + # get_address + + def test_get_address_simple(self): + address = self._test_get_x(parser.get_address, + 'dinsdale@example.com', + 'dinsdale@example.com', + 'dinsdale@example.com', + [], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 1) + self.assertEqual(address.mailboxes, + address.all_mailboxes) + self.assertEqual(address.mailboxes[0].domain, + 'example.com') + self.assertEqual(address[0].token_type, + 'mailbox') + + def test_get_address_complex(self): + address = self._test_get_x(parser.get_address, + '(foo) "Fred A. Bear" <(bird)dinsdale@example.com>', + '(foo) "Fred A. Bear" <(bird)dinsdale@example.com>', + ' "Fred A. Bear" < dinsdale@example.com>', + [], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 1) + self.assertEqual(address.mailboxes, + address.all_mailboxes) + self.assertEqual(address.mailboxes[0].display_name, + 'Fred A. 
Bear') + self.assertEqual(address[0].token_type, + 'mailbox') + + def test_get_address_empty_group(self): + address = self._test_get_x(parser.get_address, + 'Monty Python:;', + 'Monty Python:;', + 'Monty Python:;', + [], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 0) + self.assertEqual(address.mailboxes, + address.all_mailboxes) + self.assertEqual(address[0].token_type, + 'group') + self.assertEqual(address[0].display_name, + 'Monty Python') + + def test_get_address_group(self): + address = self._test_get_x(parser.get_address, + 'Monty Python: x@example.com, y@example.com;', + 'Monty Python: x@example.com, y@example.com;', + 'Monty Python: x@example.com, y@example.com;', + [], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 2) + self.assertEqual(address.mailboxes, + address.all_mailboxes) + self.assertEqual(address[0].token_type, + 'group') + self.assertEqual(address[0].display_name, + 'Monty Python') + self.assertEqual(address.mailboxes[0].local_part, 'x') + + def test_get_address_quoted_local_part(self): + address = self._test_get_x(parser.get_address, + '"foo bar"@example.com', + '"foo bar"@example.com', + '"foo bar"@example.com', + [], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 1) + self.assertEqual(address.mailboxes, + address.all_mailboxes) + self.assertEqual(address.mailboxes[0].domain, + 'example.com') + self.assertEqual(address.mailboxes[0].local_part, + 'foo bar') + self.assertEqual(address[0].token_type, 'mailbox') + + def test_get_address_ends_at_special(self): + address = self._test_get_x(parser.get_address, + 'dinsdale@example.com, next', + 'dinsdale@example.com', + 'dinsdale@example.com', + [], + ', next') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 1) + self.assertEqual(address.mailboxes, + address.all_mailboxes) + 
self.assertEqual(address.mailboxes[0].domain, + 'example.com') + self.assertEqual(address[0].token_type, 'mailbox') + + def test_get_address_invalid_mailbox_invalid(self): + address = self._test_get_x(parser.get_address, + 'ping example.com, next', + 'ping example.com', + 'ping example.com', + [errors.InvalidHeaderDefect, # addr-spec with no domain + errors.InvalidHeaderDefect, # invalid local-part + errors.InvalidHeaderDefect, # missing .s in local-part + ], + ', next') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 0) + self.assertEqual(len(address.all_mailboxes), 1) + self.assertIsNone(address.all_mailboxes[0].domain) + self.assertEqual(address.all_mailboxes[0].local_part, 'ping example.com') + self.assertEqual(address[0].token_type, 'invalid-mailbox') + + def test_get_address_quoted_strings_in_atom_list(self): + address = self._test_get_x(parser.get_address, + '""example" example"@example.com', + '""example" example"@example.com', + 'example example@example.com', + [errors.InvalidHeaderDefect]*3, + '') + self.assertEqual(address.all_mailboxes[0].local_part, 'example example') + self.assertEqual(address.all_mailboxes[0].domain, 'example.com') + self.assertEqual(address.all_mailboxes[0].addr_spec, '"example example"@example.com') + + + # get_address_list + + def test_get_address_list_mailboxes_simple(self): + address_list = self._test_get_x(parser.get_address_list, + 'dinsdale@example.com', + 'dinsdale@example.com', + 'dinsdale@example.com', + [], + '') + self.assertEqual(address_list.token_type, 'address-list') + self.assertEqual(len(address_list.mailboxes), 1) + self.assertEqual(address_list.mailboxes, + address_list.all_mailboxes) + self.assertEqual([str(x) for x in address_list.mailboxes], + [str(x) for x in address_list.addresses]) + self.assertEqual(address_list.mailboxes[0].domain, 'example.com') + self.assertEqual(address_list[0].token_type, 'address') + self.assertIsNone(address_list[0].display_name) + + def 
test_get_address_list_mailboxes_two_simple(self): + address_list = self._test_get_x(parser.get_address_list, + 'foo@example.com, "Fred A. Bar" ', + 'foo@example.com, "Fred A. Bar" ', + 'foo@example.com, "Fred A. Bar" ', + [], + '') + self.assertEqual(address_list.token_type, 'address-list') + self.assertEqual(len(address_list.mailboxes), 2) + self.assertEqual(address_list.mailboxes, + address_list.all_mailboxes) + self.assertEqual([str(x) for x in address_list.mailboxes], + [str(x) for x in address_list.addresses]) + self.assertEqual(address_list.mailboxes[0].local_part, 'foo') + self.assertEqual(address_list.mailboxes[1].display_name, "Fred A. Bar") + + def test_get_address_list_mailboxes_complex(self): + address_list = self._test_get_x(parser.get_address_list, + ('"Roy A. Bear" , ' + '(ping) Foo ,' + 'Nobody Is. Special '), + ('"Roy A. Bear" , ' + '(ping) Foo ,' + 'Nobody Is. Special '), + ('"Roy A. Bear" , ' + 'Foo ,' + '"Nobody Is. Special" '), + [errors.ObsoleteHeaderDefect, # period in Is. + errors.ObsoleteHeaderDefect], # cfws in domain + '') + self.assertEqual(address_list.token_type, 'address-list') + self.assertEqual(len(address_list.mailboxes), 3) + self.assertEqual(address_list.mailboxes, + address_list.all_mailboxes) + self.assertEqual([str(x) for x in address_list.mailboxes], + [str(x) for x in address_list.addresses]) + self.assertEqual(address_list.mailboxes[0].domain, 'example.com') + self.assertEqual(address_list.mailboxes[0].token_type, 'mailbox') + self.assertEqual(address_list.addresses[0].token_type, 'address') + self.assertEqual(address_list.mailboxes[1].local_part, 'x') + self.assertEqual(address_list.mailboxes[2].display_name, + 'Nobody Is. Special') + + def test_get_address_list_mailboxes_invalid_addresses(self): + address_list = self._test_get_x(parser.get_address_list, + ('"Roy A. Bear" , ' + '(ping) Foo x@example.com[],' + 'Nobody Is. Special <(bird)example.(bad)com>'), + ('"Roy A. Bear" , ' + '(ping) Foo x@example.com[],' + 'Nobody Is. 
Special <(bird)example.(bad)com>'), + ('"Roy A. Bear" , ' + 'Foo x@example.com[],' + '"Nobody Is. Special" < example. com>'), + [errors.InvalidHeaderDefect, # invalid address in list + errors.InvalidHeaderDefect, # 'Foo x' local part invalid. + errors.InvalidHeaderDefect, # Missing . in 'Foo x' local part + errors.ObsoleteHeaderDefect, # period in 'Is.' disp-name phrase + errors.InvalidHeaderDefect, # no domain part in addr-spec + errors.ObsoleteHeaderDefect], # addr-spec has comment in it + '') + self.assertEqual(address_list.token_type, 'address-list') + self.assertEqual(len(address_list.mailboxes), 1) + self.assertEqual(len(address_list.all_mailboxes), 3) + self.assertEqual([str(x) for x in address_list.all_mailboxes], + [str(x) for x in address_list.addresses]) + self.assertEqual(address_list.mailboxes[0].domain, 'example.com') + self.assertEqual(address_list.mailboxes[0].token_type, 'mailbox') + self.assertEqual(address_list.addresses[0].token_type, 'address') + self.assertEqual(address_list.addresses[1].token_type, 'address') + self.assertEqual(len(address_list.addresses[0].mailboxes), 1) + self.assertEqual(len(address_list.addresses[1].mailboxes), 0) + self.assertEqual(len(address_list.addresses[1].mailboxes), 0) + self.assertEqual( + address_list.addresses[1].all_mailboxes[0].local_part, 'Foo x') + self.assertEqual( + address_list.addresses[2].all_mailboxes[0].display_name, + "Nobody Is. 
Special") + + def test_get_address_list_group_empty(self): + address_list = self._test_get_x(parser.get_address_list, + 'Monty Python: ;', + 'Monty Python: ;', + 'Monty Python: ;', + [], + '') + self.assertEqual(address_list.token_type, 'address-list') + self.assertEqual(len(address_list.mailboxes), 0) + self.assertEqual(address_list.mailboxes, + address_list.all_mailboxes) + self.assertEqual(len(address_list.addresses), 1) + self.assertEqual(address_list.addresses[0].token_type, 'address') + self.assertEqual(address_list.addresses[0].display_name, 'Monty Python') + self.assertEqual(len(address_list.addresses[0].mailboxes), 0) + + def test_get_address_list_group_simple(self): + address_list = self._test_get_x(parser.get_address_list, + 'Monty Python: dinsdale@example.com;', + 'Monty Python: dinsdale@example.com;', + 'Monty Python: dinsdale@example.com;', + [], + '') + self.assertEqual(address_list.token_type, 'address-list') + self.assertEqual(len(address_list.mailboxes), 1) + self.assertEqual(address_list.mailboxes, + address_list.all_mailboxes) + self.assertEqual(address_list.mailboxes[0].domain, 'example.com') + self.assertEqual(address_list.addresses[0].display_name, + 'Monty Python') + self.assertEqual(address_list.addresses[0].mailboxes[0].domain, + 'example.com') + + def test_get_address_list_group_and_mailboxes(self): + address_list = self._test_get_x(parser.get_address_list, + ('Monty Python: dinsdale@example.com, "Fred" ;, ' + 'Abe , Bee '), + ('Monty Python: dinsdale@example.com, "Fred" ;, ' + 'Abe , Bee '), + ('Monty Python: dinsdale@example.com, "Fred" ;, ' + 'Abe , Bee '), + [], + '') + self.assertEqual(address_list.token_type, 'address-list') + self.assertEqual(len(address_list.mailboxes), 4) + self.assertEqual(address_list.mailboxes, + address_list.all_mailboxes) + self.assertEqual(len(address_list.addresses), 3) + self.assertEqual(address_list.mailboxes[0].local_part, 'dinsdale') + self.assertEqual(address_list.addresses[0].display_name, + 'Monty 
Python') + self.assertEqual(address_list.addresses[0].mailboxes[0].domain, + 'example.com') + self.assertEqual(address_list.addresses[0].mailboxes[1].local_part, + 'flint') + self.assertEqual(address_list.addresses[1].mailboxes[0].local_part, + 'x') + self.assertEqual(address_list.addresses[2].mailboxes[0].local_part, + 'y') + self.assertEqual(str(address_list.addresses[1]), + str(address_list.mailboxes[2])) + + +if __name__ == '__main__': + unittest.main() diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/test_email.py --- a/Lib/test/test_email/test_email.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/test/test_email/test_email.py Tue Jul 19 13:24:56 2011 -0400 @@ -15,11 +15,10 @@ from io import StringIO, BytesIO from itertools import chain -import email - +from email import message_from_string, message_from_file, message_from_bytes from email.charset import Charset from email.header import Header, decode_header, make_header -from email.parser import Parser, HeaderParser +from email.parser import Parser, BytesParser, HeaderParser, BytesHeaderParser from email.generator import Generator, DecodedGenerator from email.message import Message from email.mime.application import MIMEApplication @@ -31,8 +30,14 @@ from email.mime.multipart import MIMEMultipart from email import utils from email import errors +from email import header +from email import policy +from email import charset +from email import message from email import encoders +from email import generator from email import iterators +from email import feedparser from email import base64mime from email import quoprimime @@ -170,7 +175,7 @@ # HeaderParser caused an exception when flattened. with openfile('msg_46.txt') as fp: msgdata = fp.read() - parser = HeaderParser() + parser = HeaderParser(policy=self.email5_policy) msg = parser.parsestr(msgdata) out = StringIO() gen = Generator(out, True, 0) @@ -181,10 +186,10 @@ # Make sure new bytes header parser also passes this. 
with openfile('msg_46.txt', 'rb') as fp: msgdata = fp.read() - parser = email.parser.BytesHeaderParser() + parser = BytesHeaderParser(policy=self.email5_policy) msg = parser.parsebytes(msgdata) out = BytesIO() - gen = email.generator.BytesGenerator(out) + gen = generator.BytesGenerator(out) gen.flatten(msg) self.assertEqual(out.getvalue(), msgdata) @@ -259,35 +264,42 @@ eq(text, NL.join(lines[1:])) def test_bad_param(self): - msg = email.message_from_string("Content-Type: blarg; baz; boo\n") + msg = message_from_string("Content-Type: blarg; baz; boo\n", + policy=self.email5_policy) self.assertEqual(msg.get_param('baz'), '') def test_missing_filename(self): - msg = email.message_from_string("From: foo\n") + msg = message_from_string("From: foo\n", + policy=self.email5_policy) self.assertEqual(msg.get_filename(), None) def test_bogus_filename(self): - msg = email.message_from_string( - "Content-Disposition: blarg; filename\n") + msg = message_from_string( + "Content-Disposition: blarg; filename\n", + policy=self.email5_policy) self.assertEqual(msg.get_filename(), '') def test_missing_boundary(self): - msg = email.message_from_string("From: foo\n") + msg = message_from_string("From: foo\n", + policy=self.email5_policy) self.assertEqual(msg.get_boundary(), None) def test_get_params(self): eq = self.assertEqual - msg = email.message_from_string( - 'X-Header: foo=one; bar=two; baz=three\n') + msg = message_from_string( + 'X-Header: foo=one; bar=two; baz=three\n', + policy=self.email5_policy) eq(msg.get_params(header='x-header'), [('foo', 'one'), ('bar', 'two'), ('baz', 'three')]) - msg = email.message_from_string( - 'X-Header: foo; bar=one; baz=two\n') + msg = message_from_string( + 'X-Header: foo; bar=one; baz=two\n', + policy=self.email5_policy) eq(msg.get_params(header='x-header'), [('foo', ''), ('bar', 'one'), ('baz', 'two')]) eq(msg.get_params(), None) - msg = email.message_from_string( - 'X-Header: foo; bar="one"; baz=two\n') + msg = message_from_string( + 
'X-Header: foo; bar="one"; baz=two\n', + policy=self.email5_policy) eq(msg.get_params(header='x-header'), [('foo', ''), ('bar', 'one'), ('baz', 'two')]) @@ -298,13 +310,15 @@ def test_get_param(self): eq = self.assertEqual - msg = email.message_from_string( - "X-Header: foo=one; bar=two; baz=three\n") + msg = message_from_string( + "X-Header: foo=one; bar=two; baz=three\n", + policy=self.email5_policy) eq(msg.get_param('bar', header='x-header'), 'two') eq(msg.get_param('quuz', header='x-header'), None) eq(msg.get_param('quuz'), None) - msg = email.message_from_string( - 'X-Header: foo; bar="one"; baz=two\n') + msg = message_from_string( + 'X-Header: foo; bar="one"; baz=two\n', + policy=self.email5_policy) eq(msg.get_param('foo', header='x-header'), '') eq(msg.get_param('bar', header='x-header'), 'one') eq(msg.get_param('baz', header='x-header'), 'two') @@ -318,23 +332,27 @@ self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG') def test_get_param_with_semis_in_quotes(self): - msg = email.message_from_string( - 'Content-Type: image/pjpeg; name="Jim&&Jill"\n') + msg = message_from_string( + 'Content-Type: image/pjpeg; name="Jim&&Jill"\n', + policy=self.email5_policy) self.assertEqual(msg.get_param('name'), 'Jim&&Jill') self.assertEqual(msg.get_param('name', unquote=False), '"Jim&&Jill"') def test_get_param_with_quotes(self): - msg = email.message_from_string( - 'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"') + msg = message_from_string( + 'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"', + policy=self.email5_policy) self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz') - msg = email.message_from_string( - "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"") + msg = message_from_string( + "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"", + policy=self.email5_policy) self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz') def test_field_containment(self): unless = self.assertTrue - msg = 
email.message_from_string('Header: exists') + msg = message_from_string('Header: exists', + policy=self.email5_policy) unless('header' in msg) unless('Header' in msg) unless('HEADER' in msg) @@ -594,23 +612,51 @@ # Issue 5871: reject an attempt to embed a header inside a header value # (header injection attack). - def test_embeded_header_via_Header_rejected(self): + def test_embedded_header_via_Header_rejected(self): msg = Message() msg['Dummy'] = Header('dummy\nX-Injected-Header: test') self.assertRaises(errors.HeaderParseError, msg.as_string) - def test_embeded_header_via_string_rejected(self): + def test_embedded_header_via_string_rejected(self): msg = Message() - msg['Dummy'] = 'dummy\nX-Injected-Header: test' + with self.assertWarns(DeprecationWarning): + msg['Dummy'] = 'dummy\nX-Injected-Header: test' self.assertRaises(errors.HeaderParseError, msg.as_string) + def _test_duplicate_unique_header_raises(self, name, value): + msg = MIMEText("test") + msg[name] = value + with self.assertRaises(ValueError): + msg[name] = value + + def test_duplicate_subject_raises(self): + self._test_duplicate_unique_header_raises('subject', 'test') + + def test_duplicate_date_raises(self): + self._test_duplicate_unique_header_raises( + 'date', + 'Mon, 10 Dec 2010 15:55:20 -0000') + + def _test_duplicate_multiple_header_ok(self, name, value): + msg = MIMEText("test") + before = len(msg) + msg[name] = value + msg[name] = value + msg[name] = value + self.assertEqual(len(msg), before + 3) + + def test_multiple_resent_date_ok(self): + self._test_duplicate_multiple_header_ok( + 'resent-date', + 'Mon, 10 Dec 2010 15:55:20 -0000') + # Test the email.encoders module class TestEncoders(unittest.TestCase): def test_EncodersEncode_base64(self): with openfile('PyBanner048.gif', 'rb') as fp: bindata = fp.read() - mimed = email.mime.image.MIMEImage(bindata) + mimed = MIMEImage(bindata) base64ed = mimed.get_payload() # the transfer-encoded body lines should all be <=76 characters lines = 
base64ed.split('\n') @@ -642,7 +688,6 @@ msg = MIMEText('文', _charset='euc-jp') eq(msg['content-transfer-encoding'], '7bit') - # Test long header wrapping class TestLongHeaders(TestEmailBase): @@ -650,13 +695,13 @@ def test_split_long_continuation(self): eq = self.ndiffAssertEqual - msg = email.message_from_string("""\ + msg = message_from_string("""\ Subject: bug demonstration \t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789 \tmore text test -""") +""", policy=self.email5_policy) sfp = StringIO() g = Generator(sfp) g.flatten(msg) @@ -1044,16 +1089,17 @@ header_string = ('Britische Regierung gibt gr\xfcnes Licht ' 'f\xfcr Offshore-Windkraftprojekte ' '') + expected = """\ +Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= + =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= + +""" msg['Reply-To'] = header_string - self.assertRaises(UnicodeEncodeError, msg.as_string) + eq(msg.as_string(maxheaderlen=78), expected) msg = Message() msg['Reply-To'] = Header(header_string, 'utf-8', header_name='Reply-To') - eq(msg.as_string(maxheaderlen=78), """\ -Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?= - =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?= - -""") + eq(msg.as_string(maxheaderlen=78), expected) def test_long_to_header(self): eq = self.ndiffAssertEqual @@ -1147,9 +1193,11 @@ t = """\ iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp""" - msg['Face-1'] = t + with self.assertWarns(DeprecationWarning): + msg['Face-1'] = t msg['Face-2'] = Header(t, header_name='Face-2') - msg['Face-3'] = ' ' + t + with self.assertWarns(DeprecationWarning): + msg['Face-3'] = ' ' + t # XXX This splitting is all wrong. 
It the first value line should be # snug against the field name or the space after the header not there. eq(msg.as_string(maxheaderlen=78), """\ @@ -1171,7 +1219,7 @@ '([172.25.1.3]) by zima.siliconimage.com with ' 'Microsoft SMTPSVC(5.0.2195.4905); ' 'Wed, 16 Oct 2002 07:41:11 -0700') - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) eq(msg.as_string(maxheaderlen=78), '''\ Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700 @@ -1471,7 +1519,7 @@ --BOUNDARY-- """ - msg = Parser().parsestr(text) + msg = Parser(policy=self.email5_policy).parsestr(text) self.ndiffAssertEqual(text, msg.as_string()) def test_no_parts_in_a_multipart_with_none_epilogue(self): @@ -1742,7 +1790,7 @@ def test_boundary_with_leading_space(self): eq = self.assertEqual - msg = email.message_from_string('''\ + msg = message_from_string('''\ MIME-Version: 1.0 Content-Type: multipart/mixed; boundary=" XXXX" @@ -1754,13 +1802,13 @@ Content-Type: text/plain -- XXXX-- -''') +''', policy=self.email5_policy) self.assertTrue(msg.is_multipart()) eq(msg.get_boundary(), ' XXXX') eq(len(msg.get_payload()), 2) def test_boundary_without_trailing_newline(self): - m = Parser().parsestr("""\ + m = Parser(policy=self.email5_policy).parsestr("""\ Content-Type: multipart/mixed; boundary="===============0012394164==" MIME-Version: 1.0 @@ -1780,7 +1828,7 @@ def _msgobj(self, filename): with openfile(filename) as fp: - return email.message_from_file(fp, policy=self.policy) + return message_from_file(fp, policy=self.policy) def test_parse_missing_minor_type(self): eq = self.assertEqual @@ -1799,15 +1847,20 @@ unless(isinstance(self.get_defects(inner)[0], errors.StartBoundaryNotFoundDefect)) - def test_multipart_no_boundary(self): + def test_multipart_no_boundary_and_dup_header(self): unless = self.assertTrue msg = self._msgobj('msg_25.txt') unless(isinstance(msg.get_payload(), str)) - 
self.assertEqual(len(self.get_defects(msg)), 2) + self.assertEqual(len(self.get_defects(msg)), 3) unless(isinstance(self.get_defects(msg)[0], - errors.NoBoundaryInMultipartDefect)) + errors.DuplicateHeaderDefect), + self.get_defects(msg)) unless(isinstance(self.get_defects(msg)[1], - errors.MultipartInvariantViolationDefect)) + errors.NoBoundaryInMultipartDefect), + self.get_defects(msg)) + unless(isinstance(self.get_defects(msg)[2], + errors.MultipartInvariantViolationDefect), + self.get_defects(msg)) multipart_msg = textwrap.dedent("""\ Date: Wed, 14 Nov 2007 12:56:23 GMT @@ -1833,7 +1886,7 @@ """) def test_multipart_invalid_cte(self): - msg = email.message_from_string( + msg = message_from_string( self.multipart_msg.format("\nContent-Transfer-Encoding: base64"), policy = self.policy) self.assertEqual(len(self.get_defects(msg)), 1) @@ -1841,14 +1894,14 @@ errors.InvalidMultipartContentTransferEncodingDefect) def test_multipart_no_cte_no_defect(self): - msg = email.message_from_string( + msg = message_from_string( self.multipart_msg.format(''), policy = self.policy) self.assertEqual(len(self.get_defects(msg)), 0) def test_multipart_valid_cte_no_defect(self): for cte in ('7bit', '8bit', 'BINary'): - msg = email.message_from_string( + msg = message_from_string( self.multipart_msg.format( "\nContent-Transfer-Encoding: {}".format(cte)), policy = self.policy) @@ -1931,7 +1984,7 @@ def test_first_line_is_continuation_header(self): eq = self.assertEqual m = ' Line 1\nLine 2\nLine 3' - msg = email.message_from_string(m, policy=self.policy) + msg = message_from_string(m, policy=self.policy) eq(msg.keys(), []) eq(msg.get_payload(), 'Line 2\nLine 3') eq(len(self.get_defects(msg)), 1) @@ -1942,7 +1995,7 @@ class TestNonConformant(TestNonConformantBase, TestEmailBase): - policy=email.policy.default + policy = policy.default + TestEmailBase.email5_policy def get_defects(self, obj): return obj.defects @@ -1950,13 +2003,13 @@ class TestNonConformantCapture(TestNonConformantBase, 
TestEmailBase): - class CapturePolicy(email.policy.Policy): + class CapturePolicy(policy.Policy): captured = None def register_defect(self, obj, defect): self.captured.append(defect) def setUp(self): - self.policy = self.CapturePolicy(captured=list()) + self.policy = self.CapturePolicy(captured=list()) + self.email5_policy def get_defects(self, obj): return self.policy.captured @@ -1964,16 +2017,21 @@ class TestRaisingDefects(TestEmailBase): + policy = TestEmailBase.email5_policy + policy.strict + def _msgobj(self, filename): with openfile(filename) as fp: - return email.message_from_file(fp, policy=email.policy.strict) + return message_from_file(fp, policy=self.policy) def test_same_boundary_inner_outer(self): with self.assertRaises(errors.StartBoundaryNotFoundDefect): self._msgobj('msg_15.txt') - def test_multipart_no_boundary(self): - with self.assertRaises(errors.NoBoundaryInMultipartDefect): + #XXX need new test file to test this one, since msg_25 raises sooner now. + #def test_multipart_no_boundary(self): + + def test_duplicate_header(self): + with self.assertRaises(errors.DuplicateHeaderDefect): self._msgobj('msg_25.txt') def test_lying_multipart(self): @@ -1988,7 +2046,7 @@ def test_first_line_is_continuation_header(self): m = ' Line 1\nLine 2\nLine 3' with self.assertRaises(errors.FirstHeaderLineIsContinuationDefect): - msg = email.message_from_string(m, policy=email.policy.strict) + msg = message_from_string(m, policy=policy.strict) # Test RFC 2047 header encoding and decoding @@ -2239,7 +2297,7 @@ def test_default_type(self): eq = self.assertEqual with openfile('msg_30.txt') as fp: - msg = email.message_from_file(fp) + msg = message_from_file(fp, policy=self.email5_policy) container1 = msg.get_payload(0) eq(container1.get_default_type(), 'message/rfc822') eq(container1.get_content_type(), 'message/rfc822') @@ -2256,7 +2314,7 @@ def test_default_type_with_explicit_container_type(self): eq = self.assertEqual with openfile('msg_28.txt') as fp: - msg = 
email.message_from_file(fp) + msg = message_from_file(fp, policy=self.email5_policy) container1 = msg.get_payload(0) eq(container1.get_default_type(), 'message/rfc822') eq(container1.get_content_type(), 'message/rfc822') @@ -2370,7 +2428,7 @@ def _msgobj(self, filename): with openfile(filename) as fp: data = fp.read() - msg = email.message_from_string(data) + msg = message_from_string(data, policy=self.email5_policy) return msg, data def _idempotent(self, msg, text, unixfrom=False): @@ -2528,7 +2586,7 @@ def test_message_from_string(self): with openfile('msg_01.txt') as fp: text = fp.read() - msg = email.message_from_string(text) + msg = message_from_string(text, policy=self.email5_policy) s = StringIO() # Don't wrap/continue long headers since we're trying to test # idempotency. @@ -2540,7 +2598,7 @@ with openfile('msg_01.txt') as fp: text = fp.read() fp.seek(0) - msg = email.message_from_file(fp) + msg = message_from_file(fp, policy=self.email5_policy) s = StringIO() # Don't wrap/continue long headers since we're trying to test # idempotency. 
@@ -2557,12 +2615,14 @@ class MyMessage(Message): pass - msg = email.message_from_string(text, MyMessage) + msg = message_from_string(text, MyMessage, + policy=self.email5_policy) unless(isinstance(msg, MyMessage)) # Try something more complicated with openfile('msg_02.txt') as fp: text = fp.read() - msg = email.message_from_string(text, MyMessage) + msg = message_from_string(text, MyMessage, + policy=self.email5_policy) for subpart in msg.walk(): unless(isinstance(subpart, MyMessage)) @@ -2573,11 +2633,13 @@ pass with openfile('msg_01.txt') as fp: - msg = email.message_from_file(fp, MyMessage) + msg = message_from_file(fp, MyMessage, + policy=self.email5_policy) unless(isinstance(msg, MyMessage)) # Try something more complicated with openfile('msg_02.txt') as fp: - msg = email.message_from_file(fp, MyMessage) + msg = message_from_file(fp, MyMessage, + policy=self.email5_policy) for subpart in msg.walk(): unless(isinstance(subpart, MyMessage)) @@ -2699,7 +2761,7 @@ self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) def test_quotes_unicode_names(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. utils.formataddr() should be rfc2047 aware. name = "H\u00e4ns W\u00fcrst" addr = 'person@dom.ain' utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= " @@ -2709,7 +2771,7 @@ latin1_quopri) def test_accepts_any_charset_like_object(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. utils.formataddr() should be rfc2047 aware. name = "H\u00e4ns W\u00fcrst" addr = 'person@dom.ain' utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= " @@ -2724,7 +2786,7 @@ utf8_base64) def test_invalid_charset_like_object_raises_error(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. utils.formataddr() should be rfc2047 aware. 
name = "H\u00e4ns W\u00fcrst" addr = 'person@dom.ain' # A object without a header_encode method: @@ -2733,7 +2795,7 @@ bad_charset) def test_unicode_address_raises_error(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. utils.formataddr() should be rfc2047 aware. addr = 'pers\u00f6n@dom.in' self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) @@ -2923,7 +2985,7 @@ def test_make_msgid_domain(self): self.assertEqual( - email.utils.make_msgid(domain='testdomain-string')[-19:], + utils.make_msgid(domain='testdomain-string')[-19:], '@testdomain-string>') @@ -2997,8 +3059,7 @@ ("\nf", 1), ("\r\n", 1), ] - from email.feedparser import BufferedSubFile, NeedMoreData - bsf = BufferedSubFile() + bsf = feedparser.BufferedSubFile() om = [] nt = 0 for il, n in imt: @@ -3007,7 +3068,7 @@ n1 = 0 while True: ol = bsf.readline() - if ol == NeedMoreData: + if ol == feedparser.NeedMoreData: break om.append(ol) n1 += 1 @@ -3023,7 +3084,7 @@ eq = self.assertEqual # Parse only the headers of a complex multipart MIME document with openfile('msg_02.txt') as fp: - msg = HeaderParser().parse(fp) + msg = HeaderParser(policy=self.email5_policy).parse(fp) eq(msg['from'], 'ppp-request@zzz.org') eq(msg['to'], 'ppp@zzz.org') eq(msg.get_content_type(), 'multipart/mixed') @@ -3034,7 +3095,8 @@ eq = self.assertEqual # Parse only the headers of a complex multipart MIME document with openfile('msg_02.txt', 'rb') as fp: - msg = email.parser.BytesHeaderParser().parse(fp) + msg = BytesHeaderParser( + policy=self.email5_policy).parse(fp) eq(msg['from'], 'ppp-request@zzz.org') eq(msg['to'], 'ppp@zzz.org') eq(msg.get_content_type(), 'multipart/mixed') @@ -3046,7 +3108,7 @@ eq = self.assertEqual # This message contains a line after the Subject: header that has only # whitespace, but it is not empty! 
- msg = email.message_from_string("""\ + msg = message_from_string("""\ From: aperson@dom.ain To: bperson@dom.ain Subject: the next line has a space on it @@ -3055,7 +3117,7 @@ Message-ID: spam Here's the message body -""") +""", policy=self.email5_policy) eq(msg['subject'], 'the next line has a space on it\n ') eq(msg['message-id'], 'spam') eq(msg.get_payload(), "Here's the message body\n") @@ -3064,7 +3126,7 @@ eq = self.assertEqual # Like the previous test, but the subject line is the last # header. - msg = email.message_from_string("""\ + msg = message_from_string("""\ From: aperson@dom.ain To: bperson@dom.ain Date: Mon, 8 Apr 2002 15:09:19 -0400 @@ -3073,7 +3135,7 @@ \x20 Here's the message body -""") +""", policy=self.email5_policy) eq(msg['subject'], 'the next line has a space on it\n ') eq(msg['message-id'], 'spam') eq(msg.get_payload(), "Here's the message body\n") @@ -3081,7 +3143,7 @@ def test_crlf_separation(self): eq = self.assertEqual with openfile('msg_26.txt', newline='\n') as fp: - msg = Parser().parse(fp) + msg = Parser(policy=self.email5_policy).parse(fp) eq(len(msg.get_payload()), 2) part1 = msg.get_payload(0) eq(part1.get_content_type(), 'text/plain') @@ -3093,7 +3155,7 @@ # Using newline='\n' preserves the crlfs in this input file. 
with openfile('msg_26.txt', newline='\n') as fp: text = fp.read() - msg = email.message_from_string(text) + msg = message_from_string(text, policy=self.email5_policy) s = StringIO() g = Generator(s) g.flatten(msg, linesep='\r\n') @@ -3102,9 +3164,9 @@ def test_crlf_control_via_policy(self): with openfile('msg_26.txt', newline='\n') as fp: text = fp.read() - msg = email.message_from_string(text) + msg = message_from_string(text, policy=self.email5_policy) s = StringIO() - g = email.generator.Generator(s, policy=email.policy.SMTP) + g = generator.Generator(s, policy=policy.SMTP) g.flatten(msg) self.assertEqual(s.getvalue(), text) @@ -3112,9 +3174,9 @@ # msg_27 is lf separated with openfile('msg_27.txt', newline='\n') as fp: text = fp.read() - msg = email.message_from_string(text) + msg = message_from_string(text, policy=self.email5_policy) s = StringIO() - g = email.generator.Generator(s, policy=email.policy.SMTP) + g = generator.Generator(s, policy=policy.SMTP) g.flatten(msg, linesep='\n') self.assertEqual(s.getvalue(), text) @@ -3124,7 +3186,7 @@ eq = self.assertEqual neq = self.ndiffAssertEqual with openfile('msg_28.txt') as fp: - msg = email.message_from_file(fp) + msg = message_from_file(fp, policy=self.email5_policy) # Structure is: # multipart/digest # message/rfc822 @@ -3156,7 +3218,8 @@ lines = ['From: Andrew Person From', 'From']) eq(msg.get_payload(), 'body') @@ -3181,13 +3244,13 @@ def test_rfc2822_space_not_allowed_in_header(self): eq = self.assertEqual m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) eq(len(msg.keys()), 0) def test_rfc2822_one_character_header(self): eq = self.assertEqual m = 'A: first header\nB: second header\nCC: third header\n\nbody' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) headers = msg.keys() headers.sort() eq(headers, ['A', 'B', 'CC']) @@ -3209,11 +3272,11 @@ 
"\n" "--BOUNDARY--\n" ) - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n')) -class Test8BitBytesHandling(unittest.TestCase): +class Test8BitBytesHandling(TestEmailBase): # In Python3 all input is string, but that doesn't work if the actual input # uses an 8bit transfer encoding. To hack around that, in email 5.1 we # decode byte streams using the surrogateescape error handler, and @@ -3236,7 +3299,7 @@ m = self.bodytest_msg.format(charset='utf-8', cte='8bit', bodyline='pöstal').encode('utf-8') - msg = email.message_from_bytes(m) + msg = message_from_bytes(m, policy=self.email5_policy) self.assertEqual(msg.get_payload(), "pöstal\n") self.assertEqual(msg.get_payload(decode=True), "pöstal\n".encode('utf-8')) @@ -3245,7 +3308,7 @@ m = self.bodytest_msg.format(charset='notavalidcharset', cte='8bit', bodyline='pöstal').encode('utf-8') - msg = email.message_from_bytes(m) + msg = message_from_bytes(m, policy=self.email5_policy) self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n") self.assertEqual(msg.get_payload(decode=True), "pöstal\n".encode('utf-8')) @@ -3263,7 +3326,7 @@ m = self.bodytest_msg.format(charset='utf-8', cte='quoted-printable', bodyline='p=C3=B6stál').encode('utf-8') - msg = email.message_from_bytes(m) + msg = message_from_bytes(m, policy=self.email5_policy) self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n') self.assertEqual(msg.get_payload(decode=True), 'pöstál\n'.encode('utf-8')) @@ -3278,7 +3341,7 @@ m = self.bodytest_msg.format(charset='ascii', cte='quoted-printable', bodyline='p=C3=B6stál').encode('utf-8') - msg = email.message_from_bytes(m) + msg = message_from_bytes(m, policy=self.email5_policy) self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n') self.assertEqual(msg.get_payload(decode=True), 'pöstál\n'.encode('utf-8')) @@ -3289,7 +3352,7 @@ m = self.bodytest_msg.format(charset='utf-8', cte='base64', 
bodyline='cMO2c3RhbAá=').encode('utf-8') - msg = email.message_from_bytes(m) + msg = message_from_bytes(m, policy=self.email5_policy) self.assertEqual(msg.get_payload(decode=True), 'cMO2c3RhbAá=\n'.encode('utf-8')) @@ -3299,7 +3362,7 @@ m = self.bodytest_msg.format(charset='utf-8', cte='uuencode', bodyline='<,.V7bit conversion. @@ -3523,7 +3599,7 @@ self.latin_bin_msg.decode('latin-1')+'\n') def test_bytes_feedparser(self): - bfp = email.feedparser.BytesFeedParser() + bfp = feedparser.BytesFeedParser(policy=self.email5_policy) for i in range(0, len(self.latin_bin_msg), 10): bfp.feed(self.latin_bin_msg[i:i+10]) m = bfp.close() @@ -3532,9 +3608,10 @@ def test_crlf_flatten(self): with openfile('msg_26.txt', 'rb') as fp: text = fp.read() - msg = email.message_from_bytes(text) + msg = message_from_bytes(text, + policy=self.email5_policy) s = BytesIO() - g = email.generator.BytesGenerator(s) + g = generator.BytesGenerator(s) g.flatten(msg, linesep='\r\n') self.assertEqual(s.getvalue(), text) @@ -3578,9 +3655,9 @@ --b1_76a486bee62b0d200f33dc2ca08220ad-- """).encode('utf-8') - msg = email.message_from_bytes(source) + msg = message_from_bytes(source, policy=self.email5_policy) s = BytesIO() - g = email.generator.BytesGenerator(s) + g = generator.BytesGenerator(s) g.flatten(msg) self.assertEqual(s.getvalue(), source) @@ -3588,9 +3665,9 @@ # msg_26 is crlf terminated with openfile('msg_26.txt', 'rb') as fp: text = fp.read() - msg = email.message_from_bytes(text) + msg = message_from_bytes(text, policy=self.email5_policy) s = BytesIO() - g = email.generator.BytesGenerator(s, policy=email.policy.SMTP) + g = generator.BytesGenerator(s, policy=policy.SMTP) g.flatten(msg) self.assertEqual(s.getvalue(), text) @@ -3598,26 +3675,28 @@ # msg_27 is lf separated with openfile('msg_27.txt', 'rb') as fp: text = fp.read() - msg = email.message_from_bytes(text) + msg = message_from_bytes(text, policy=self.email5_policy) s = BytesIO() - g = email.generator.BytesGenerator(s, 
policy=email.policy.SMTP) + g = generator.BytesGenerator(s, policy=policy.SMTP) g.flatten(msg, linesep='\n') self.assertEqual(s.getvalue(), text) def test_must_be_7bit_handles_unknown_8bit(self): - msg = email.message_from_bytes(self.non_latin_bin_msg) + msg = message_from_bytes(self.non_latin_bin_msg, + policy=self.email5_policy) out = BytesIO() - g = email.generator.BytesGenerator(out, - policy=email.policy.default.clone(must_be_7bit=True)) + g = generator.BytesGenerator(out, + policy=policy.default.clone(must_be_7bit=True)) g.flatten(msg) self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped.encode('ascii')) def test_must_be_7bit_transforms_8bit_cte(self): - msg = email.message_from_bytes(self.latin_bin_msg) + msg = message_from_bytes(self.latin_bin_msg, + policy=self.email5_policy) out = BytesIO() - g = email.generator.BytesGenerator(out, - policy=email.policy.default.clone(must_be_7bit=True)) + g = generator.BytesGenerator(out, + policy=policy.default.clone(must_be_7bit=True)) g.flatten(msg) self.assertEqual(out.getvalue(), self.latin_bin_msg_as7bit.encode('ascii')) @@ -3633,12 +3712,12 @@ with openfile(filename, 'rb') as fp: data = fp.read() data = self.normalize_linesep_regex.sub(self.blinesep, data) - msg = email.message_from_bytes(data) + msg = message_from_bytes(data, policy=self.email5_policy) return msg, data def _idempotent(self, msg, data, unixfrom=False): b = BytesIO() - g = email.generator.BytesGenerator(b, maxheaderlen=0) + g = generator.BytesGenerator(b, maxheaderlen=0) g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep) self.assertEqual(data, b.getvalue()) @@ -4370,29 +4449,29 @@ def test_escaped_8bit_header(self): x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' e = x.decode('ascii', 'surrogateescape') - h = Header(e, charset=email.charset.UNKNOWN8BIT) + h = Header(e, charset=charset.UNKNOWN8BIT) self.assertEqual(str(h), 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') - 
self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) + self.assertEqual(header.decode_header(h), [(x, 'unknown-8bit')]) def test_header_handles_binary_unknown8bit(self): x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' - h = Header(x, charset=email.charset.UNKNOWN8BIT) + h = Header(x, charset=charset.UNKNOWN8BIT) self.assertEqual(str(h), 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') - self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')]) + self.assertEqual(header.decode_header(h), [(x, 'unknown-8bit')]) def test_make_header_handles_binary_unknown8bit(self): x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big' - h = Header(x, charset=email.charset.UNKNOWN8BIT) - h2 = email.header.make_header(email.header.decode_header(h)) + h = Header(x, charset=charset.UNKNOWN8BIT) + h2 = header.make_header(header.decode_header(h)) self.assertEqual(str(h2), 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big') - self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')]) + self.assertEqual(header.decode_header(h2), [(x, 'unknown-8bit')]) def test_modify_returned_list_does_not_change_header(self): h = Header('test') - chunks = email.header.decode_header(h) + chunks = header.decode_header(h) chunks.append(('ascii', 'test2')) self.assertEqual(str(h), 'test') @@ -4428,7 +4507,8 @@ # Issue 11401 (regression from email 4.x) Note that the space after # the header doesn't reflect the input, but this is also the way # email 4.x behaved. At some point it would be nice to fix that. 
- msg = email.message_from_string("EmptyHeader:") + msg = message_from_string("EmptyHeader:", + policy=self.email5_policy) self.assertEqual(str(msg), "EmptyHeader: \n\n") def test_encode_preserves_leading_ws_on_value(self): @@ -4538,7 +4618,7 @@ \tfilename*2="is it not.pdf" ''') - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual(msg.get_filename(), 'This is even more ***fun*** is it not.pdf') self.assertEqual(m, msg.as_string()) @@ -4551,7 +4631,7 @@ \tfilename*2="is it not.pdf" ''') - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual(msg.get_filename(), 'This is even more ***fun*** is it not.pdf') self.assertEqual(m, msg.as_string()) @@ -4563,7 +4643,7 @@ Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm ''' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) param = msg.get_param('NAME') self.assertFalse(isinstance(param, tuple)) self.assertEqual( @@ -4578,7 +4658,7 @@ \tfilename*2="is it not.pdf" ''' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual(msg.get_filename(), 'This is even more ***fun*** is it not.pdf') @@ -4590,7 +4670,7 @@ \tfilename*2="is it not.pdf" ''' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual(msg.get_filename(), 'This is even more ***fun*** is it not.pdf') @@ -4602,7 +4682,7 @@ \tfilename*2="is it not.pdf" ''' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual( msg.get_filename(), 'This%20is%20even%20more%20***fun*** is it not.pdf') @@ -4615,7 +4695,7 @@ \tfilename*2="is it not.pdf" ''' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual( msg.get_filename(), 
'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf') @@ -4628,7 +4708,7 @@ \tboundary*2="is it not.pdf" ''' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual(msg.get_boundary(), 'This is even more ***fun*** is it not.pdf') @@ -4641,7 +4721,7 @@ \tcharset*2="is it not.pdf" ''' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual(msg.get_content_charset(), 'this is even more ***fun*** is it not.pdf') @@ -4653,7 +4733,7 @@ \tfilename*2="is it not.pdf" ''' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual(msg.get_filename(), 'This is even more ***fun*** is it not.pdf') @@ -4662,7 +4742,7 @@ Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D """ - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) # This should return None because non-ascii characters in the charset # are not allowed. self.assertEqual(msg.get_content_charset(), None) @@ -4672,7 +4752,7 @@ Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D """ - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) # This should return None because non-ascii characters in the charset # are not allowed. 
self.assertEqual(msg.get_content_charset(), None) @@ -4685,7 +4765,7 @@ \tfilename*2*="is it not.pdf%E2" ''' - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual(msg.get_filename(), 'This is even more ***fun*** is it not.pdf\ufffd') @@ -4695,7 +4775,7 @@ Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt """ - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) self.assertEqual(msg.get_filename(), 'myfile.txt') def test_rfc2231_single_tick_in_filename_extended(self): @@ -4705,7 +4785,7 @@ \tname*0*=\"Frank's\"; name*1*=\" Document\" """ - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) charset, language, s = msg.get_param('name') eq(charset, None) eq(language, None) @@ -4716,7 +4796,7 @@ Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\" """ - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) param = msg.get_param('name') self.assertFalse(isinstance(param, tuple)) self.assertEqual(param, "Frank's Document") @@ -4728,7 +4808,7 @@ \tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\" """ - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) charset, language, s = msg.get_param('name') eq(charset, 'us-ascii') eq(language, 'en-us') @@ -4740,7 +4820,7 @@ \tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\" """ - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) param = msg.get_param('name') self.assertFalse(isinstance(param, tuple)) self.assertEqual(param, "us-ascii'en-us'Frank's Document") @@ -4751,7 +4831,7 @@ Content-Type: application/x-foo; name=\"Frank's Document\" """ - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) eq(msg.get_param('name'), "Frank's Document") def 
test_rfc2231_encoded_then_unencoded_segments(self): @@ -4763,7 +4843,7 @@ \tname*2*=\" For You\" """ - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) charset, language, s = msg.get_param('name') eq(charset, 'us-ascii') eq(language, 'en-us') @@ -4778,7 +4858,7 @@ \tname*2*=\" For You\" """ - msg = email.message_from_string(m) + msg = message_from_string(m, policy=self.email5_policy) charset, language, s = msg.get_param('name') eq(charset, 'us-ascii') eq(language, 'en-us') @@ -4794,7 +4874,8 @@ def _msg_and_obj(self, filename): with openfile(filename) as fp: original = fp.read() - msg = email.message_from_string(original) + msg = message_from_string(original, + policy=self.email5_policy) return original, msg def _signed_parts_eq(self, original, result): diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/test_generator.py --- a/Lib/test/test_email/test_generator.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/test/test_email/test_generator.py Tue Jul 19 13:24:56 2011 -0400 @@ -4,12 +4,18 @@ from email import message_from_string, message_from_bytes from email.generator import Generator, BytesGenerator from email import policy +from email import message from test.test_email import TestEmailBase # XXX: move generator tests from test_email into here at some point. +# XXX: also need to test both old_policy_defaults and future_policy_defaults. 
-class TestGeneratorBase(): +# +# ASCII only in source +# + +class TestGeneratorASCIIBase(): long_subject = { 0: textwrap.dedent("""\ @@ -58,7 +64,8 @@ long_subject[100] = long_subject[0] def maxheaderlen_parameter_test(self, n): - msg = self.msgmaker(self.long_subject[0]) + msg = self.msgmaker(self.long_subject[0], + policy=self.policy) s = self.ioclass() g = self.genclass(s, maxheaderlen=n) g.flatten(msg) @@ -77,7 +84,8 @@ self.maxheaderlen_parameter_test(20) def maxheaderlen_policy_test(self, n): - msg = self.msgmaker(self.long_subject[0]) + msg = self.msgmaker(self.long_subject[0], + policy=self.policy) s = self.ioclass() g = self.genclass(s, policy=policy.default.clone(max_line_length=n)) g.flatten(msg) @@ -96,7 +104,8 @@ self.maxheaderlen_policy_test(20) def maxheaderlen_parm_overrides_policy_test(self, n): - msg = self.msgmaker(self.long_subject[0]) + msg = self.msgmaker(self.long_subject[0], + policy=self.email5_policy) s = self.ioclass() g = self.genclass(s, maxheaderlen=n, policy=policy.default.clone(max_line_length=10)) @@ -116,20 +125,99 @@ self.maxheaderlen_parm_overrides_policy_test(20) -class TestGenerator(TestGeneratorBase, TestEmailBase): +class TestGeneratorASCII(TestGeneratorASCIIBase, TestEmailBase): msgmaker = staticmethod(message_from_string) genclass = Generator ioclass = io.StringIO + policy = TestEmailBase.email5_policy -class TestBytesGenerator(TestGeneratorBase, TestEmailBase): +class TestGeneratorASCIIEmail6Policy(TestGeneratorASCII): + + policy = TestEmailBase.email6_policy + + +class TestBytesGeneratorASCII(TestGeneratorASCIIBase, TestEmailBase): msgmaker = staticmethod(message_from_bytes) genclass = BytesGenerator ioclass = io.BytesIO + policy = TestEmailBase.email5_policy + long_subject = {key: x.encode('ascii') - for key, x in TestGeneratorBase.long_subject.items()} + for key, x in TestGeneratorASCIIBase.long_subject.items()} + + +class TestBytesGeneratorASCIIEmail6Policy(TestBytesGeneratorASCII): + + policy = 
TestEmailBase.email6_policy + + +# +# Non ASCII in source/model +# + +class TestGeneratorNonASCIIBase: + + unicode_subject = textwrap.dedent("""\ + To: whom_it_may_concern@example.com + From: nobody_you_want_to_know@example.com + Subject: Mein kleiner grüner Kaktus, Es hat viele Stacheln und + beißt mich oft. + + Aber ich liebe ihn trotzdem. + """) + + def test_flatten_nonascii_model(self): + msg = message.Message(policy=self.policy) + msg['Subject'] = ("Subject: Mein kleiner grüner Kaktus, " + "Es hat viele Stacheln und beißt mich oft.") + self.assertGeneratesEqual(msg, textwrap.dedent("""\ + Subject: =?utf-8?q?Subject=3A_Mein_kleiner_gr=C3=BCner_Kaktus=2C_Es_hat_viel?= + =?utf-8?q?e_Stacheln_und_bei=C3=9Ft_mich_oft=2E?= + + """)) + + +class TestGeneratorNonASCII(TestGeneratorNonASCIIBase): + + def assertGeneratesEqual(self, msg, expected): + s = io.StringIO() + g = Generator(s) + g.flatten(msg) + self.assertEqual(s.getvalue(), expected) + + +class TestBytesGeneratorNonASCII(TestGeneratorNonASCIIBase): + + def assertGeneratesEqual(self, msg, expected): + s = io.BytesIO() + g = BytesGenerator(s) + g.flatten(msg) + self.assertEqual(s.getvalue(), expected.encode('ascii')) + + +class TestGeneratorNonASCIIEamil5(TestGeneratorNonASCII, TestEmailBase): + + policy = TestEmailBase.email5_policy + + +class TestGeneratorNonASCIIEmail6(TestGeneratorNonASCII, TestEmailBase): + + policy = TestEmailBase.email6_policy + + +class TestBytesGeneratorNonASCIIEamil5(TestBytesGeneratorNonASCII, + TestEmailBase): + + policy = TestEmailBase.email5_policy + + +class TestBytesGeneratorNonASCIIEmail6(TestBytesGeneratorNonASCII, + TestEmailBase): + + policy = TestEmailBase.email6_policy if __name__ == '__main__': diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/test_header.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_email/test_header.py Tue Jul 19 13:24:56 2011 -0400 @@ -0,0 +1,323 @@ +import datetime +import unittest +from email import header +from email 
import errors +from test.test_email import TestEmailBase + + +class TestBaseHeaderFeatures(TestEmailBase): + + def test_str(self): + h = header.HeaderFactory()('subject', 'this is a test') + self.assertIsInstance(h, str) + self.assertEqual(h, 'this is a test') + self.assertEqual(str(h), 'this is a test') + + def test_substr(self): + h = header.HeaderFactory()('subject', 'this is a test') + self.assertEqual(h[5:7], 'is') + + def test_has_name(self): + h = header.HeaderFactory()('subject', 'this is a test') + self.assertEqual(h.name, 'subject') + + def test_source(self): + h = header.HeaderFactory()('subject', 'this is a test') + self.assertIsNone(h.source) + h = header.HeaderFactory()('subject', 'this is a test', 'unfolded value') + self.assertEqual(h.source, 'this is a test') + + def test_value(self): + h = header.HeaderFactory()('subject', 'this is a test') + self.assertEqual(h.value, 'this is a test') + h = header.HeaderFactory()('subject', 'this is a test', 'unfolded value', + use_decoded=True) + self.assertEqual(h.value, 'unfolded value') + + def _test_attr_ro(self, attr): + h = header.HeaderFactory()('subject', 'this is a test') + with self.assertRaises(AttributeError): + setattr(h, attr, 'foo') + + def test_name_read_only(self): + self._test_attr_ro('name') + + def test_source_read_only(self): + self._test_attr_ro('source') + + def test_value_read_only(self): + self._test_attr_ro('value') + + def test_defects_read_only(self): + self._test_attr_ro('defects') + + def test_defects_is_tuple(self): + h = header.HeaderFactory()('subject', 'this is a test') + self.assertEqual(len(h.defects), 0) + self.assertIsInstance(h.defects, tuple) + # Make sure it is still true when there are defects. + h = header.HeaderFactory()('date', '') + self.assertEqual(len(h.defects), 1) + self.assertIsInstance(h.defects, tuple) + + +class TestBaseHeaderCompatHackBase: + + # XXX: these should turn into errors in 3.4. 
+ + def test_folded_value_alone_auto_decoded_with_warning(self): + value = self.NL.join(['this is', ' a test']) + with self.assertWarnsRegex(DeprecationWarning, "linesep"): + h = header.HeaderFactory()('subject', value) + self.assertEqual(h, value) + self.assertEqual(h.value, 'this is a test') + self.assertIsNone(h.source) + + def test_RFC2047_value_alone_auto_decoded_with_warning(self): + value = '=?utf-8?q?this_is_a_test?=' + with self.assertWarnsRegex(DeprecationWarning, "encoded word"): + h = header.HeaderFactory()('subject', value) + self.assertEqual(h, value) + self.assertEqual(h.value, 'this is a test') + self.assertIsNone(h.source) + +class TestBaseHeaderCompatHackLF(TestBaseHeaderCompatHackBase, TestEmailBase): + NL = '\n' + +class TestBaseHeaderCompatHackCRLF(TestBaseHeaderCompatHackBase, TestEmailBase): + NL = '\r\n' + + +class TestDateHeader(TestEmailBase): + + datestring = 'Sun, 23 Sep 2001 20:10:55 -0700' + utcoffset = datetime.timedelta(hours=-7) + tz = datetime.timezone(utcoffset) + dt = datetime.datetime(2001, 9, 23, 20, 10, 55, tzinfo=tz) + + def test_parse_date(self): + h = header.HeaderFactory()('date', self.datestring) + self.assertEqual(h, self.datestring) + self.assertEqual(h.datetime, self.dt) + self.assertEqual(h.datetime.utcoffset(), self.utcoffset) + self.assertEqual(h.defects, ()) + + def test_set_from_datetime(self): + h = header.HeaderFactory()('date', self.dt) + self.assertEqual(h, self.datestring) + self.assertEqual(h.datetime, self.dt) + self.assertEqual(h.defects, ()) + + def test_date_header_properties(self): + h = header.HeaderFactory()('date', self.datestring) + self.assertIsInstance(h, header.UniqueDateHeader) + self.assertEqual(h.max_count, 1) + self.assertEqual(h.defects, ()) + + def test_resent_date_header_properties(self): + h = header.HeaderFactory()('resent-date', self.datestring) + self.assertIsInstance(h, header.DateHeader) + self.assertEqual(h.max_count, None) + self.assertEqual(h.defects, ()) + + def 
test_no_value_is_defect(self): + h = header.HeaderFactory()('date', '') + self.assertEqual(len(h.defects), 1) + self.assertIsInstance(h.defects[0], errors.HeaderMissingRequiredValue) + + def test_datetime_read_only(self): + h = header.HeaderFactory()('date', self.datestring) + with self.assertRaises(AttributeError): + h.datetime = 'foo' + + +class TestAddressHeader(TestEmailBase): + + def test_address_read_only(self): + h = header.HeaderFactory()('sender', 'abc@xyz.com') + with self.assertRaises(AttributeError): + h.address = 'foo' + + def test_addresses_read_only(self): + h = header.HeaderFactory()('sender', 'abc@xyz.com') + self.assertIsInstance(h.groups, tuple) + with self.assertRaises(AttributeError): + h.addresses = 'foo' + + def test_groups_read_only(self): + h = header.HeaderFactory()('sender', 'abc@xyz.com') + self.assertIsInstance(h.addresses, tuple) + with self.assertRaises(AttributeError): + h.groups = 'foo' + + def _test_single_addr(self, source, unfolded, decoded, defects, reformatted, + name, addr_spec, username, domain, comment): + h = header.HeaderFactory()('sender', source, unfolded, + use_decoded=True) + self.assertEqual(h, decoded) + self.assertEqual(h.source, source) + self.assertEqual(h.value, decoded) + self.assertDefectsEqual(h.defects, defects) + a = h.address + self.assertEqual([a], list(h.groups)) + self.assertEqual([a], list(h.addresses)) + self.assertEqual(a.reformatted, reformatted) + self.assertEqual(a.name, name) + self.assertEqual(a.addr_spec, addr_spec) + self.assertEqual(a.username, username) + self.assertEqual(a.domain, domain) + #self.assertEqual(a.comment, comment) + + examples = { + + 'empty': + ('<>', + '<>', + '<>', + [errors.InvalidHeaderDefect], + '', + '', + '', + '', + '', + None), + + 'address_only': + ('zippy@pinhead.com', + 'zippy@pinhead.com', + 'zippy@pinhead.com', + [], + 'zippy@pinhead.com', + '', + 'zippy@pinhead.com', + 'zippy', + 'pinhead.com', + None), + + 'name_and_address': + ('Zaphrod Beblebrux ', + 'Zaphrod 
Beblebrux ', + 'Zaphrod Beblebrux ', + [], + 'Zaphrod Beblebrux ', + 'Zaphrod Beblebrux', + 'zippy@pinhead.com', + 'zippy', + 'pinhead.com', + None), + + 'quoted_local_part': + ('Zaphrod Beblebrux <"foo bar"@pinhead.com>', + 'Zaphrod Beblebrux <"foo bar"@pinhead.com>', + 'Zaphrod Beblebrux <"foo bar"@pinhead.com>', + [], + 'Zaphrod Beblebrux <"foo bar"@pinhead.com>', + 'Zaphrod Beblebrux', + '"foo bar"@pinhead.com', + 'foo bar', + 'pinhead.com', + None), + + # The decoded differs from what formataddr produces: formataddr produces + # the ()s as quoted pairs. By RFC there is no need to quote ()s inside + # a quoted string, and minimal use of qp is encouraged. + 'quoted_parens_in_name': + (r'"A \(Special\) Person" ', + r'"A \(Special\) Person" ', + '"A (Special) Person" ', + [], + '"A (Special) Person" ', + 'A (Special) Person', + 'person@dom.ain', + 'person', + 'dom.ain', + None), + + 'quoted_backslashes_in_name': + (r'"Arthur \\Backslash\\ Foobar" ', + r'"Arthur \\Backslash\\ Foobar" ', + r'"Arthur \\Backslash\\ Foobar" ', + [], + r'"Arthur \\Backslash\\ Foobar" ', + r'Arthur \Backslash\ Foobar', + 'person@dom.ain', + 'person', + 'dom.ain', + None), + + 'name_with_dot': + ('John X. Doe ', + 'John X. Doe ', + 'John X. Doe ', + [errors.ObsoleteHeaderDefect], + '"John X. Doe" ', + 'John X. 
Doe', + 'jxd@example.com', + 'jxd', + 'example.com', + None), + + 'quoted_strings_in_local_part': + ('""example" example"@example.com', + '""example" example"@example.com', + '""example" example"@example.com', + [errors.InvalidHeaderDefect]*3, + '"example example"@example.com', + '', + '"example example"@example.com', + 'example example', + 'example.com', + None), + + 'escaped_quoted_strings_in_local_part': + (r'"\"example\" example"@example.com', + r'"\"example\" example"@example.com', + r'"\"example\" example"@example.com', + [], + r'"\"example\" example"@example.com', + '', + r'"\"example\" example"@example.com', + r'"example" example', + 'example.com', + None), + + 'escaped_escapes_in_local_part': + (r'"\\"example\\" example"@example.com', + r'"\\"example\\" example"@example.com', + r'"\\"example\\" example"@example.com', + [errors.InvalidHeaderDefect]*5, + r'"\\example\\\\ example"@example.com', + '', + r'"\\example\\\\ example"@example.com', + r'\example\\ example', + 'example.com', + None), + + } + + for name in examples: + locals()['test_'+name] = ( + lambda self, name=name: + self._test_single_addr(*self.examples[name])) + + # XXX: a quick and dirty address list test, more later. + def test_simple_address_list(self): + value = ('Fred , foo@example.com, ' + '"Harry W. Hastings" ') + h = header.HeaderFactory()('to', value, value) + self.assertEqual(h, value) + self.assertEqual(h.value, value) + self.assertEqual(h.source, value) + self.assertEqual(len(h.groups), 3) + self.assertEqual(len(h.addresses), 3) + self.assertEqual(h.groups[0], 'Fred ') + self.assertEqual(h.groups[1], 'foo@example.com') + self.assertEqual(h.groups[2], + '"Harry W. Hastings" ') + self.assertEqual(h.addresses[2].name, + 'Harry W. 
Hastings') + + + +if __name__ == '__main__': + unittest.main() diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/test_header_factory.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_email/test_header_factory.py Tue Jul 19 13:24:56 2011 -0400 @@ -0,0 +1,65 @@ +import unittest +from email import header +from test.test_email import TestEmailBase + + +class TestHeaderFactory(TestEmailBase): + + def test_arbitrary_name_unstructured(self): + factory = header.HeaderFactory() + h = factory('foobar', 'test', 'test') + self.assertIsInstance(h, header.BaseHeader) + self.assertIsInstance(h, header.UnstructuredHeader) + + def test_name_case_ignored(self): + factory = header.HeaderFactory() + # Whitebox check that test is valid + self.assertNotIn('Subject', factory.registry) + h = factory('Subject', 'test', 'test') + self.assertIsInstance(h, header.BaseHeader) + self.assertIsInstance(h, header.UniqueUnstructuredHeader) + + class FooBase: + def __init__(self, *args, **kw): + pass + + def test_override_default_base_class(self): + factory = header.HeaderFactory(base_class=self.FooBase) + h = factory('foobar', 'test', 'test') + self.assertIsInstance(h, self.FooBase) + self.assertIsInstance(h, header.UnstructuredHeader) + + class FooDefault: + parse = header.UnstructuredHeader.parse + + def test_override_default_class(self): + factory = header.HeaderFactory(default_class=self.FooDefault) + h = factory('foobar', 'test', 'test') + self.assertIsInstance(h, header.BaseHeader) + self.assertIsInstance(h, self.FooDefault) + + def test_override_default_class_only_overrides_default(self): + factory = header.HeaderFactory(default_class=self.FooDefault) + h = factory('subject', 'test', 'test') + self.assertIsInstance(h, header.BaseHeader) + self.assertIsInstance(h, header.UniqueUnstructuredHeader) + + def test_dont_use_default_map(self): + factory = header.HeaderFactory(use_default_map=False) + h = factory('subject', 'test', 'test') + self.assertIsInstance(h, 
header.BaseHeader) + self.assertIsInstance(h, header.UnstructuredHeader) + + def test_map_to_type(self): + factory = header.HeaderFactory() + h1 = factory('foobar', 'test', 'test') + factory.map_to_type('foobar', header.UniqueUnstructuredHeader) + h2 = factory('foobar', 'test', 'test') + self.assertIsInstance(h1, header.BaseHeader) + self.assertIsInstance(h1, header.UnstructuredHeader) + self.assertIsInstance(h2, header.BaseHeader) + self.assertIsInstance(h2, header.UniqueUnstructuredHeader) + + +if __name__ == '__main__': + unittest.main() diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/test_parser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_email/test_parser.py Tue Jul 19 13:24:56 2011 -0400 @@ -0,0 +1,190 @@ +import sys +import unittest +from test.test_email import TestEmailBase +from email import policy +from email import errors +from email.feedparser import FeedParser, BytesFeedParser + + +class TestAllFeedParserBase: + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + self.sNL = self.NL + self.bNL = self.sNL.encode('ascii') + + def make_input(self, lines): + return self.bNL.join(lines) + + # + # decoded_headers + # + + def decoded_headers_test(self, setting=None, restype=None): + if setting is None: + mypolicy = policy.default + else: + mypolicy = policy.default.clone(decoded_headers=setting) + p = self.parser(policy=mypolicy) + p.feed(self.make_input([ + b"Test: test", + b" =?utf-8?q?test?=", + b"", + b"test"])) + msg = p.close() + expected = { + 'raw': self.sNL.join(["test", " =?utf-8?q?test?="]), + 'decoded': 'test test' + }[restype] + self.assertEqual(msg['Test'], expected) + + def test_decoded_headers_false(self): + self.decoded_headers_test(False, 'raw') + + def test_decoded_headers_true(self): + self.decoded_headers_test(True, 'decoded') + + def test_decode_policy_default(self): + # This will fail with the first 3.4 version until the default + # is switched. 
Sorry, Georg, but you OKed this kind of thing :) + if sys.hexversion < 0x3040000: + with self.assertWarnsRegex(DeprecationWarning, "decoded_headers"): + self.decoded_headers_test(None, 'raw') + else: + self.decoded_headers_test(None, 'decoded') + + # + # Defect detection + # + + def duplicate_header_msg_testbed(self, headername, *, duplicate): + count = 2 if duplicate else 1 + p = self.parser(policy=self.email6_policy) + p.feed(self.make_input( + [(headername.encode('ascii') + b": test")] * count)) + return p.close() + + def test_single_nonunique_header_nodefect(self): + msg = self.duplicate_header_msg_testbed('foobar', duplicate=False) + self.assertEqual([], msg.defects) + + def test_multiple_nonunique_header_nodefect(self): + msg = self.duplicate_header_msg_testbed('foobar', duplicate=True) + self.assertEqual([], msg.defects) + + def test_single_unique_header_nodefect(self): + msg = self.duplicate_header_msg_testbed('Subject', duplicate=False) + self.assertEqual([], msg.defects) + + def test_multiple_unique_header_defect(self): + msg = self.duplicate_header_msg_testbed('Subject', duplicate=True) + self.assertEqual(len(msg.defects), 1) + self.assertIsInstance(msg.defects[0], errors.DuplicateHeaderDefect) + self.assertEqual(msg.defects[0].header_name, 'subject') + + +class TestFeedParserBase(TestAllFeedParserBase): + + def make_input(self, lines): + return TestAllFeedParserBase.make_input(self, lines).decode('ascii') + +class TestFeedParserLF(TestFeedParserBase, TestEmailBase): + NL = '\n' + parser = FeedParser + +class TestFeedParserCRLF(TestFeedParserBase, TestEmailBase): + NL = '\r\n' + parser = FeedParser + + +class TestBytesFeedParserBase(TestAllFeedParserBase): + + # XXX: check the defects list + # XXX: uncomment 'v.decoded' tests once Header is a BaseHeader. 
+ + def _test_invalid_byte_in_header(self, decode): + p = self.parser(policy=policy.default.clone(decoded_headers=decode)) + p.feed(self.make_input([ + b"Test: a b\xbfd test", + b"", + b"test"])) + msg = p.close() + v = msg['test'] + #self.assertEqual(v.decoded, "a b\uFFFDd test") + if not decode: + v = str(v) + self.assertEqual(v, "a b\uFFFDd test") + + def _test_invalid_byte_in_q_encoded_word(self, decode): + p = self.parser(policy=policy.default.clone(decoded_headers=decode)) + p.feed(self.make_input([ + b"Test: a =?utf-8?q?h\xbfader?=", + b"", + b"test"])) + msg = p.close() + v = msg['test'] + #self.assertEqual(v.decoded, "a h\uFFFDader") + if not decode: + v = str(v) + self.assertEqual(v, "a h\uFFFDader") + + def _test_invalid_byte_in_b_encoded_word(self, decode): + p = self.parser(policy=policy.default.clone(decoded_headers=decode)) + p.feed(self.make_input([ + b"Test: a =?utf-8?b?a\xbaGVhZGVy?=", + b"", + b"test"])) + msg = p.close() + v = msg['test'] + #self.assertEqual(v.decoded, "a header") + if not decode: + v = str(v) + self.assertEqual(v, "a header") + + def _test_invalid_byte_in_both(self, decode): + p = self.parser(policy=policy.default.clone(decoded_headers=decode)) + p.feed(self.make_input([ + b"Test: a b\xbfd =?utf-8?q?h\xbfader?=", + b"", + b"test"])) + msg = p.close() + v = msg['test'] + #self.assertEqual(v.decoded, "a b\uFFFDd h\uFFFDader") + if not decode: + v = str(v) + self.assertEqual(v, "a b\uFFFDd h\uFFFDader") + + def test_invalid_byte_in_header_decode_true(self): + self._test_invalid_byte_in_header(True) + + def test_invalid_byte_in_q_enooded_word_decode_true(self): + self._test_invalid_byte_in_q_encoded_word(True) + + def test_invalid_byte_in_b_enooded_word_decode_true(self): + self._test_invalid_byte_in_b_encoded_word(True) + + def test_invalid_byte_in_both_decode_true(self): + self._test_invalid_byte_in_both(True) + + # XXX: These tests fails because of a bug in email 5.1 that it isn't + # clear how to fix. 
+ def test_invalid_byte_in_header_decode_false(self): + self._test_invalid_byte_in_header(False) + + def XXXtest_invalid_byte_in_q_enooded_word_decode_false(self): + self._test_invalid_byte_in_q_encoded_word(False) + + def XXXtest_invalid_byte_in_b_enooded_word_decode_false(self): + self._test_invalid_byte_in_b_encoded_word(False) + + def XXXtest_invalid_byte_in_both_decode_false(self): + self._test_invalid_byte_in_both(False) + + +class TestBytesFeedParserLF(TestBytesFeedParserBase, TestEmailBase): + NL = '\n' + parser = BytesFeedParser + +class TestBytesFeedParserCRLF(TestBytesFeedParserBase, TestEmailBase): + NL = '\r\n' + parser = BytesFeedParser diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/test_pickleable.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_email/test_pickleable.py Tue Jul 19 13:24:56 2011 -0400 @@ -0,0 +1,56 @@ +import unittest +import textwrap +import copy +import pickle +from email import message_from_string +from email import header +from test.test_email import TestEmailBase + +class TestPickleCopyHeader(TestEmailBase): + + unstructured = header.HeaderFactory()('subject', 'this is a test') + + def test_deepcopy_unstructured(self): + h = copy.deepcopy(self.unstructured) + self.assertEqual(str(h), str(self.unstructured)) + + def test_pickle_unstructured(self): + p = pickle.dumps(self.unstructured) + h = pickle.loads(p) + self.assertEqual(str(h), str(self.unstructured)) + + address = header.HeaderFactory()('from', 'frodo@mordor.net') + + def test_deepcopy_address(self): + h = copy.deepcopy(self.address) + self.assertEqual(str(h), str(self.address)) + + def test_pickle_address(self): + p = pickle.dumps(self.address) + h = pickle.loads(p) + self.assertEqual(str(h), str(self.address)) + + +class TestPickleCopyMessage(TestEmailBase): + + testmsg = message_from_string(textwrap.dedent("""\ + From: frodo@mordor.net + To: bilbo@underhill.org + Subject: help + + I think I forgot the ring. 
+ """), policy=TestEmailBase.email6_policy) + + def test_deepcopy(self): + msg2 = copy.deepcopy(self.testmsg) + self.assertEqual(msg2.as_string(), self.testmsg.as_string()) + + def test_pickle(self): + p = pickle.dumps(self.testmsg) + msg2 = pickle.loads(p) + self.assertEqual(msg2.as_string(), self.testmsg.as_string()) + + + +if __name__ == '__main__': + unittest.main() diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/test_policy.py --- a/Lib/test/test_email/test_policy.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/test/test_email/test_policy.py Tue Jul 19 13:24:56 2011 -0400 @@ -1,43 +1,69 @@ import types import unittest -import email.policy +from email import header +from email import policy as _policy class PolicyAPITests(unittest.TestCase): longMessage = True - # These default values are the ones set on email.policy.default. + # These default values are the ones set on _policy.default. # If any of these defaults change, the docs must be updated. policy_defaults = { 'max_line_length': 78, 'linesep': '\n', 'must_be_7bit': False, 'raise_on_defect': False, + 'decoded_headers': None, + 'header_factory': _policy.Policy.header_factory, } - # For each policy under test, we give here the values of the attributes - # that are different from the defaults for that policy. 
- policies = { - email.policy.Policy(): {}, - email.policy.default: {}, - email.policy.SMTP: {'linesep': '\r\n'}, - email.policy.HTTP: {'linesep': '\r\n', 'max_line_length': None}, - email.policy.strict: {'raise_on_defect': True}, - } + policies = [ + _policy.Policy(), + _policy.default, + _policy.SMTP, + _policy.HTTP, + _policy.strict, + _policy.email5_defaults, + _policy.email6_defaults, + ] - def test_defaults(self): - for policy, changed_defaults in self.policies.items(): - expected = self.policy_defaults.copy() - expected.update(changed_defaults) - for attr, value in expected.items(): - self.assertEqual(getattr(policy, attr), value, - ("change {} docs/docstrings if defaults have " - "changed").format(policy)) + def settings_test(self, policy, changed_from_default): + expected = self.policy_defaults.copy() + expected.update(changed_from_default) + for attr, value in expected.items(): + self.assertEqual(getattr(policy, attr), value, + ("change docs/docstrings if defaults have changed")) + + def test_new_policy(self): + new_policy = _policy.Policy() + self.settings_test(new_policy, {'header_factory': new_policy.header_factory}) + + def test_default_policy(self): + self.settings_test(_policy.default, {}) + + def test_SMTP_policy(self): + self.settings_test(_policy.SMTP, {'linesep': '\r\n'}) + + def test_HTTP_policy(self): + self.settings_test(_policy.HTTP, {'linesep': '\r\n', + 'max_line_length': None}) + + def test_strict_policy(self): + self.settings_test(_policy.strict, {'raise_on_defect': True}) + + def test_email5_defaults_policy(self): + self.settings_test(_policy.email5_defaults, + {'decoded_headers': False}) + + def test_email6_defaults_policy(self): + self.settings_test(_policy.email6_defaults, + {'decoded_headers': True}) def test_all_attributes_covered(self): - for attr in dir(email.policy.default): + for attr in dir(_policy.default): if (attr.startswith('_') or - isinstance(getattr(email.policy.Policy, attr), + isinstance(getattr(_policy.Policy, attr), 
types.FunctionType)): continue else: @@ -52,24 +78,24 @@ with self.assertRaisesRegex(AttributeError, 'no attribute.*foo'): policy.foo = None - def test_set_policy_attrs_when_calledl(self): + def test_set_policy_attrs_when_cloned(self): testattrdict = { attr: None for attr in self.policy_defaults } for policyclass in self.policies: policy = policyclass.clone(**testattrdict) for attr in self.policy_defaults: self.assertIsNone(getattr(policy, attr)) - def test_reject_non_policy_keyword_when_called(self): + def test_reject_non_policy_keyword_when_cloned(self): for policyclass in self.policies: with self.assertRaises(TypeError): - policyclass(this_keyword_should_not_be_valid=None) + policyclass.clone(this_keyword_should_not_be_valid=None) with self.assertRaises(TypeError): - policyclass(newtline=None) + policyclass.clone(newtline=None) def test_policy_addition(self): expected = self.policy_defaults.copy() - p1 = email.policy.default.clone(max_line_length=100) - p2 = email.policy.default.clone(max_line_length=50) + p1 = _policy.default.clone(max_line_length=100) + p2 = _policy.default.clone(max_line_length=50) added = p1 + p2 expected.update(max_line_length=50) for attr, value in expected.items(): @@ -78,7 +104,7 @@ expected.update(max_line_length=100) for attr, value in expected.items(): self.assertEqual(getattr(added, attr), value) - added = added + email.policy.default + added = added + _policy.default for attr, value in expected.items(): self.assertEqual(getattr(added, attr), value) @@ -88,7 +114,7 @@ self.defects = [] obj = Dummy() defect = object() - policy = email.policy.Policy() + policy = _policy.Policy() policy.register_defect(obj, defect) self.assertEqual(obj.defects, [defect]) defect2 = object() @@ -106,18 +132,18 @@ foo = self.MyObj() defect = self.MyDefect("the telly is broken") with self.assertRaisesRegex(self.MyDefect, "the telly is broken"): - email.policy.strict.handle_defect(foo, defect) + _policy.strict.handle_defect(foo, defect) def 
test_handle_defect_registers_defect(self): foo = self.MyObj() defect1 = self.MyDefect("one") - email.policy.default.handle_defect(foo, defect1) + _policy.default.handle_defect(foo, defect1) self.assertEqual(foo.defects, [defect1]) defect2 = self.MyDefect("two") - email.policy.default.handle_defect(foo, defect2) + _policy.default.handle_defect(foo, defect2) self.assertEqual(foo.defects, [defect1, defect2]) - class MyPolicy(email.policy.Policy): + class MyPolicy(_policy.Policy): defects = [] def register_defect(self, obj, defect): self.defects.append(defect) @@ -140,6 +166,72 @@ self.assertEqual(my_policy.defects, [defect1, defect2]) self.assertEqual(foo.defects, []) + def test_default_header_factory(self): + h = _policy.default.header_factory('Test', 'test') + self.assertEqual(h.name, 'Test') + self.assertIsInstance(h, header.UnstructuredHeader) + self.assertIsInstance(h, header.BaseHeader) + + class Foo: + parse = header.UnstructuredHeader.parse + + def test_each_Policy_gets_unique_factory(self): + policy1 = _policy.Policy() + policy2 = _policy.Policy() + policy1.header_factory.map_to_type('foo', self.Foo) + h = policy1.header_factory('foo', 'test') + self.assertIsInstance(h, self.Foo) + self.assertNotIsInstance(h, header.UnstructuredHeader) + h = policy2.header_factory('foo', 'test') + self.assertNotIsInstance(h, self.Foo) + self.assertIsInstance(h, header.UnstructuredHeader) + + def test_clone_copies_factory(self): + policy1 = _policy.Policy() + policy2 = policy1.clone() + policy1.header_factory.map_to_type('foo', self.Foo) + h = policy1.header_factory('foo', 'test') + self.assertIsInstance(h, self.Foo) + h = policy2.header_factory('foo', 'test') + self.assertIsInstance(h, self.Foo) + + def test_new_factory_overrides_default(self): + mypolicy = _policy.Policy() + myfactory = mypolicy.header_factory + newpolicy = mypolicy + _policy.strict + self.assertEqual(newpolicy.header_factory, myfactory) + newpolicy = _policy.strict + mypolicy + 
self.assertEqual(newpolicy.header_factory, myfactory) + + def test_adding_default_policies_prserves_default_factory(self): + newpolicy = _policy.default + _policy.strict + self.assertEqual(newpolicy.header_factory, + _policy.Policy.header_factory) + self.assertEqual(newpolicy.__dict__, {'raise_on_defect': True}) + + def test_make_header(self): + with self.assertWarnsRegex(DeprecationWarning, 'decoded_headers'): + h = _policy.default.make_header('Test', 'test', 'test') + self.assertIsInstance(h, header.UnstructuredHeader) + self.assertEqual(h.name, 'Test') + self.assertEqual(h, 'test') + + def test_make_header_with_no_unfolded(self): + h = _policy.default.make_header('Test', 'test') + self.assertIsInstance(h, header.UnstructuredHeader) + self.assertEqual(h.name, 'Test') + self.assertEqual(h, 'test') + self.assertIsNone(h.source) + self.assertEqual(h.value, 'test') + + def test_make_header_sets_decoded(self): + mypolicy1 = _policy.default.clone(decoded_headers=False) + h = mypolicy1.make_header('Test', 'test\n test', 'test test') + self.assertEqual(h, 'test\n test') + mypolicy2 = _policy.default.clone(decoded_headers=True) + h = mypolicy2.make_header('Test', 'test\n test', 'test test') + self.assertEqual(h, 'test test') + # XXX: Need subclassing tests. # For adding subclassed objects, make sure the usual rules apply (subclass # wins), but that the order still works (right overrides left). 
diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/test_utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_email/test_utils.py Tue Jul 19 13:24:56 2011 -0400 @@ -0,0 +1,61 @@ +import datetime +from email import utils +import unittest + +class FormatDateTimeTests(unittest.TestCase): + + datestring = 'Sun, 23 Sep 2001 20:10:55' + dateargs = (2001, 9, 23, 20, 10, 55) + offsetstring = ' -0700' + utcoffset = datetime.timedelta(hours=-7) + tz = datetime.timezone(utcoffset) + naive_dt = datetime.datetime(*dateargs) + aware_dt = datetime.datetime(*dateargs, tzinfo=tz) + + def test_naive_datetime(self): + self.assertEqual(utils.format_datetime(self.naive_dt), + self.datestring + ' -0000') + + def test_aware_datetime(self): + self.assertEqual(utils.format_datetime(self.aware_dt), + self.datestring + self.offsetstring) + + def test_usegmt(self): + utc_dt = datetime.datetime(*self.dateargs, + tzinfo=datetime.timezone.utc) + self.assertEqual(utils.format_datetime(utc_dt, usegmt=True), + self.datestring + ' GMT') + + def test_usegmt_with_naive_datetime_raises(self): + with self.assertRaises(ValueError): + utils.format_datetime(self.naive_dt, usegmt=True) + + def test_usegmt_with_non_utc_datetime_raises(self): + with self.assertRaises(ValueError): + utils.format_datetime(self.aware_dt, usegmt=True) + + +class LocaltimeTests(unittest.TestCase): + + def test_localtime(self): + # Based on Issue 9527 patch + t = utils.localtime() + self.assertIsNot(t.tzinfo, None) + t0 = datetime.datetime(1970, 1, 1, tzinfo = datetime.timezone.utc) + t1 = utils.localtime(t0) + self.assertEqual(t0, t1) + t2 = utils.localtime(t1.replace(tzinfo=None)) + self.assertEqual(t1, t2) + # The following tests use local time that is ambiguous in the + # US, but should work in any location + t0 = datetime.datetime(2010, 11, 7, 1, 30) + t1 = utils.localtime(t0, isdst=0) + t2 = utils.localtime(t1) + self.assertEqual(t1, t2) + t1 = utils.localtime(t0, isdst=1) + t2 = utils.localtime(t1) 
+ self.assertEqual(t1, t2) + + +if __name__ == '__main__': + unittest.main() diff -r 7520f1bf0a81 -r b22698463737 Lib/test/test_email/torture_test.py --- a/Lib/test/test_email/torture_test.py Sun Jul 17 22:50:12 2011 -0500 +++ b/Lib/test/test_email/torture_test.py Tue Jul 19 13:24:56 2011 -0400 @@ -12,10 +12,10 @@ from io import StringIO from types import ListType -from email.test.test_email import TestEmailBase +from email import message_from_file +from test.test_email import TestEmailBase from test.support import TestSkipped, run_unittest -import email from email import __file__ as testfile from email.iterators import _structure @@ -36,7 +36,7 @@ def _msgobj(self, filename): fp = openfile(filename) try: - msg = email.message_from_file(fp) + msg = message_from_file(fp) finally: fp.close() return msg