diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py --- a/Lib/email/_encoded_words.py +++ b/Lib/email/_encoded_words.py @@ -42,7 +42,6 @@ import re import base64 import binascii -import functools from string import ascii_letters, digits from email import errors @@ -60,13 +59,11 @@ # Quoted Printable # -# regex based decoder. -_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub, - lambda m: bytes([int(m.group(1), 16)])) def decode_q(encoded): encoded = encoded.replace(b'_', b' ') - return _q_byte_subber(encoded), [] + return re.sub(br'=([a-fA-F0-9]{2})', + lambda m: bytes([int(m.group(1), 16)]), encoded), [] # dict mapping bytes to their encoded form diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -31,9 +31,7 @@ NLCRE_bol = re.compile('(\r\n|\r|\n)') NLCRE_eol = re.compile('(\r\n|\r|\n)\Z') NLCRE_crack = re.compile('(\r\n|\r|\n)') -# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character -# except controls, SP, and ":". -headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') + EMPTYSTRING = '' NL = '\n' @@ -214,7 +212,9 @@ if line is NeedMoreData: yield NeedMoreData continue - if not headerRE.match(line): + # RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, + # Any character except controls, SP, and ":". + if not re.match(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])', line): # If we saw the RFC defined header/body separator # (i.e. newline), just throw it away. Otherwise the line is # part of the body so push it back. diff --git a/Lib/email/header.py b/Lib/email/header.py --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -34,23 +34,14 @@ # Match encoded-word strings in the form =?charset?q?Hello_World?= ecre = re.compile(r''' =\? # literal =? - (?P[^?]*?) # non-greedy up to the next ? is the charset + (?P[^?]*) # up to the next ? is the charset \? # literal ? (?P[qb]) # either a "q" or a "b", case insensitive \? # literal ? - (?P.*?) # non-greedy up to the next ?= is the encoded string + (?P[^?]*) # up to the next ?= is the encoded string \?= # literal ?= ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) -# Field name regexp, including trailing colon, but not separating whitespace, -# according to RFC 2822. Character range is from tilde to exclamation mark. -# For use with .match() -fcre = re.compile(r'[\041-\176]+:$') - -# Find a header embedded in a putative header value. Used to check for -# header injection attack. -_embeded_header = re.compile(r'\n[^ \t]+:') - # Helpers @@ -390,7 +381,9 @@ if self._chunks: formatter.add_transition() value = formatter._str(linesep) - if _embeded_header.search(value): + # Find a header embedded in a putative header value. + # Used to check for header injection attack. + if re.search(r'\n[^ \t]+:', value): raise HeaderParseError("header value appears to contain " "an embedded header: {!r}".format(value)) return value diff --git a/Lib/email/message.py b/Lib/email/message.py --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -22,10 +22,6 @@ SEMISPACE = '; ' -# Regular expression that matches `special' characters in parameters, the -# existence of which force quoting of the parameter value. -tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') - def _splitparam(param): # Split header parameters. BAW: this may be too simple. It isn't @@ -64,7 +60,7 @@ return '%s=%s' % (param, value) # BAW: Please check this. I think that if quote is set it should # force quoting even if not necessary. - if quote or tspecials.search(value): + if quote or re.search(r'[ \(\)<>@,;:\\"/\[\]\?=]', value): return '%s="%s"' % (param, utils.quote(value)) else: return '%s=%s' % (param, value) diff --git a/Lib/email/utils.py b/Lib/email/utils.py --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -54,9 +54,6 @@ CRLF = '\r\n' TICK = "'" -specialsre = re.compile(r'[][\\()<>@,:;".]') -escapesre = re.compile(r'[\\"]') - # How to figure out if we are processing strings that come from a byte # source with undecodable characters. _has_surrogates = re.compile( @@ -98,9 +95,9 @@ return "%s <%s>" % (encoded_name, address) else: quotes = '' - if specialsre.search(name): + if re.search(r'[][\\()<>@,:;".]', name): quotes = '"' - name = escapesre.sub(r'\\\g<0>', name) + name = re.sub(r'[\\"]', r'\\\g<0>', name) return '%s%s%s <%s>' % (quotes, name, quotes, address) return address @@ -114,17 +111,6 @@ -ecre = re.compile(r''' - =\? # literal =? - (?P[^?]*?) # non-greedy up to the next ? is the charset - \? # literal ? - (?P[qb]) # either a "q" or a "b", case insensitive - \? # literal ? - (?P.*?) # non-greedy up to the next ?= is the atom - \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE) - - def _format_timetuple_and_zone(timetuple, zone): return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]], @@ -290,9 +276,6 @@ return "%s'%s'%s" % (charset, language, s) -rfc2231_continuation = re.compile(r'^(?P\w+)\*((?P[0-9]+)\*?)?$', - re.ASCII) - def decode_params(params): """Decode parameters list according to RFC 2231. @@ -314,7 +297,8 @@ else: encoded = False value = unquote(value) - mo = rfc2231_continuation.match(name) + # match rfc2231 continuation + mo = re.match(r'^(?P\w+)\*((?P[0-9]+)\*?)?$', name, re.ASCII) if mo: name, num = mo.group('name', 'num') if num is not None: