Recognise RFC 2047 encoded-words that are not delimited by whitespace.

Lib/email/header.py: the encoded-text group of the encoded-word regexp now
matches only the characters '!'..'>' and '@'..'~' (no space, no control
characters, no '?'), and the trailing "whitespace or end of string"
lookahead and the re.MULTILINE flag are dropped.

Lib/email/test/test_email.py: test_rfc2047_without_whitespace now expects
the header to be split into its plain and decoded parts, and
test_broken_base64_header uses an encoded text ending in ')'.

--- Lib/email/oheader.py	2009-04-02 16:14:14.000000000 -0400
+++ Lib/email/header.py	2009-04-03 21:47:00.000000000 -0400
@@ -37,10 +37,10 @@
   \?                    # literal ?
   (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
   \?                    # literal ?
-  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
+  (?P<encoded>[!->@-~]*?) #GAN 03Apr09 RFC2047 2 explicitly exclude space and controls
+                        # non-greedy up to the next ?= is the encoded string
   \?=                   # literal ?=
-  (?=[ \t]|$)           # whitespace or the end of the string
-  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
+  ''', re.VERBOSE | re.IGNORECASE ) #GAN 03Apr09 RFC2047 6.1, 6.2 Encoded-words not always delimited by WS.
 
 # Field name regexp, including trailing colon, but not separating whitespace,
 # according to RFC 2822.  Character range is from tilde to exclamation mark.
--- Lib/email/test/otest_email.py	2008-01-19 07:32:27.000000000 -0500
+++ Lib/email/test/test_email.py	2009-04-03 21:53:51.000000000 -0400
@@ -1542,7 +1542,10 @@
     def test_rfc2047_without_whitespace(self):
         s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
         dh = decode_header(s)
-        self.assertEqual(dh, [(s, None)])
+        self.assertEqual(dh, [
+            ('Sm', None), ('\xf6', 'iso-8859-1'), ('rg', None),
+            ('\xe5', 'iso-8859-1'), ('sbord', None)
+        ])
 
     def test_rfc2047_with_whitespace(self):
         s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
@@ -2950,7 +3018,7 @@
 
     def test_broken_base64_header(self):
         raises = self.assertRaises
-        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ?='
+        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I)?='
         raises(Errors.HeaderParseError, decode_header, s)
 
 