Tighten the RFC 2047 encoded-word regexp (ecre) in email/header.py and add
two regression tests to email/test/test_email.py.

Changes made by this patch:
  * an encoded-word must now be preceded by whitespace or the start of the
    string (it already had to be followed by whitespace or the end);
  * the encoded-text group no longer matches "?", SPACE or TAB;
  * re.MULTILINE is dropped from the flags, so ^ and $ anchor only at the
    ends of the string being matched rather than at every line break;
  * new tests assert that encoded-word look-alikes glued to neighbouring
    text are returned undecoded.

NOTE(review): this file was recovered from a whitespace-collapsed copy.
The <charset>/<encoding>/<encoded> group names and all indentation were
reconstructed; if a hunk is rejected because of whitespace, retry with
"patch -l".

diff -u -r email.orig/header.py email/header.py
--- email.orig/header.py	2010-09-28 09:13:07.000000000 +0900
+++ email/header.py	2010-09-28 10:36:05.000000000 +0900
@@ -32,15 +32,17 @@
 
 # Match encoded-word strings in the form =?charset?q?Hello_World?=
 ecre = re.compile(r'''
+  (?:[ \t]|^)           # whitespace or the start of the string
   =\?                   # literal =?
   (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
   \?                    # literal ?
   (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
   \?                    # literal ?
-  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
+  (?P<encoded>[^? \t]*?)  # literal ? and SPACE are excluded from encoded-text
+                        # non-greedy up to the next ?= is the encoded string
   \?=                   # literal ?=
   (?=[ \t]|$)           # whitespace or the end of the string
-  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
+  ''', re.VERBOSE | re.IGNORECASE)
 
 # Field name regexp, including trailing colon, but not separating whitespace,
 # according to RFC 2822.  Character range is from tilde to exclamation mark.
diff -u -r email.orig/test/test_email.py email/test/test_email.py
--- email.orig/test/test_email.py	2010-09-28 09:13:07.000000000 +0900
+++ email/test/test_email.py	2010-09-28 09:09:24.000000000 +0900
@@ -1556,6 +1556,20 @@
         dh = decode_header(s)
         self.assertEqual(dh, [(s, None)])
 
+    def test_rfc2047_without_whitespace2(self):
+        s = """[Apache-Users 7784] ServerAlias =?ISO-2022-JP?B?GyRAGyRCJEsbKEI=?=:(
+ =?ISO-2022-JP?B?GyRCJTMlbSVzGyhC?=)=?ISO-2022-JP?B?GyRCJHI7SBsoQg==?=
+ =?ISO-2022-JP?B?GyRAGyRCTVEkOSRrJEg1c0YwJCwkKiQrJDckJBsoQg==?="""
+        dh = decode_header(s)
+        self.assertEqual(dh, [('[Apache-Users 7784] ServerAlias =?ISO-2022-JP?B?GyRAGyRCJEsbKEI=?=:(', None),
+                              (' =?ISO-2022-JP?B?GyRCJTMlbSVzGyhC?=)=?ISO-2022-JP?B?GyRCJHI7SBsoQg==?=', None),
+                              ('\x1b$@\x1b$BMQ$9$k$H5sF0$,$*$+$7$$\x1b(B', 'iso-2022-jp')])
+
+    def test_rfc2047_without_whitespace3(self):
+        s = 'Python=?ISO-2022-JP?B?GyRCJWEhPCVqJXMlMCVqJTklSBsoQg==?= Japan'
+        dh = decode_header(s)
+        self.assertEqual(dh, [('Python=?ISO-2022-JP?B?GyRCJWEhPCVqJXMlMCVqJTklSBsoQg==?= Japan', None)])
+
     def test_rfc2047_with_whitespace(self):
         s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
         dh = decode_header(s)