Index: Doc/library/base64.rst =================================================================== --- Doc/library/base64.rst (revision 74261) +++ Doc/library/base64.rst (working copy) @@ -35,7 +35,7 @@ The encoded string is returned. -.. function:: b64decode(s, altchars=None) +.. function:: b64decode(s, altchars=None, validate=False) Decode a Base64 encoded string. @@ -43,11 +43,14 @@ length 2 (additional characters are ignored) which specifies the alternative alphabet used instead of the ``+`` and ``/`` characters. - The decoded string is returned. A :exc:`TypeError` is raised if *s* were - incorrectly padded or if there are non-alphabet characters present in the - string. + The decoded string is returned. A :exc:`binascii.Error` is raised if *s* is + incorrectly padded. + If *validate* is not set to ``True``, non-base64-alphabet characters are + discarded prior to the padding check, rather than rejected with a + :exc:`TypeError`. + .. function:: standard_b64encode(s) Encode string *s* using the standard Base64 alphabet. Index: Lib/base64.py =================================================================== --- Lib/base64.py (revision 74261) +++ Lib/base64.py (working copy) @@ -5,6 +5,7 @@ # Modified 04-Oct-1995 by Jack Jansen to use binascii module # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere +# Modified 29-Jul-2009 by Neil Tallim to support base64 validation import re import struct @@ -65,16 +66,19 @@ return encoded -def b64decode(s, altchars=None): +def b64decode(s, altchars=None, validate=False): """Decode a Base64 encoded byte string. s is the byte string to decode. Optional altchars must be a string of length 2 which specifies the alternative alphabet used instead of the '+' and '/' characters. - - The decoded byte string is returned. binascii.Error is raised if - s were incorrectly padded or if there are non-alphabet characters - present in the string. 
+ + The decoded string is returned. A binascii.Error is raised if s is + incorrectly padded. + + If validate is not set to True, non-base64-alphabet characters are + discarded prior to the padding check, rather than rejected with a + TypeError. """ if not isinstance(s, bytes_types): raise TypeError("expected bytes, not %s" % s.__class__.__name__) @@ -84,6 +88,8 @@ % altchars.__class__.__name__) assert len(altchars) == 2, repr(altchars) s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'}) + if validate and not re.match(b'^[A-Za-z0-9+/\r\n]*={0,2}$', s): + raise TypeError('Non-base64 digit found') return binascii.a2b_base64(s) Index: Lib/test/test_base64.py =================================================================== --- Lib/test/test_base64.py (revision 74261) +++ Lib/test/test_base64.py (working copy) @@ -137,7 +137,18 @@ self.assertRaises(TypeError, base64.urlsafe_b64decode, "") def test_b64decode_error(self): + # Test padding errors. self.assertRaises(binascii.Error, base64.b64decode, b'abc') + # Test some invalid characters; addresses [1466065]. + arr = self.assertRaisesRegexp + arr(TypeError, 'Non-base64 digit found', base64.b64decode, b'%3d=', None, True) + arr(TypeError, 'Non-base64 digit found', base64.b64decode, b'$3=', None, True) + arr(TypeError, 'Non-base64 digit found', base64.b64decode, b'[==', None, True) + arr(TypeError, 'Non-base64 digit found', base64.b64decode, b']3==', None, True) + arr(TypeError, 'Non-base64 digit found', base64.b64decode, b'3{=', None, True) + arr(TypeError, 'Non-base64 digit found', base64.b64decode, b'3d}=', None, True) + arr(TypeError, 'Non-base64 digit found', base64.b64decode, b'@@', None, True) + arr(TypeError, 'Non-base64 digit found', base64.b64decode, b'!', None, True) def test_b32encode(self): eq = self.assertEqual