# HG changeset patch # Parent 937774aa18531e1296e06cd754431d96e07e70f4 Issue #22088: Clarify base-64 alphabets and which characters are discarded * There are only two base-64 alphabets defined by the RFCs, not three * Due to the internal translation, plus (+) and slash (/) are never discarded * standard_ and urlsafe_b64decode() discard characters as well diff -r 937774aa1853 Doc/library/base64.rst --- a/Doc/library/base64.rst Mon Dec 14 03:41:59 2015 +0000 +++ b/Doc/library/base64.rst Mon Dec 14 05:19:33 2015 +0000 @@ -16,8 +16,8 @@ encoding algorithm is not the same as the :program:`uuencode` program. There are two interfaces provided by this module. The modern interface supports -encoding and decoding string objects using all three :rfc:`3548` defined -alphabets (normal, URL-safe, and filesystem-safe). The legacy +encoding and decoding string objects using both base-64 alphabets defined +in :rfc:`3548` (normal, and URL- and filesystem-safe). The legacy interface provides for encoding and decoding to and from file-like objects as well as strings, but only using the Base64 standard alphabet. @@ -46,7 +46,8 @@ alphabet used instead of the ``+`` and ``/`` characters. The decoded string is returned. A :exc:`TypeError` is raised if *s* is - incorrectly padded. Non-base64-alphabet characters are + incorrectly padded. Characters that are neither in the normal + base-64 alphabet nor the alternative alphabet are discarded prior to the padding check. @@ -62,14 +63,16 @@ .. function:: urlsafe_b64encode(s) - Encode string *s* using a URL-safe alphabet, which substitutes ``-`` instead of + Encode string *s* using the URL- and filesystem-safe + alphabet, which substitutes ``-`` instead of ``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet. The result can still contain ``=``. .. 
function:: urlsafe_b64decode(s) - Decode string *s* using a URL-safe alphabet, which substitutes ``-`` instead of + Decode string *s* using the URL- and filesystem-safe + alphabet, which substitutes ``-`` instead of ``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet. diff -r 937774aa1853 Lib/base64.py --- a/Lib/base64.py Mon Dec 14 03:41:59 2015 +0000 +++ b/Lib/base64.py Mon Dec 14 05:19:33 2015 +0000 @@ -65,8 +65,9 @@ alternative alphabet used instead of the '+' and '/' characters. The decoded string is returned. A TypeError is raised if s is - incorrectly padded. Non-base64-alphabet characters are discarded prior - to the padding check. + incorrectly padded. Characters that are neither in the normal base-64 + alphabet nor the alternative alphabet are discarded prior to the padding + check. """ if altchars is not None: s = s.translate(string.maketrans(altchars[:2], '+/')) @@ -88,8 +89,8 @@ """Decode a string encoded with the standard Base64 alphabet. s is the string to decode. The decoded string is returned. A TypeError - is raised if the string is incorrectly padded or if there are non-alphabet - characters present in the string. + is raised if the string is incorrectly padded. Characters that are not + in the standard alphabet are discarded prior to the padding check. """ return b64decode(s) @@ -97,7 +98,7 @@ _urlsafe_decode_translation = string.maketrans(b'-_', b'+/') def urlsafe_b64encode(s): - """Encode a string using a url-safe Base64 alphabet. + """Encode a string using the URL- and filesystem-safe Base64 alphabet. s is the string to encode. The encoded string is returned. The alphabet uses '-' instead of '+' and '_' instead of '/'. @@ -105,11 +106,12 @@ return b64encode(s).translate(_urlsafe_encode_translation) def urlsafe_b64decode(s): - """Decode a string encoded with the standard Base64 alphabet. + """Decode a string encoded with the URL- and filesystem-safe Base64 alphabet. s is the string to decode. The decoded string is returned. 
A TypeError - is raised if the string is incorrectly padded or if there are non-alphabet - characters present in the string. + is raised if the string is incorrectly padded. Characters that are not + in the URL-safe base-64 alphabet, and are not a plus '+' or slash '/', + are discarded prior to the padding check. The alphabet uses '-' instead of '+' and '_' instead of '/'. """ diff -r 937774aa1853 Lib/test/test_base64.py --- a/Lib/test/test_base64.py Mon Dec 14 03:41:59 2015 +0000 +++ b/Lib/test/test_base64.py Mon Dec 14 05:19:33 2015 +0000 @@ -153,6 +153,13 @@ (b'YWJj\nYWI=', b'abcab')) for bstr, res in tests: self.assertEqual(base64.b64decode(bstr), res) + self.assertEqual(base64.standard_b64decode(bstr), res) + self.assertEqual(base64.urlsafe_b64decode(bstr), res) + + # Normal alphabet characters not discarded when alternative given + res = b'\xFB\xEF\xBE\xFF\xFF\xFF' + self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res) + self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res) def test_b32encode(self): eq = self.assertEqual