diff -r 29dbd42970ff Doc/library/base64.rst --- a/Doc/library/base64.rst Sun Feb 12 11:13:06 2012 +0200 +++ b/Doc/library/base64.rst Sun Feb 19 19:05:44 2012 +0100 @@ -18,9 +18,14 @@ There are two interfaces provided by this module. The modern interface supports encoding and decoding ASCII byte string objects using all three -alphabets. The legacy interface provides for encoding and decoding to and from -file-like objects as well as byte strings, but only using the Base64 standard -alphabet. +alphabets. Additionally, the decoding functions of the modern interface +accept Unicode strings containing only ASCII characters. The legacy interface +provides for encoding and decoding to and from file-like objects as well as +byte strings, but only using the Base64 standard alphabet. + +.. versionchanged:: 3.3 + ASCII-only Unicode strings are now accepted by the decoding functions of + the modern interface. The modern interface provides: @@ -41,9 +46,10 @@ Decode a Base64 encoded byte string. - *s* is the byte string to decode. Optional *altchars* must be a string of - at least length 2 (additional characters are ignored) which specifies the - alternative alphabet used instead of the ``+`` and ``/`` characters. + *s* is the byte string to decode. Optional *altchars* must be a byte string + of at least length 2 (additional characters are ignored) which specifies the + alternative alphabet used instead of the ``+`` and ``/`` characters. *s* and + *altchars* can also be ASCII-only Unicode strings. The decoded string is returned. A :exc:`binascii.Error` exception is raised if *s* is incorrectly padded. @@ -53,6 +59,9 @@ non-base64-alphabet characters in the input result in a :exc:`binascii.Error`. + .. versionchanged:: 3.3 + :func:`b64decode` also accepts ASCII-only Unicode strings. + .. function:: standard_b64encode(s) @@ -61,7 +70,11 @@ .. function:: standard_b64decode(s) - Decode byte string *s* using the standard Base64 alphabet.
+ Decode byte string or ASCII-only Unicode string *s* using the standard Base64 + alphabet. + + .. versionchanged:: 3.3 + :func:`standard_b64decode` also accepts ASCII-only Unicode strings. .. function:: urlsafe_b64encode(s) @@ -73,9 +86,12 @@ .. function:: urlsafe_b64decode(s) - Decode byte string *s* using a URL-safe alphabet, which substitutes ``-`` instead of - ``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet. + Decode byte string or ASCII-only Unicode string *s* using a URL-safe + alphabet, which substitutes ``-`` instead of ``+`` and ``_`` instead of ``/`` + in the standard Base64 alphabet. + .. versionchanged:: 3.3 + :func:`urlsafe_b64decode` also accepts ASCII-only Unicode strings. .. function:: b32encode(s) @@ -87,9 +103,9 @@ Decode a Base32 encoded byte string. - *s* is the byte string to decode. Optional *casefold* is a flag specifying - whether a lowercase alphabet is acceptable as input. For security purposes, - the default is ``False``. + *s* is the byte string or ASCII-only Unicode string to decode. Optional + *casefold* is a flag specifying whether a lowercase alphabet is acceptable as + input. For security purposes, the default is ``False``. :rfc:`3548` allows for optional mapping of the digit 0 (zero) to the letter O (oh), and for optional mapping of the digit 1 (one) to either the letter I (eye) @@ -102,6 +118,9 @@ incorrectly padded or if there are non-alphabet characters present in the string. + .. versionchanged:: 3.3 + :func:`b32decode` also accepts ASCII-only Unicode strings. + .. function:: b16encode(s) @@ -114,14 +133,17 @@ Decode a Base16 encoded byte string. - *s* is the string to decode. Optional *casefold* is a flag specifying whether a - lowercase alphabet is acceptable as input. For security purposes, the default - is ``False``. + *s* is the byte string or ASCII-only Unicode string to decode. Optional + *casefold* is a flag specifying whether a lowercase alphabet is acceptable as + input.
For security purposes, the default is ``False``. The decoded byte string is returned. A :exc:`TypeError` is raised if *s* were incorrectly padded or if there are non-alphabet characters present in the string. + .. versionchanged:: 3.3 + :func:`b16decode` also accepts ASCII-only Unicode strings. + The legacy interface: diff -r 29dbd42970ff Lib/base64.py --- a/Lib/base64.py Sun Feb 12 11:13:06 2012 +0200 +++ b/Lib/base64.py Sun Feb 19 19:05:44 2012 +0100 @@ -29,6 +29,29 @@ bytes_types = (bytes, bytearray) # Types acceptable as binary data +def _bytes_from_decode_data(s): + """Return s as binary data, encoding an ASCII-only str to bytes.""" + if isinstance(s, str): + try: + return s.encode('ascii') + except UnicodeEncodeError: + raise ValueError('string argument should contain only ASCII characters') + elif isinstance(s, bytes_types): + return s + else: + raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__) + +def _check_str_bytes_mix(data, extra_arg): + """Raise TypeError if data and extra_arg mix str with binary data.""" + if extra_arg is None: + return + + supported = (str,) + bytes_types + if not (isinstance(data, supported) and isinstance(extra_arg, supported)): + # _bytes_from_decode_data() reports unsupported types accurately. + return + if isinstance(data, str) != isinstance(extra_arg, str): + raise TypeError("mixed bytes and str arguments are not accepted") def _translate(s, altchars): if not isinstance(s, bytes_types): @@ -79,12 +102,10 @@ discarded prior to the padding check. If validate is True, non-base64-alphabet characters in the input result in a binascii.Error.
""" - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + _check_str_bytes_mix(s, altchars) + s = _bytes_from_decode_data(s) if altchars is not None: - if not isinstance(altchars, bytes_types): - raise TypeError("expected bytes, not %s" - % altchars.__class__.__name__) + altchars = _bytes_from_decode_data(altchars) assert len(altchars) == 2, repr(altchars) s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'}) if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s): @@ -128,7 +149,10 @@ The alphabet uses '-' instead of '+' and '_' instead of '/'. """ - return b64decode(s, b'-_') + if isinstance(s, str): + return b64decode(s, '-_') + else: + return b64decode(s, b'-_') @@ -211,8 +235,8 @@ the input is incorrectly padded or if there are non-alphabet characters present in the input. """ - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + _check_str_bytes_mix(s, map01) + s = _bytes_from_decode_data(s) quanta, leftover = divmod(len(s), 8) if leftover: raise binascii.Error('Incorrect padding') @@ -220,8 +244,7 @@ # False, or the character to map the digit 1 (one) to. It should be # either L (el) or I (eye). if map01 is not None: - if not isinstance(map01, bytes_types): - raise TypeError("expected bytes, not %s" % map01.__class__.__name__) + map01 = _bytes_from_decode_data(map01) assert len(map01) == 1, repr(map01) s = _translate(s, {b'0': b'O', b'1': map01}) if casefold: @@ -292,8 +315,7 @@ s were incorrectly padded or if there are non-alphabet characters present in the string.
""" - if not isinstance(s, bytes_types): - raise TypeError("expected bytes, not %s" % s.__class__.__name__) + s = _bytes_from_decode_data(s) if casefold: s = s.upper() if re.search(b'[^0-9A-F]', s): diff -r 29dbd42970ff Lib/test/test_base64.py --- a/Lib/test/test_base64.py Sun Feb 12 11:13:06 2012 +0200 +++ b/Lib/test/test_base64.py Sun Feb 19 19:05:44 2012 +0100 @@ -102,44 +102,57 @@ def test_b64decode(self): eq = self.assertEqual - eq(base64.b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org") - eq(base64.b64decode(b'AA=='), b'\x00') - eq(base64.b64decode(b"YQ=="), b"a") - eq(base64.b64decode(b"YWI="), b"ab") - eq(base64.b64decode(b"YWJj"), b"abc") - eq(base64.b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" - b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT" - b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="), - b"abcdefghijklmnopqrstuvwxyz" - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" - b"0123456789!@#0^&*();:<>,. []{}") - eq(base64.b64decode(b''), b'') + + tests = {b"d3d3LnB5dGhvbi5vcmc=": b"www.python.org", + b'AA==': b'\x00', + b"YQ==": b"a", + b"YWI=": b"ab", + b"YWJj": b"abc", + b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" + b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT" + b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==": + + b"abcdefghijklmnopqrstuvwxyz" + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"0123456789!@#0^&*();:<>,.
[]{}", + b'': b'', + } + for data, res in tests.items(): + eq(base64.b64decode(data), res) + eq(base64.b64decode(data.decode('ascii')), res) + # Test with arbitrary alternative characters - eq(base64.b64decode(b'01a*b$cd', altchars=b'*$'), b'\xd3V\xbeo\xf7\x1d') - # Check if passing a str object raises an error - self.assertRaises(TypeError, base64.b64decode, "") - self.assertRaises(TypeError, base64.b64decode, b"", altchars="") + tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d', + } + for (data, altchars), res in tests_altchars.items(): + data_str = data.decode('ascii') + altchars_str = altchars.decode('ascii') + + eq(base64.b64decode(data, altchars=altchars), res) + eq(base64.b64decode(data_str, altchars=altchars_str), res) + + # No mixing of bytes and str + with self.assertRaises(TypeError): + base64.b64decode(data, altchars=altchars_str) + with self.assertRaises(TypeError): + base64.b64decode(data_str, altchars=altchars) + # Test standard alphabet - eq(base64.standard_b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org") - eq(base64.standard_b64decode(b"YQ=="), b"a") - eq(base64.standard_b64decode(b"YWI="), b"ab") - eq(base64.standard_b64decode(b"YWJj"), b"abc") - eq(base64.standard_b64decode(b""), b"") - eq(base64.standard_b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE" - b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT" - b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="), - b"abcdefghijklmnopqrstuvwxyz" - b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" - b"0123456789!@#0^&*();:<>,.
[]{}") - # Check if passing a str object raises an error - self.assertRaises(TypeError, base64.standard_b64decode, "") - self.assertRaises(TypeError, base64.standard_b64decode, b"", altchars="") + for data, res in tests.items(): + eq(base64.standard_b64decode(data), res) + eq(base64.standard_b64decode(data.decode('ascii')), res) + # Test with 'URL safe' alternative characters - eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d') - self.assertRaises(TypeError, base64.urlsafe_b64decode, "") + tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d', + b'': b'', + } + for data, res in tests_urlsafe.items(): + eq(base64.urlsafe_b64decode(data), res) + eq(base64.urlsafe_b64decode(data.decode('ascii')), res) def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') + self.assertRaises(binascii.Error, base64.b64decode, 'abc') def test_b64decode_invalid_chars(self): # issue 1466065: Test some invalid characters. @@ -154,8 +167,11 @@ (b'YWJj\nYWI=', b'abcab')) for bstr, res in tests: self.assertEqual(base64.b64decode(bstr), res) + self.assertEqual(base64.b64decode(bstr.decode('ascii')), res) with self.assertRaises(binascii.Error): base64.b64decode(bstr, validate=True) + with self.assertRaises(binascii.Error): + base64.b64decode(bstr.decode('ascii'), validate=True) def test_b32encode(self): eq = self.assertEqual @@ -170,40 +186,67 @@ def test_b32decode(self): eq = self.assertEqual - eq(base64.b32decode(b''), b'') - eq(base64.b32decode(b'AA======'), b'\x00') - eq(base64.b32decode(b'ME======'), b'a') - eq(base64.b32decode(b'MFRA===='), b'ab') - eq(base64.b32decode(b'MFRGG==='), b'abc') - eq(base64.b32decode(b'MFRGGZA='), b'abcd') - eq(base64.b32decode(b'MFRGGZDF'), b'abcde') - self.assertRaises(TypeError, base64.b32decode, "") + tests = {b'': b'', + b'AA======': b'\x00', + b'ME======': b'a', + b'MFRA====': b'ab', + b'MFRGG===': b'abc', + b'MFRGGZA=': b'abcd', + b'MFRGGZDF': b'abcde', + } + for data, res in tests.items(): + eq(base64.b32decode(data), res) +
eq(base64.b32decode(data.decode('ascii')), res) def test_b32decode_casefold(self): eq = self.assertEqual - eq(base64.b32decode(b'', True), b'') - eq(base64.b32decode(b'ME======', True), b'a') - eq(base64.b32decode(b'MFRA====', True), b'ab') - eq(base64.b32decode(b'MFRGG===', True), b'abc') - eq(base64.b32decode(b'MFRGGZA=', True), b'abcd') - eq(base64.b32decode(b'MFRGGZDF', True), b'abcde') - # Lower cases - eq(base64.b32decode(b'me======', True), b'a') - eq(base64.b32decode(b'mfra====', True), b'ab') - eq(base64.b32decode(b'mfrgg===', True), b'abc') - eq(base64.b32decode(b'mfrggza=', True), b'abcd') - eq(base64.b32decode(b'mfrggzdf', True), b'abcde') - # Expected exceptions + tests = {b'': b'', + b'ME======': b'a', + b'MFRA====': b'ab', + b'MFRGG===': b'abc', + b'MFRGGZA=': b'abcd', + b'MFRGGZDF': b'abcde', + # Lower cases + b'me======': b'a', + b'mfra====': b'ab', + b'mfrgg===': b'abc', + b'mfrggza=': b'abcd', + b'mfrggzdf': b'abcde', + } + + for data, res in tests.items(): + eq(base64.b32decode(data, True), res) + eq(base64.b32decode(data.decode('ascii'), True), res) + self.assertRaises(TypeError, base64.b32decode, b'me======') + self.assertRaises(TypeError, base64.b32decode, 'me======') + # Mapping zero and one eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe') - eq(base64.b32decode(b'M1023456', map01=b'L'), b'b\xdd\xad\xf3\xbe') - eq(base64.b32decode(b'M1023456', map01=b'I'), b'b\x1d\xad\xf3\xbe') - self.assertRaises(TypeError, base64.b32decode, b"", map01="") + eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe') + + map_tests = {(b'M1023456', b'L'): b'b\xdd\xad\xf3\xbe', + (b'M1023456', b'I'): b'b\x1d\xad\xf3\xbe', + } + for (data, map01), res in map_tests.items(): + data_str = data.decode('ascii') + map01_str = map01.decode('ascii') + + eq(base64.b32decode(data, map01=map01), res) + eq(base64.b32decode(data_str, map01=map01_str), res) + + # No mixing of bytes and str + with self.assertRaises(TypeError): + base64.b32decode(data, map01=map01_str) + with self.assertRaises(TypeError): +
base64.b32decode(data_str, map01=map01) def test_b32decode_error(self): - self.assertRaises(binascii.Error, base64.b32decode, b'abc') - self.assertRaises(binascii.Error, base64.b32decode, b'ABCDEF==') + for data in [b'abc', b'ABCDEF==']: + with self.assertRaises(binascii.Error): + base64.b32decode(data) + with self.assertRaises(binascii.Error): + base64.b32decode(data.decode('ascii')) def test_b16encode(self): eq = self.assertEqual @@ -214,12 +257,24 @@ def test_b16decode(self): eq = self.assertEqual eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef') + eq(base64.b16decode('0102ABCDEF'), b'\x01\x02\xab\xcd\xef') eq(base64.b16decode(b'00'), b'\x00') + eq(base64.b16decode('00'), b'\x00') # Lower case is not allowed without a flag self.assertRaises(binascii.Error, base64.b16decode, b'0102abcdef') + self.assertRaises(binascii.Error, base64.b16decode, '0102abcdef') # Case fold eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef') - self.assertRaises(TypeError, base64.b16decode, "") + eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef') + + def test_decode_nonascii_str(self): + decode_funcs = (base64.b64decode, + base64.standard_b64decode, + base64.urlsafe_b64decode, + base64.b32decode, + base64.b16decode) + for f in decode_funcs: + self.assertRaises(ValueError, f, 'with non-ascii \xcb') def test_ErrorHeritage(self): self.assertTrue(issubclass(binascii.Error, ValueError))