diff -r cf91ae981afd Doc/library/base64.rst
--- a/Doc/library/base64.rst	Thu Jul 23 09:11:13 2015 +0300
+++ b/Doc/library/base64.rst	Thu Jul 23 17:49:34 2015 +0300
@@ -37,6 +37,9 @@ strings, but only using the Base64 stand
    Any :term:`bytes-like object`\ s are now accepted by all
    encoding and decoding functions in this module.  Ascii85/Base85 support added.
 
+.. versionchanged:: 3.6
+   Base32hex support added.
+
 The modern interface provides:
 
 .. function:: b64encode(s, altchars=None)
@@ -118,6 +121,32 @@ The modern interface provides:
    string.
 
 
+.. function:: b32hexencode(s)
+
+   Encode a byte string using Base32hex, which is a Base32 encoding with Extended
+   Hex Alphabet as specified in :rfc:`4648` Section 7.
+
+   *s* is the byte string to encode.  The encoded byte string is returned.
+
+   .. versionadded:: 3.6
+
+
+.. function:: b32hexdecode(s, casefold=False)
+
+   Decode a Base32hex encoded byte string.  Base32hex is a Base32 encoding with
+   Extended Hex Alphabet as specified in :rfc:`4648` Section 7.
+
+   *s* is the byte string to decode.  Optional *casefold* is a flag specifying
+   whether a lowercase alphabet is acceptable as input.  For security purposes,
+   the default is ``False``.
+
+   The decoded byte string is returned.  A :exc:`binascii.Error` is raised if *s* is
+   incorrectly padded or if there are non-alphabet characters present in the
+   string.
+
+   .. versionadded:: 3.6
+
+
 .. function:: b16encode(s)
 
    Encode a byte string using Base16.
diff -r cf91ae981afd Lib/base64.py
--- a/Lib/base64.py	Thu Jul 23 09:11:13 2015 +0300
+++ b/Lib/base64.py	Thu Jul 23 17:49:34 2015 +0300
@@ -16,7 +16,7 @@ import binascii
     'encode', 'decode', 'encodebytes', 'decodebytes',
     # Generalized interface for other encodings
     'b64encode', 'b64decode', 'b32encode', 'b32decode',
-    'b16encode', 'b16decode',
+    'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
     # Base85 and Ascii85 encodings
     'b85encode', 'b85decode', 'a85encode', 'a85decode',
     # Standard Base64 encoding
@@ -141,19 +141,12 @@ def urlsafe_b64decode(s):
 _b32tab2 = None
 _b32rev = None
 
-def b32encode(s):
-    """Encode a byte string using Base32.
+_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
+_b32hextab = None
+_b32hexrev = None
 
-    s is the byte string to encode.  The encoded byte string is returned.
-    """
-    global _b32tab2
-    # Delay the initialization of the table to not waste memory
-    # if the function is never called
-    if _b32tab2 is None:
-        b32tab = [bytes((i,)) for i in _b32alphabet]
-        _b32tab2 = [a + b for a in b32tab for b in b32tab]
-        b32tab = None
-
+def _b32encode(s, b32tab2):
+    # Shared encoder: b32tab2[v] is the 2-symbol encoding of the 10-bit value v.
     if not isinstance(s, bytes_types):
         s = memoryview(s).tobytes()
     leftover = len(s) % 5
@@ -162,7 +155,6 @@ def b32encode(s):
         s = s + bytes(5 - leftover)  # Don't use += !
     encoded = bytearray()
     from_bytes = int.from_bytes
-    b32tab2 = _b32tab2
     for i in range(0, len(s), 5):
         c = from_bytes(s[i: i + 5], 'big')
         encoded += (b32tab2[c >> 30] +           # bits 1 - 10
@@ -181,6 +173,81 @@ def b32encode(s):
         encoded[-1:] = b'='
     return bytes(encoded)
 
+def b32encode(s):
+    """Encode a byte string using Base32.
+
+    s is the byte string to encode.  The encoded byte string is returned.
+    """
+    global _b32tab2
+    # Delay the initialization of the table to not waste memory
+    # if the function is never called
+    if _b32tab2 is None:
+        b32tab = [bytes((i,)) for i in _b32alphabet]
+        _b32tab2 = [a + b for a in b32tab for b in b32tab]
+        b32tab = None
+
+    return _b32encode(s, _b32tab2)
+
+def b32hexencode(s):
+    """Encode a byte string using Base32hex, which is a Base32 encoding
+    with Extended Hex Alphabet as specified in RFC 4648 Section 7.
+
+    s is the byte string to encode.  The encoded byte string is returned.
+    """
+    global _b32hextab
+    # Delay the initialization of the table to not waste memory
+    # if the function is never called
+    if _b32hextab is None:
+        b32tab = [bytes((i,)) for i in _b32hexalphabet]
+        _b32hextab = [a + b for a in b32tab for b in b32tab]
+        b32tab = None
+
+    return _b32encode(s, _b32hextab)
+
+def _b32decode(s, b32rev, casefold=False, map01=None):
+    # Shared decoder: b32rev maps each alphabet byte value to its 5-bit digit.
+    s = _bytes_from_decode_data(s)
+    if len(s) % 8:
+        raise binascii.Error('Incorrect padding')
+    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
+    # None, or the character to map the digit 1 (one) to.  It should be
+    # either L (el) or I (eye).
+    if map01 is not None:
+        map01 = _bytes_from_decode_data(map01)
+        assert len(map01) == 1, repr(map01)
+        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
+    if casefold:
+        s = s.upper()
+    # Strip off pad characters from the right.  We need to count the pad
+    # characters because this will tell us how many null bytes to remove from
+    # the end of the decoded string.
+    l = len(s)
+    s = s.rstrip(b'=')
+    padchars = l - len(s)
+    # Now decode the full quanta
+    decoded = bytearray()
+    for i in range(0, len(s), 8):
+        quanta = s[i: i + 8]
+        acc = 0
+        try:
+            for c in quanta:
+                acc = (acc << 5) + b32rev[c]
+        except KeyError:
+            raise binascii.Error('Non-base32 digit found') from None
+        decoded += acc.to_bytes(5, 'big')
+    # Only 0, 1, 3, 4 or 6 pad characters can end valid input.  Check this
+    # before using acc: for all-padding input the loop never binds it, and
+    # 8 or more pad characters can overflow the 5-byte to_bytes() call.
+    if padchars not in {0, 1, 3, 4, 6}:
+        raise binascii.Error('Incorrect padding')
+    # Process the last, partial quanta
+    if padchars:
+        acc <<= 5 * padchars
+        last = acc.to_bytes(5, 'big')
+        leftover = (43 - 5 * padchars) // 8  # 1: 4, 3: 3, 4: 2, 6: 1
+        decoded[-5:] = last[:leftover]
+    return bytes(decoded)
+
 def b32decode(s, casefold=False, map01=None):
     """Decode a Base32 encoded byte string.
 
@@ -205,51 +272,29 @@ def b32decode(s, casefold=False, map01=N
     # if the function is never called
     if _b32rev is None:
         _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
-    s = _bytes_from_decode_data(s)
-    if len(s) % 8:
-        raise binascii.Error('Incorrect padding')
-    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
-    # False, or the character to map the digit 1 (one) to.  It should be
-    # either L (el) or I (eye).
-    if map01 is not None:
-        map01 = _bytes_from_decode_data(map01)
-        assert len(map01) == 1, repr(map01)
-        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
-    if casefold:
-        s = s.upper()
-    # Strip off pad characters from the right.  We need to count the pad
-    # characters because this will tell us how many null bytes to remove from
-    # the end of the decoded string.
-    l = len(s)
-    s = s.rstrip(b'=')
-    padchars = l - len(s)
-    # Now decode the full quanta
-    decoded = bytearray()
-    b32rev = _b32rev
-    for i in range(0, len(s), 8):
-        quanta = s[i: i + 8]
-        acc = 0
-        try:
-            for c in quanta:
-                acc = (acc << 5) + b32rev[c]
-        except KeyError:
-            raise binascii.Error('Non-base32 digit found') from None
-        decoded += acc.to_bytes(5, 'big')
-    # Process the last, partial quanta
-    if padchars:
-        acc <<= 5 * padchars
-        last = acc.to_bytes(5, 'big')
-        if padchars == 1:
-            decoded[-5:] = last[:-1]
-        elif padchars == 3:
-            decoded[-5:] = last[:-2]
-        elif padchars == 4:
-            decoded[-5:] = last[:-3]
-        elif padchars == 6:
-            decoded[-5:] = last[:-4]
-        else:
-            raise binascii.Error('Incorrect padding')
-    return bytes(decoded)
+
+    return _b32decode(s, _b32rev, casefold, map01)
+
+def b32hexdecode(s, casefold=False):
+    """Decode a Base32hex encoded byte string.  Base32hex is a Base32
+    encoding with Extended Hex Alphabet as specified in RFC 4648
+    Section 7.
+
+    s is the byte string to decode.  Optional casefold is a flag
+    specifying whether a lowercase alphabet is acceptable as input.
+    For security purposes, the default is False.
+
+    The decoded byte string is returned.  binascii.Error is raised if
+    the input is incorrectly padded or if there are non-alphabet
+    characters present in the input.
+    """
+    global _b32hexrev
+    # Delay the initialization of the table to not waste memory
+    # if the function is never called
+    if _b32hexrev is None:
+        _b32hexrev = {v: k for k, v in enumerate(_b32hexalphabet)}
+
+    return _b32decode(s, _b32hexrev, casefold)
 
 
 
diff -r cf91ae981afd Lib/test/test_base64.py
--- a/Lib/test/test_base64.py	Thu Jul 23 09:11:13 2015 +0300
+++ b/Lib/test/test_base64.py	Thu Jul 23 17:49:34 2015 +0300
@@ -331,6 +331,37 @@ class BaseXYTestCase(unittest.TestCase):
             with self.assertRaises(binascii.Error):
                 base64.b32decode(data.decode('ascii'))
 
+    def test_b32hexencode(self):
+        eq = self.assertEqual
+        eq(base64.b32hexencode(b''), b'')
+        eq(base64.b32hexencode(b'f'), b'CO======')
+        eq(base64.b32hexencode(b'fo'), b'CPNG====')
+        eq(base64.b32hexencode(b'foo'), b'CPNMU===')
+        eq(base64.b32hexencode(b'foob'), b'CPNMUOG=')
+        eq(base64.b32hexencode(b'fooba'), b'CPNMUOJ1')
+        eq(base64.b32hexencode(b'foobar'), b'CPNMUOJ1E8======')
+        eq(base64.b32hexencode(b'\x00D2\x14\xc7BT\xb65\xcf\x84e:V\xd7\xc6u\xbew\xdf'), b'0123456789ABCDEFGHIJKLMNOPQRSTUV')
+
+    def test_b32hexdecode(self):
+        eq = self.assertEqual
+        eq(base64.b32hexdecode(b''), b'')
+        eq(base64.b32hexdecode(b'CO======'), b'f')
+        eq(base64.b32hexdecode(b'CPNG===='), b'fo')
+        eq(base64.b32hexdecode(b'CPNMU==='), b'foo')
+        eq(base64.b32hexdecode(b'CPNMUOG='), b'foob')
+        eq(base64.b32hexdecode(b'CPNMUOJ1'), b'fooba')
+        eq(base64.b32hexdecode(b'CPNMUOJ1E8======'), b'foobar')
+        eq(base64.b32hexdecode(b'0123456789ABCDEFGHIJKLMNOPQRSTUV'), b'\x00D2\x14\xc7BT\xb65\xcf\x84e:V\xd7\xc6u\xbew\xdf')
+        eq(base64.b32hexdecode(b'cpnmuoj1', casefold=True), b'fooba')
+
+    def test_b32hexdecode_error(self):
+        tests = [b'abc', b'ABCDEF==', b'==ABCDEF', b'co======', b'W0======',
+                 b'========', b'CPNMUOJ1========']
+        for data in tests:
+            with self.subTest(data=data):
+                with self.assertRaises(binascii.Error):
+                    base64.b32hexdecode(data)
+
     def test_b16encode(self):
         eq = self.assertEqual
         eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF')