# NOTE(review): the same patch hunk also adds 'b85encode' and 'b85decode' to
# the module's list of exported names (right after 'b16encode', 'b16decode');
# that one-line addition must be kept alongside this section.

#
# Ascii85 encoding/decoding
#

_B85START = b"<~"
_B85END = b"~>"
_B85FOLDNUL = b"z"
_B85FOLDSPACE = b"y"
# Bytes that are skipped between Ascii85 digits when decoding.
_B85WHITESPACE = b" \t\n\r\v"


def _b85_encode_word(word):
    """Return the five Ascii85 digits (as bytes) encoding a 32-bit integer."""
    digits = bytearray(5)
    # Fill from the least significant digit backwards.
    for i in range(4, -1, -1):
        word, rem = divmod(word, 85)
        digits[i] = rem + 33
    return bytes(digits)


def _b85_decode_group(digits):
    """Return the 4 bytes encoded by a sequence of five Ascii85 digits.

    Raises TypeError for a byte outside '!'..'u' and binascii.Error when the
    group encodes a value that does not fit in 32 bits.
    """
    val = 0
    for c in digits:
        # Check both bounds: a byte below '!' would give a negative digit.
        if not 33 <= c <= 117:
            raise TypeError("Non-base85 digit found: {}".format(chr(c)))
        val = val * 85 + (c - 33)
    try:
        return struct.pack("!I", val)
    except struct.error:
        # e.g. b'uuuuu' is 85**5 - 1, which is larger than 2**32 - 1
        raise binascii.Error("Ascii85 overflow") from None


def b85encode(b, *, foldspaces=False):
    """Encode a byte string using Ascii85.

    b is the byte string to encode.  The encoded byte string is returned,
    bracketed by b'<~' and b'~>'.

    foldspaces is an optional flag that uses the special short sequence 'y'
    instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'.  This
    feature is not supported by the "standard" Adobe encoding.

    Raises TypeError if b is not an instance of the module's bytes_types.
    """
    if not isinstance(b, bytes_types):
        raise TypeError("expected bytes, not {}".format(b.__class__.__name__))
    quanta, leftover = divmod(len(b), 4)

    # Collect the pieces and join once; repeated ``bytes +=`` is quadratic.
    pieces = [_B85START]
    for word in struct.unpack("!{}I".format(quanta), b[:quanta * 4]):
        if word == 0:
            pieces.append(_B85FOLDNUL)
        elif foldspaces and word == 0x20202020:
            pieces.append(_B85FOLDSPACE)
        else:
            pieces.append(_b85_encode_word(word))
    if leftover:
        # Zero-pad the final partial group, then keep only leftover + 1
        # digits.  The 'z'/'y' short forms never apply to a partial group.
        tail = bytes(b[quanta * 4:]) + b"\0" * (4 - leftover)
        word, = struct.unpack("!I", tail)
        pieces.append(_b85_encode_word(word)[:leftover + 1])
    pieces.append(_B85END)
    return b"".join(pieces)


def b85decode(b, *, foldspaces=False):
    """Decode an Ascii85 encoded byte string.

    b is the byte string (or ASCII str) to decode; it must be bracketed by
    '<~' and '~>'.  The decoded bytes are returned.  Spaces, tabs, newlines,
    carriage returns and vertical tabs inside the data are ignored.

    foldspaces is a flag that specifies whether the 'y' short sequence should be
    accepted as shorthand for 4 consecutive spaces (ASCII 0x20).  This feature
    is not supported by the "standard" Adobe encoding.  When it is false, 'y'
    is treated like any other non-base85 digit.

    Raises binascii.Error if the brackets are missing, if a 'z' (or folded
    'y') appears in the middle of a 5-digit group, or if a group encodes a
    value larger than 32 bits.  Raises TypeError if a non-base85 digit is
    found.
    """
    b = _bytes_from_decode_data(b)
    # The length test rejects b'<~>', where the two markers would overlap.
    if (len(b) < len(_B85START) + len(_B85END)
            or not b.startswith(_B85START) or not b.endswith(_B85END)):
        raise binascii.Error("Ascii85 encoded byte sequences must be bracketed "
                             "by {!r} and {!r}".format(_B85START, _B85END))

    fold_nul = _B85FOLDNUL[0]
    fold_space = _B85FOLDSPACE[0]
    pad_digit = b"u"[0]            # highest digit; used to pad a short group

    pieces = []
    group = []
    for c in b[len(_B85START):-len(_B85END)]:
        if c in _B85WHITESPACE:
            continue
        if c == fold_nul or (foldspaces and c == fold_space):
            # Short forms stand for a whole group, so they may not split one.
            if group:
                raise binascii.Error(
                    "{} inside Ascii85 5-tuple".format(chr(c)))
            pieces.append(b"\0\0\0\0" if c == fold_nul
                          else b"\x20\x20\x20\x20")
            continue
        group.append(c)
        if len(group) == 5:
            pieces.append(_b85_decode_group(group))
            group = []
    if group:
        # Final partial group: pad with 'u' and drop as many decoded bytes
        # as padding digits were added.
        padding = 5 - len(group)
        pieces.append(_b85_decode_group(group + [pad_digit] * padding)[:-padding])
    return b"".join(pieces)


# Legacy interface.  This code could be cleaned up since I don't believe
# binascii has any line length limitations.
It just doesn't seem worth it diff -r 723b2abd94f9 Lib/test/test_base64.py --- a/Lib/test/test_base64.py Sat Apr 06 18:28:19 2013 +0100 +++ b/Lib/test/test_base64.py Sun Apr 07 16:50:22 2013 +0100 @@ -260,12 +260,124 @@ eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef') eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef') + def test_b85encode(self): + eq = self.assertEqual + + tests = { + b"www.python.org": b'<~GB\\6`E-ZP=Df.1GEb>~>', + b"no padding..": b'<~DJpY:@:Wn_DJ(RS~>', + b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"0123456789!@#0^&*();:<>,. []{}": + b'<~@:E_WAS,RgBkhF"D/O92EH6,BF`qtRH$VbC6UX@47n?3D92&&T' + b":Jand;cHat='/U/0JP==1c70M3&r-I,;", + b"zero compression\0\0\0\0": b'<~H=_,8+Cf>,E,oN2F(oQ1z~>', + b"Boundary:\0\0\0\0": b'<~6>q!aA79M(3WK-[!!~>', + b"Space compr: ": b'<~;fH/TAKYK$D/aMV+', + bytes(range(255)): b"""<~!!*-'"9eu7#RLhG$k3[W&.oNg'GVB"(`=52*$$""" + b"""(B+<_pR,UFcb-n-Vr/1iJ-0JP==1c70M3&s#]4?Ykm5X@_(6q'R884cE""" + b"""H9MJ8X:f1+h<)lt#=BSg3>[:ZC?t!MSA7]@cBPD3sCi+'.E,fo>FEMbN""" + b"""G^4U^I!pHnJ:W<)KS>/9Ll%"IN/`jYOHG]iPa.Q$R$jD4S=Q7DTV8*TU""" + b"""nsrdW2ZetXKAY/Yd(L?['d?O\\@K2_]Y2%o^qmn*`5Ta:aN;TJbg"GZd""" + b"""*^:jeCE.%f\\,!5gtgiEi8N\\UjQ5OekiqBum-X60nF?)@o_%qPq"ad`""" + b"""r;HT~>""", + } + + for data, res in tests.items(): + eq(base64.b85encode(data), res) + + self.assertRaises(TypeError, base64.b85encode, "") + + def test_b85encode_foldspaces(self): + eq = self.assertEqual + + tests = { + b"www.python.org": b'<~GB\\6`E-ZP=Df.1GEb>~>', + b"no padding..": b'<~DJpY:@:Wn_DJ(RS~>', + b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"0123456789!@#0^&*();:<>,. 
[]{}": + b'<~@:E_WAS,RgBkhF"D/O92EH6,BF`qtRH$VbC6UX@47n?3D92&&T' + b":Jand;cHat='/U/0JP==1c70M3&r-I,;", + b"zero compression\0\0\0\0": b'<~H=_,8+Cf>,E,oN2F(oQ1z~>', + b"Boundary:\x00\x00\x00\x00": b'<~6>q!aA79M(3WK-[!!~>', + b"Space compr: ": b'<~;fH/TAKYK$D/aMVy~>', + bytes(range(255)): b"""<~!!*-'"9eu7#RLhG$k3[W&.oNg'GVB"(`=52*$$""" + b"""(B+<_pR,UFcb-n-Vr/1iJ-0JP==1c70M3&s#]4?Ykm5X@_(6q'R884cE""" + b"""H9MJ8X:f1+h<)lt#=BSg3>[:ZC?t!MSA7]@cBPD3sCi+'.E,fo>FEMbN""" + b"""G^4U^I!pHnJ:W<)KS>/9Ll%"IN/`jYOHG]iPa.Q$R$jD4S=Q7DTV8*TU""" + b"""nsrdW2ZetXKAY/Yd(L?['d?O\\@K2_]Y2%o^qmn*`5Ta:aN;TJbg"GZd""" + b"""*^:jeCE.%f\\,!5gtgiEi8N\\UjQ5OekiqBum-X60nF?)@o_%qPq"ad`""" + b"""r;HT~>""", + } + + for data, res in tests.items(): + eq(base64.b85encode(data, foldspaces=True), res) + + def test_b85decode(self): + eq = self.assertEqual + + tests = { + b'<~GB\\6`E-ZP=Df.1GEb>~>': b'www.python.org', + b'<~H=_,8+Cf>,E,oN2F(oQ1z~>': b'zero compression\x00\x00\x00\x00', + b"""<~!!*-'"9eu7#RLhG$k3[W&.oNg'GVB"(`=52*$$""" + b"""(B+<_pR,UFcb-n-Vr/1iJ-0JP==1c70M3&s#]4?Ykm5X@_(6q'R884cE""" + b"""H9MJ8X:f1+h<)lt#=BSg3>[:ZC?t!MSA7]@cBPD3sCi+'.E,fo>FEMbN""" + b"""G^4U^I!pHnJ:W<)KS>/9Ll%"IN/`jYOHG]iPa.Q$R$jD4S=Q7DTV8*TU""" + b"""nsrdW2ZetXKAY/Yd(L?['d?O\\@K2_]Y2%o^qmn*`5Ta:aN;TJbg"GZd""" + b"""*^:jeCE.%f\\,!5gtgiEi8N\\UjQ5OekiqBum-X60nF?)@o_%qPq"ad`""" + b"""r;HT~>""": bytes(range(255)), + b'<~DJpY:@:Wn_DJ(RS~>': b'no padding..', + b"""<~@:E_WAS,RgBkhF"D/O92EH6,BF`qtRH$VbC6UX@47n?3D92&&T:Jand;c""" + b"""Hat='/U/0JP==1c70M3&r-I,;""": + b'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234' + b'56789!@#0^&*();:<>,. 
[]{}', + b'<~6>q!aA79M(3WK-[!!~>': b"Boundary:\x00\x00\x00\x00", + b'<~;fH/TAKYK$D/aMV+': b'Space compr: ' + } + + for data, res in tests.items(): + eq(base64.b85decode(data), res) + eq(base64.b85decode(data.decode("ascii")), res) + + def test_b85decode_foldspaces(self): + eq = self.assertEqual + + tests = { + b'<~GB\\6`E-ZP=Df.1GEb>~>': b'www.python.org', + b'<~H=_,8+Cf>,E,oN2F(oQ1z~>': b'zero compression\x00\x00\x00\x00', + b"""<~!!*-'"9eu7#RLhG$k3[W&.oNg'GVB"(`=52*$$""" + b"""(B+<_pR,UFcb-n-Vr/1iJ-0JP==1c70M3&s#]4?Ykm5X@_(6q'R884cE""" + b"""H9MJ8X:f1+h<)lt#=BSg3>[:ZC?t!MSA7]@cBPD3sCi+'.E,fo>FEMbN""" + b"""G^4U^I!pHnJ:W<)KS>/9Ll%"IN/`jYOHG]iPa.Q$R$jD4S=Q7DTV8*TU""" + b"""nsrdW2ZetXKAY/Yd(L?['d?O\\@K2_]Y2%o^qmn*`5Ta:aN;TJbg"GZd""" + b"""*^:jeCE.%f\\,!5gtgiEi8N\\UjQ5OekiqBum-X60nF?)@o_%qPq"ad`""" + b"""r;HT~>""": bytes(range(255)), + b'<~DJpY:@:Wn_DJ(RS~>': b'no padding..', + b"""<~@:E_WAS,RgBkhF"D/O92EH6,BF`qtRH$VbC6UX@47n?3D92&&T:Jand;c""" + b"""Hat='/U/0JP==1c70M3&r-I,;""": + b'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234' + b'56789!@#0^&*();:<>,. 
[]{}', + b'<~6>q!aA79M(3WK-[!!~>': b'Boundary:\x00\x00\x00\x00', + b'<~;fH/TAKYK$D/aMVy~>': b'Space compr: ' + } + + for data, res in tests.items(): + eq(base64.b85decode(data, foldspaces=True), res) + eq(base64.b85decode(data.decode("ascii"), foldspaces=True), res) + + def test_b85decode_errors(self): + self.assertRaises(binascii.Error, base64.b85decode, "malformed") + self.assertRaises(binascii.Error, base64.b85decode, "<~still malformed") + self.assertRaises(binascii.Error, base64.b85decode, "also malformed~>") + self.assertRaises(TypeError, base64.b85decode, "<~abcx~>") + self.assertRaises(TypeError, base64.b85decode, "<~abcdey~>") + def test_decode_nonascii_str(self): decode_funcs = (base64.b64decode, base64.standard_b64decode, base64.urlsafe_b64decode, base64.b32decode, - base64.b16decode) + base64.b16decode, + base64.b85decode) for f in decode_funcs: self.assertRaises(ValueError, f, 'with non-ascii \xcb')