Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(32775)

Delta Between Two Patch Sets: Lib/base64.py

Issue 17839: base64 module should use memoryview
Left Patch Set: Created 6 years, 4 months ago
Right Patch Set: Created 6 years, 4 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « Doc/library/codecs.rst ('k') | Lib/test/test_base64.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 #! /usr/bin/env python3 1 #! /usr/bin/env python3
2 2
3 """RFC 3548: Base16, Base32, Base64 Data Encodings""" 3 """RFC 3548: Base16, Base32, Base64 Data Encodings"""
4 4
5 # Modified 04-Oct-1995 by Jack Jansen to use binascii module 5 # Modified 04-Oct-1995 by Jack Jansen to use binascii module
6 # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support 6 # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
7 # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere 7 # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
8 8
9 import re 9 import re
10 import struct 10 import struct
11 import binascii 11 import binascii
12 12
13 13
14 __all__ = [ 14 __all__ = [
15 # Legacy interface exports traditional RFC 1521 Base64 encodings 15 # Legacy interface exports traditional RFC 1521 Base64 encodings
16 'encode', 'decode', 'encodebytes', 'decodebytes', 16 'encode', 'decode', 'encodebytes', 'decodebytes',
17 # Generalized interface for other encodings 17 # Generalized interface for other encodings
18 'b64encode', 'b64decode', 'b32encode', 'b32decode', 18 'b64encode', 'b64decode', 'b32encode', 'b32decode',
19 'b16encode', 'b16decode', 19 'b16encode', 'b16decode',
20 # Standard Base64 encoding 20 # Standard Base64 encoding
21 'standard_b64encode', 'standard_b64decode', 21 'standard_b64encode', 'standard_b64decode',
22 # Some common Base64 alternatives. As referenced by RFC 3458, see thread 22 # Some common Base64 alternatives. As referenced by RFC 3458, see thread
23 # starting at: 23 # starting at:
24 # 24 #
25 # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html 25 # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
26 'urlsafe_b64encode', 'urlsafe_b64decode', 26 'urlsafe_b64encode', 'urlsafe_b64decode',
27 ] 27 ]
28 28
29 29
30 bytes_types = (bytes, bytearray, memoryview) # Types acceptable as binary data 30 bytes_types = (bytes, bytearray) # Types acceptable as binary data
31 31
32 def _bytes_from_decode_data(s): 32 def _bytes_from_decode_data(s):
33 if isinstance(s, str): 33 if isinstance(s, str):
34 try: 34 try:
35 return s.encode('ascii') 35 return s.encode('ascii')
36 except UnicodeEncodeError: 36 except UnicodeEncodeError:
37 raise ValueError('string argument should contain only ASCII characters') 37 raise ValueError('string argument should contain only ASCII characters')
38 elif isinstance(s, bytes_types): 38 if isinstance(s, bytes_types):
39 return s 39 return s
40 else: 40 try:
41 raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__) 41 return memoryview(s).tobytes()
42 42 except TypeError:
43 raise TypeError("argument should be a bytes-like object or ASCII "
44 "string, not %r" % s.__class__.__name__) from None
43 45
44 46
45 # Base64 encoding/decoding uses binascii 47 # Base64 encoding/decoding uses binascii
46 48
47 def b64encode(s, altchars=None): 49 def b64encode(s, altchars=None):
48 """Encode a byte string using Base64. 50 """Encode a byte string using Base64.
49 51
50 s is the byte string to encode. Optional altchars must be a byte 52 s is the byte string to encode. Optional altchars must be a byte
51 string of length 2 which specifies an alternative alphabet for the 53 string of length 2 which specifies an alternative alphabet for the
52 '+' and '/' characters. This allows an application to 54 '+' and '/' characters. This allows an application to
53 e.g. generate url or filesystem safe Base64 strings. 55 e.g. generate url or filesystem safe Base64 strings.
54 56
55 The encoded byte string is returned. 57 The encoded byte string is returned.
56 """ 58 """
57 if not isinstance(s, bytes_types):
58 raise TypeError("expected bytes, not %s" % s.__class__.__name__)
59 # Strip off the trailing newline 59 # Strip off the trailing newline
60 encoded = binascii.b2a_base64(s)[:-1] 60 encoded = binascii.b2a_base64(s)[:-1]
61 if altchars is not None: 61 if altchars is not None:
62 if not isinstance(altchars, bytes_types):
63 raise TypeError("expected bytes, not %s"
64 % altchars.__class__.__name__)
65 assert len(altchars) == 2, repr(altchars) 62 assert len(altchars) == 2, repr(altchars)
66 return encoded.translate(bytes.maketrans(b'+/', altchars)) 63 return encoded.translate(bytes.maketrans(b'+/', altchars))
67 return encoded 64 return encoded
68 65
69 66
70 def b64decode(s, altchars=None, validate=False): 67 def b64decode(s, altchars=None, validate=False):
71 """Decode a Base64 encoded byte string. 68 """Decode a Base64 encoded byte string.
72 69
73 s is the byte string to decode. Optional altchars must be a 70 s is the byte string to decode. Optional altchars must be a
74 string of length 2 which specifies the alternative alphabet used 71 string of length 2 which specifies the alternative alphabet used
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
131 128
132 The alphabet uses '-' instead of '+' and '_' instead of '/'. 129 The alphabet uses '-' instead of '+' and '_' instead of '/'.
133 """ 130 """
134 s = _bytes_from_decode_data(s) 131 s = _bytes_from_decode_data(s)
135 s = s.translate(_urlsafe_decode_translation) 132 s = s.translate(_urlsafe_decode_translation)
136 return b64decode(s) 133 return b64decode(s)
137 134
138 135
139 136
140 # Base32 encoding/decoding must be done in Python 137 # Base32 encoding/decoding must be done in Python
141 _b32alphabet = { 138 _b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
142 0: b'A', 9: b'J', 18: b'S', 27: b'3', 139 _b32tab = [bytes([i]) for i in _b32alphabet]
143 1: b'B', 10: b'K', 19: b'T', 28: b'4', 140 _b32tab2 = [a + b for a in _b32tab for b in _b32tab]
144 2: b'C', 11: b'L', 20: b'U', 29: b'5', 141 _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
145 3: b'D', 12: b'M', 21: b'V', 30: b'6',
146 4: b'E', 13: b'N', 22: b'W', 31: b'7',
147 5: b'F', 14: b'O', 23: b'X',
148 6: b'G', 15: b'P', 24: b'Y',
149 7: b'H', 16: b'Q', 25: b'Z',
150 8: b'I', 17: b'R', 26: b'2',
151 }
152
153 _b32tab = [v[0] for k, v in sorted(_b32alphabet.items())]
154 _b32rev = dict([(v[0], k) for k, v in _b32alphabet.items()])
155
156 142
157 def b32encode(s): 143 def b32encode(s):
158 """Encode a byte string using Base32. 144 """Encode a byte string using Base32.
159 145
160 s is the byte string to encode. The encoded byte string is returned. 146 s is the byte string to encode. The encoded byte string is returned.
161 """ 147 """
162 if not isinstance(s, bytes_types): 148 if not isinstance(s, bytes_types):
163 raise TypeError("expected bytes, not %s" % s.__class__.__name__) 149 s = memoryview(s).tobytes()
164 if isinstance(s, memoryview): 150 leftover = len(s) % 5
165 s = s.tobytes()
166 quanta, leftover = divmod(len(s), 5)
167 # Pad the last quantum with zero bits if necessary 151 # Pad the last quantum with zero bits if necessary
168 if leftover: 152 if leftover:
169 s = s + bytes(5 - leftover) # Don't use += ! 153 s = s + bytes(5 - leftover) # Don't use += !
170 quanta += 1 154 encoded = bytearray()
171 encoded = bytes() 155 from_bytes = int.from_bytes
172 for i in range(quanta): 156 b32tab2 = _b32tab2
173 # c1 and c2 are 16 bits wide, c3 is 8 bits wide. The intent of this 157 for i in range(0, len(s), 5):
174 # code is to process the 40 bits in units of 5 bits. So we take the 1 158 c = from_bytes(s[i: i + 5], 'big')
175 # leftover bit of c1 and tack it onto c2. Then we take the 2 leftover 159 encoded += (b32tab2[c >> 30] + # bits 1 - 10
176 # bits of c2 and tack them onto c3. The shifts and masks are intended 160 b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
177 # to give us values of exactly 5 bits in width. 161 b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
178 c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5]) 162 b32tab2[c & 0x3ff] # bits 31 - 40
179 c2 += (c1 & 1) << 16 # 17 bits wide 163 )
180 c3 += (c2 & 3) << 8 # 10 bits wide
181 encoded += bytes([_b32tab[c1 >> 11], # bits 1 - 5
182 _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
183 _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
184 _b32tab[c2 >> 12], # bits 16 - 20 (1 - 5)
185 _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
186 _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
187 _b32tab[c3 >> 5], # bits 31 - 35 (1 - 5)
188 _b32tab[c3 & 0x1f], # bits 36 - 40 (1 - 5)
189 ])
190 # Adjust for any leftover partial quanta 164 # Adjust for any leftover partial quanta
191 if leftover == 1: 165 if leftover == 1:
192 return encoded[:-6] + b'======' 166 encoded[-6:] = b'======'
193 elif leftover == 2: 167 elif leftover == 2:
194 return encoded[:-4] + b'====' 168 encoded[-4:] = b'===='
195 elif leftover == 3: 169 elif leftover == 3:
196 return encoded[:-3] + b'===' 170 encoded[-3:] = b'==='
197 elif leftover == 4: 171 elif leftover == 4:
198 return encoded[:-1] + b'=' 172 encoded[-1:] = b'='
199 return encoded 173 return bytes(encoded)
200
201 174
202 def b32decode(s, casefold=False, map01=None): 175 def b32decode(s, casefold=False, map01=None):
203 """Decode a Base32 encoded byte string. 176 """Decode a Base32 encoded byte string.
204 177
205 s is the byte string to decode. Optional casefold is a flag 178 s is the byte string to decode. Optional casefold is a flag
206 specifying whether a lowercase alphabet is acceptable as input. 179 specifying whether a lowercase alphabet is acceptable as input.
207 For security purposes, the default is False. 180 For security purposes, the default is False.
208 181
209 RFC 3548 allows for optional mapping of the digit 0 (zero) to the 182 RFC 3548 allows for optional mapping of the digit 0 (zero) to the
210 letter O (oh), and for optional mapping of the digit 1 (one) to 183 letter O (oh), and for optional mapping of the digit 1 (one) to
211 either the letter I (eye) or letter L (el). The optional argument 184 either the letter I (eye) or letter L (el). The optional argument
212 map01 when not None, specifies which letter the digit 1 should be 185 map01 when not None, specifies which letter the digit 1 should be
213 mapped to (when map01 is not None, the digit 0 is always mapped to 186 mapped to (when map01 is not None, the digit 0 is always mapped to
214 the letter O). For security purposes the default is None, so that 187 the letter O). For security purposes the default is None, so that
215 0 and 1 are not allowed in the input. 188 0 and 1 are not allowed in the input.
216 189
217 The decoded byte string is returned. binascii.Error is raised if 190 The decoded byte string is returned. binascii.Error is raised if
218 the input is incorrectly padded or if there are non-alphabet 191 the input is incorrectly padded or if there are non-alphabet
219 characters present in the input. 192 characters present in the input.
220 """ 193 """
221 s = _bytes_from_decode_data(s) 194 s = _bytes_from_decode_data(s)
222 quanta, leftover = divmod(len(s), 8) 195 if len(s) % 8:
223 if leftover:
224 raise binascii.Error('Incorrect padding') 196 raise binascii.Error('Incorrect padding')
225 # Handle section 2.4 zero and one mapping. The flag map01 will be either 197 # Handle section 2.4 zero and one mapping. The flag map01 will be either
226 # False, or the character to map the digit 1 (one) to. It should be 198 # False, or the character to map the digit 1 (one) to. It should be
227 # either L (el) or I (eye). 199 # either L (el) or I (eye).
228 if map01 is not None: 200 if map01 is not None:
229 map01 = _bytes_from_decode_data(map01) 201 map01 = _bytes_from_decode_data(map01)
230 assert len(map01) == 1, repr(map01) 202 assert len(map01) == 1, repr(map01)
231 s = s.translate(bytes.maketrans(b'01', b'O' + map01)) 203 s = s.translate(bytes.maketrans(b'01', b'O' + map01))
232 if casefold: 204 if casefold:
233 s = s.upper() 205 s = s.upper()
234 # Strip off pad characters from the right. We need to count the pad 206 # Strip off pad characters from the right. We need to count the pad
235 # characters because this will tell us how many null bytes to remove from 207 # characters because this will tell us how many null bytes to remove from
236 # the end of the decoded string. 208 # the end of the decoded string.
237 padchars = 0 209 l = len(s)
238 mo = re.search(b'(?P<pad>[=]*)$', s) 210 s = s.rstrip(b'=')
239 if mo: 211 padchars = l - len(s)
240 padchars = len(mo.group('pad'))
241 if padchars > 0:
242 s = s[:-padchars]
243 # Now decode the full quanta 212 # Now decode the full quanta
244 parts = [] 213 decoded = bytearray()
245 acc = 0 214 b32rev = _b32rev
246 shift = 35 215 for i in range(0, len(s), 8):
247 for c in s: 216 quanta = s[i: i + 8]
248 val = _b32rev.get(c) 217 acc = 0
249 if val is None: 218 try:
219 for c in quanta:
220 acc = (acc << 5) + b32rev[c]
221 except KeyError:
250 raise TypeError('Non-base32 digit found') 222 raise TypeError('Non-base32 digit found')
251 acc += _b32rev[c] << shift 223 decoded += acc.to_bytes(5, 'big')
252 shift -= 5
253 if shift < 0:
254 parts.append(binascii.unhexlify(bytes('%010x' % acc, "ascii")))
255 acc = 0
256 shift = 35
257 # Process the last, partial quanta 224 # Process the last, partial quanta
258 last = binascii.unhexlify(bytes('%010x' % acc, "ascii")) 225 if padchars:
259 if padchars == 0: 226 acc <<= 5 * padchars
260 last = b'' # No characters 227 last = acc.to_bytes(5, 'big')
261 elif padchars == 1: 228 if padchars == 1:
262 last = last[:-1] 229 decoded[-5:] = last[:-1]
263 elif padchars == 3: 230 elif padchars == 3:
264 last = last[:-2] 231 decoded[-5:] = last[:-2]
265 elif padchars == 4: 232 elif padchars == 4:
266 last = last[:-3] 233 decoded[-5:] = last[:-3]
267 elif padchars == 6: 234 elif padchars == 6:
268 last = last[:-4] 235 decoded[-5:] = last[:-4]
269 else: 236 else:
270 raise binascii.Error('Incorrect padding') 237 raise binascii.Error('Incorrect padding')
271 parts.append(last) 238 return bytes(decoded)
272 return b''.join(parts)
273 239
274 240
275 241
276 # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns 242 # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
277 # lowercase. The RFC also recommends against accepting input case 243 # lowercase. The RFC also recommends against accepting input case
278 # insensitively. 244 # insensitively.
279 def b16encode(s): 245 def b16encode(s):
280 """Encode a byte string using Base16. 246 """Encode a byte string using Base16.
281 247
282 s is the byte string to encode. The encoded byte string is returned. 248 s is the byte string to encode. The encoded byte string is returned.
283 """ 249 """
284 if not isinstance(s, bytes_types):
285 raise TypeError("expected bytes, not %s" % s.__class__.__name__)
286 return binascii.hexlify(s).upper() 250 return binascii.hexlify(s).upper()
287 251
288 252
289 def b16decode(s, casefold=False): 253 def b16decode(s, casefold=False):
290 """Decode a Base16 encoded byte string. 254 """Decode a Base16 encoded byte string.
291 255
292 s is the byte string to decode. Optional casefold is a flag 256 s is the byte string to decode. Optional casefold is a flag
293 specifying whether a lowercase alphabet is acceptable as input. 257 specifying whether a lowercase alphabet is acceptable as input.
294 For security purposes, the default is False. 258 For security purposes, the default is False.
295 259
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
330 294
331 def decode(input, output): 295 def decode(input, output):
332 """Decode a file; input and output are binary files.""" 296 """Decode a file; input and output are binary files."""
333 while True: 297 while True:
334 line = input.readline() 298 line = input.readline()
335 if not line: 299 if not line:
336 break 300 break
337 s = binascii.a2b_base64(line) 301 s = binascii.a2b_base64(line)
338 output.write(s) 302 output.write(s)
339 303
304 def _input_type_check(s):
305 try:
306 memoryview(s)
307 except TypeError as err:
308 msg = "expected bytes-like object, not %s" % s.__class__.__name__
309 raise TypeError(msg) from err
340 310
341 def encodebytes(s): 311 def encodebytes(s):
342 """Encode a bytestring into a bytestring containing multiple lines 312 """Encode a bytestring into a bytestring containing multiple lines
343 of base-64 data.""" 313 of base-64 data."""
344 if not isinstance(s, bytes_types): 314 _input_type_check(s)
345 raise TypeError("expected bytes, not %s" % s.__class__.__name__)
346 pieces = [] 315 pieces = []
347 for i in range(0, len(s), MAXBINSIZE): 316 for i in range(0, len(s), MAXBINSIZE):
348 chunk = s[i : i + MAXBINSIZE] 317 chunk = s[i : i + MAXBINSIZE]
349 pieces.append(binascii.b2a_base64(chunk)) 318 pieces.append(binascii.b2a_base64(chunk))
350 return b"".join(pieces) 319 return b"".join(pieces)
351 320
352 def encodestring(s): 321 def encodestring(s):
353 """Legacy alias of encodebytes().""" 322 """Legacy alias of encodebytes()."""
354 import warnings 323 import warnings
355 warnings.warn("encodestring() is a deprecated alias, use encodebytes()", 324 warnings.warn("encodestring() is a deprecated alias, use encodebytes()",
356 DeprecationWarning, 2) 325 DeprecationWarning, 2)
357 return encodebytes(s) 326 return encodebytes(s)
358 327
359 328
360 def decodebytes(s): 329 def decodebytes(s):
361 """Decode a bytestring of base-64 data into a bytestring.""" 330 """Decode a bytestring of base-64 data into a bytestring."""
362 if not isinstance(s, bytes_types): 331 _input_type_check(s)
363 raise TypeError("expected bytes, not %s" % s.__class__.__name__)
364 return binascii.a2b_base64(s) 332 return binascii.a2b_base64(s)
365 333
366 def decodestring(s): 334 def decodestring(s):
367 """Legacy alias of decodebytes().""" 335 """Legacy alias of decodebytes()."""
368 import warnings 336 import warnings
369 warnings.warn("decodestring() is a deprecated alias, use decodebytes()", 337 warnings.warn("decodestring() is a deprecated alias, use decodebytes()",
370 DeprecationWarning, 2) 338 DeprecationWarning, 2)
371 return decodebytes(s) 339 return decodebytes(s)
372 340
373 341
(...skipping 29 matching lines...) Expand all
403 print(repr(s0)) 371 print(repr(s0))
404 s1 = encodebytes(s0) 372 s1 = encodebytes(s0)
405 print(repr(s1)) 373 print(repr(s1))
406 s2 = decodebytes(s1) 374 s2 = decodebytes(s1)
407 print(repr(s2)) 375 print(repr(s2))
408 assert s0 == s2 376 assert s0 == s2
409 377
410 378
411 if __name__ == '__main__': 379 if __name__ == '__main__':
412 main() 380 main()
LEFTRIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+