Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(13336)

Delta Between Two Patch Sets: Lib/base64.py

Issue 17839: base64 module should use memoryview
Left Patch Set: Created 6 years, 4 months ago
Right Patch Set: Created 6 years, 3 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « Doc/library/codecs.rst ('k') | Lib/test/test_base64.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 #! /usr/bin/env python3 1 #! /usr/bin/env python3
2 2
3 """RFC 3548: Base16, Base32, Base64 Data Encodings""" 3 """RFC 3548: Base16, Base32, Base64 Data Encodings"""
4 4
5 # Modified 04-Oct-1995 by Jack Jansen to use binascii module 5 # Modified 04-Oct-1995 by Jack Jansen to use binascii module
6 # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support 6 # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
7 # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere 7 # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
8 8
9 import re 9 import re
10 import struct 10 import struct
(...skipping 20 matching lines...) Expand all
31 31
32 def _bytes_from_decode_data(s): 32 def _bytes_from_decode_data(s):
33 if isinstance(s, str): 33 if isinstance(s, str):
34 try: 34 try:
35 return s.encode('ascii') 35 return s.encode('ascii')
36 except UnicodeEncodeError: 36 except UnicodeEncodeError:
37 raise ValueError('string argument should contain only ASCII characters') 37 raise ValueError('string argument should contain only ASCII characters')
38 if isinstance(s, bytes_types): 38 if isinstance(s, bytes_types):
39 return s 39 return s
40 try: 40 try:
41 return memoryview(s).tobytes() 41 return memoryview(s).tobytes()
Nick Coghlan 2013/05/19 13:22:58 While I don't believe the current incarnation of t… [comment preview truncated]
42 except TypeError: 42 except TypeError:
43 raise TypeError("argument should be bytes-like object or ASCII string, " 43 raise TypeError("argument should be a bytes-like object or ASCII "
ezio.melotti 2013/05/07 09:38:03 should be a bytes-like object
44 "not %s" % s.__class__.__name__) from None 44 "string, not %r" % s.__class__.__name__) from None
45 45
46 46
47 # Base64 encoding/decoding uses binascii 47 # Base64 encoding/decoding uses binascii
48 48
49 def b64encode(s, altchars=None): 49 def b64encode(s, altchars=None):
50 """Encode a byte string using Base64. 50 """Encode a byte string using Base64.
51 51
52 s is the byte string to encode. Optional altchars must be a byte 52 s is the byte string to encode. Optional altchars must be a byte
53 string of length 2 which specifies an alternative alphabet for the 53 string of length 2 which specifies an alternative alphabet for the
54 '+' and '/' characters. This allows an application to 54 '+' and '/' characters. This allows an application to
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
128 128
129 The alphabet uses '-' instead of '+' and '_' instead of '/'. 129 The alphabet uses '-' instead of '+' and '_' instead of '/'.
130 """ 130 """
131 s = _bytes_from_decode_data(s) 131 s = _bytes_from_decode_data(s)
132 s = s.translate(_urlsafe_decode_translation) 132 s = s.translate(_urlsafe_decode_translation)
133 return b64decode(s) 133 return b64decode(s)
134 134
135 135
136 136
137 # Base32 encoding/decoding must be done in Python 137 # Base32 encoding/decoding must be done in Python
138 _b32alphabet = { 138 _b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
139 0: b'A', 9: b'J', 18: b'S', 27: b'3', 139 _b32tab = [bytes([i]) for i in _b32alphabet]
140 1: b'B', 10: b'K', 19: b'T', 28: b'4', 140 _b32tab2 = [a + b for a in _b32tab for b in _b32tab]
141 2: b'C', 11: b'L', 20: b'U', 29: b'5', 141 _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
142 3: b'D', 12: b'M', 21: b'V', 30: b'6',
143 4: b'E', 13: b'N', 22: b'W', 31: b'7',
144 5: b'F', 14: b'O', 23: b'X',
145 6: b'G', 15: b'P', 24: b'Y',
146 7: b'H', 16: b'Q', 25: b'Z',
147 8: b'I', 17: b'R', 26: b'2',
148 }
149
150 _b32tab = [v[0] for k, v in sorted(_b32alphabet.items())]
151 _b32rev = dict([(v[0], k) for k, v in _b32alphabet.items()])
152
153 142
154 def b32encode(s): 143 def b32encode(s):
155 """Encode a byte string using Base32. 144 """Encode a byte string using Base32.
156 145
157 s is the byte string to encode. The encoded byte string is returned. 146 s is the byte string to encode. The encoded byte string is returned.
158 """ 147 """
159 if not isinstance(s, bytes_types): 148 if not isinstance(s, bytes_types):
160 try: 149 s = memoryview(s).tobytes()
161 s = memoryview(s).tobytes() 150 leftover = len(s) % 5
Nick Coghlan 2013/05/19 13:22:58 Same comment as above - doing the tobytes() call i… [comment preview truncated]
162 except TypeError:
163 raise TypeError("expected bytes-like object, not %s" %
ezio.melotti 2013/05/07 09:38:03 This (and the previous one) could be %r instead of… [comment preview truncated]
164 s.__class__.__name__) from None
165 quanta, leftover = divmod(len(s), 5)
166 # Pad the last quantum with zero bits if necessary 151 # Pad the last quantum with zero bits if necessary
167 if leftover: 152 if leftover:
168 s = s + bytes(5 - leftover) # Don't use += ! 153 s = s + bytes(5 - leftover) # Don't use += !
169 quanta += 1 154 encoded = bytearray()
170 encoded = bytes() 155 from_bytes = int.from_bytes
171 for i in range(quanta): 156 b32tab2 = _b32tab2
172 # c1 and c2 are 16 bits wide, c3 is 8 bits wide. The intent of this 157 for i in range(0, len(s), 5):
173 # code is to process the 40 bits in units of 5 bits. So we take the 1 158 c = from_bytes(s[i: i + 5], 'big')
174 # leftover bit of c1 and tack it onto c2. Then we take the 2 leftover 159 encoded += (b32tab2[c >> 30] + # bits 1 - 10
175 # bits of c2 and tack them onto c3. The shifts and masks are intended 160 b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
176 # to give us values of exactly 5 bits in width. 161 b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
177 c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5]) 162 b32tab2[c & 0x3ff] # bits 31 - 40
178 c2 += (c1 & 1) << 16 # 17 bits wide 163 )
179 c3 += (c2 & 3) << 8 # 10 bits wide
180 encoded += bytes([_b32tab[c1 >> 11], # bits 1 - 5
181 _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
182 _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
183 _b32tab[c2 >> 12], # bits 16 - 20 (1 - 5)
184 _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
185 _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
186 _b32tab[c3 >> 5], # bits 31 - 35 (1 - 5)
187 _b32tab[c3 & 0x1f], # bits 36 - 40 (1 - 5)
188 ])
189 # Adjust for any leftover partial quanta 164 # Adjust for any leftover partial quanta
190 if leftover == 1: 165 if leftover == 1:
191 return encoded[:-6] + b'======' 166 encoded[-6:] = b'======'
192 elif leftover == 2: 167 elif leftover == 2:
193 return encoded[:-4] + b'====' 168 encoded[-4:] = b'===='
194 elif leftover == 3: 169 elif leftover == 3:
195 return encoded[:-3] + b'===' 170 encoded[-3:] = b'==='
196 elif leftover == 4: 171 elif leftover == 4:
197 return encoded[:-1] + b'=' 172 encoded[-1:] = b'='
198 return encoded 173 return bytes(encoded)
199
200 174
201 def b32decode(s, casefold=False, map01=None): 175 def b32decode(s, casefold=False, map01=None):
202 """Decode a Base32 encoded byte string. 176 """Decode a Base32 encoded byte string.
203 177
204 s is the byte string to decode. Optional casefold is a flag 178 s is the byte string to decode. Optional casefold is a flag
205 specifying whether a lowercase alphabet is acceptable as input. 179 specifying whether a lowercase alphabet is acceptable as input.
206 For security purposes, the default is False. 180 For security purposes, the default is False.
207 181
208 RFC 3548 allows for optional mapping of the digit 0 (zero) to the 182 RFC 3548 allows for optional mapping of the digit 0 (zero) to the
209 letter O (oh), and for optional mapping of the digit 1 (one) to 183 letter O (oh), and for optional mapping of the digit 1 (one) to
210 either the letter I (eye) or letter L (el). The optional argument 184 either the letter I (eye) or letter L (el). The optional argument
211 map01 when not None, specifies which letter the digit 1 should be 185 map01 when not None, specifies which letter the digit 1 should be
212 mapped to (when map01 is not None, the digit 0 is always mapped to 186 mapped to (when map01 is not None, the digit 0 is always mapped to
213 the letter O). For security purposes the default is None, so that 187 the letter O). For security purposes the default is None, so that
214 0 and 1 are not allowed in the input. 188 0 and 1 are not allowed in the input.
215 189
216 The decoded byte string is returned. binascii.Error is raised if 190 The decoded byte string is returned. binascii.Error is raised if
217 the input is incorrectly padded or if there are non-alphabet 191 the input is incorrectly padded or if there are non-alphabet
218 characters present in the input. 192 characters present in the input.
219 """ 193 """
220 s = _bytes_from_decode_data(s) 194 s = _bytes_from_decode_data(s)
221 quanta, leftover = divmod(len(s), 8) 195 if len(s) % 8:
222 if leftover:
223 raise binascii.Error('Incorrect padding') 196 raise binascii.Error('Incorrect padding')
224 # Handle section 2.4 zero and one mapping. The flag map01 will be either 197 # Handle section 2.4 zero and one mapping. The flag map01 will be either
225 # False, or the character to map the digit 1 (one) to. It should be 198 # False, or the character to map the digit 1 (one) to. It should be
226 # either L (el) or I (eye). 199 # either L (el) or I (eye).
227 if map01 is not None: 200 if map01 is not None:
228 map01 = _bytes_from_decode_data(map01) 201 map01 = _bytes_from_decode_data(map01)
229 assert len(map01) == 1, repr(map01) 202 assert len(map01) == 1, repr(map01)
230 s = s.translate(bytes.maketrans(b'01', b'O' + map01)) 203 s = s.translate(bytes.maketrans(b'01', b'O' + map01))
231 if casefold: 204 if casefold:
232 s = s.upper() 205 s = s.upper()
233 # Strip off pad characters from the right. We need to count the pad 206 # Strip off pad characters from the right. We need to count the pad
234 # characters because this will tell us how many null bytes to remove from 207 # characters because this will tell us how many null bytes to remove from
235 # the end of the decoded string. 208 # the end of the decoded string.
236 padchars = 0 209 l = len(s)
237 mo = re.search(b'(?P<pad>[=]*)$', s) 210 s = s.rstrip(b'=')
238 if mo: 211 padchars = l - len(s)
239 padchars = len(mo.group('pad'))
240 if padchars > 0:
241 s = s[:-padchars]
242 # Now decode the full quanta 212 # Now decode the full quanta
243 parts = [] 213 decoded = bytearray()
244 acc = 0 214 b32rev = _b32rev
245 shift = 35 215 for i in range(0, len(s), 8):
246 for c in s: 216 quanta = s[i: i + 8]
247 val = _b32rev.get(c) 217 acc = 0
248 if val is None: 218 try:
219 for c in quanta:
220 acc = (acc << 5) + b32rev[c]
221 except KeyError:
249 raise TypeError('Non-base32 digit found') 222 raise TypeError('Non-base32 digit found')
250 acc += _b32rev[c] << shift 223 decoded += acc.to_bytes(5, 'big')
251 shift -= 5
252 if shift < 0:
253 parts.append(binascii.unhexlify(bytes('%010x' % acc, "ascii")))
254 acc = 0
255 shift = 35
256 # Process the last, partial quanta 224 # Process the last, partial quanta
257 last = binascii.unhexlify(bytes('%010x' % acc, "ascii")) 225 if padchars:
258 if padchars == 0: 226 acc <<= 5 * padchars
259 last = b'' # No characters 227 last = acc.to_bytes(5, 'big')
260 elif padchars == 1: 228 if padchars == 1:
261 last = last[:-1] 229 decoded[-5:] = last[:-1]
262 elif padchars == 3: 230 elif padchars == 3:
263 last = last[:-2] 231 decoded[-5:] = last[:-2]
264 elif padchars == 4: 232 elif padchars == 4:
265 last = last[:-3] 233 decoded[-5:] = last[:-3]
266 elif padchars == 6: 234 elif padchars == 6:
267 last = last[:-4] 235 decoded[-5:] = last[:-4]
268 else: 236 else:
269 raise binascii.Error('Incorrect padding') 237 raise binascii.Error('Incorrect padding')
270 parts.append(last) 238 return bytes(decoded)
271 return b''.join(parts)
272 239
273 240
274 241
275 # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns 242 # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
276 # lowercase. The RFC also recommends against accepting input case 243 # lowercase. The RFC also recommends against accepting input case
277 # insensitively. 244 # insensitively.
278 def b16encode(s): 245 def b16encode(s):
279 """Encode a byte string using Base16. 246 """Encode a byte string using Base16.
280 247
281 s is the byte string to encode. The encoded byte string is returned. 248 s is the byte string to encode. The encoded byte string is returned.
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
327 294
328 def decode(input, output): 295 def decode(input, output):
329 """Decode a file; input and output are binary files.""" 296 """Decode a file; input and output are binary files."""
330 while True: 297 while True:
331 line = input.readline() 298 line = input.readline()
332 if not line: 299 if not line:
333 break 300 break
334 s = binascii.a2b_base64(line) 301 s = binascii.a2b_base64(line)
335 output.write(s) 302 output.write(s)
336 303
304 def _input_type_check(s):
305 try:
306 memoryview(s)
307 except TypeError as err:
308 msg = "expected bytes-like object, not %s" % s.__class__.__name__
309 raise TypeError(msg) from err
337 310
338 def encodebytes(s): 311 def encodebytes(s):
339 """Encode a bytestring into a bytestring containing multiple lines 312 """Encode a bytestring into a bytestring containing multiple lines
340 of base-64 data.""" 313 of base-64 data."""
341 if not isinstance(s, bytes_types): 314 _input_type_check(s)
342 raise TypeError("expected bytes, not %s" % s.__class__.__name__)
343 pieces = [] 315 pieces = []
344 for i in range(0, len(s), MAXBINSIZE): 316 for i in range(0, len(s), MAXBINSIZE):
345 chunk = s[i : i + MAXBINSIZE] 317 chunk = s[i : i + MAXBINSIZE]
346 pieces.append(binascii.b2a_base64(chunk)) 318 pieces.append(binascii.b2a_base64(chunk))
347 return b"".join(pieces) 319 return b"".join(pieces)
348 320
349 def encodestring(s): 321 def encodestring(s):
350 """Legacy alias of encodebytes().""" 322 """Legacy alias of encodebytes()."""
351 import warnings 323 import warnings
352 warnings.warn("encodestring() is a deprecated alias, use encodebytes()", 324 warnings.warn("encodestring() is a deprecated alias, use encodebytes()",
353 DeprecationWarning, 2) 325 DeprecationWarning, 2)
354 return encodebytes(s) 326 return encodebytes(s)
355 327
356 328
357 def decodebytes(s): 329 def decodebytes(s):
358 """Decode a bytestring of base-64 data into a bytestring.""" 330 """Decode a bytestring of base-64 data into a bytestring."""
359 if not isinstance(s, bytes_types): 331 _input_type_check(s)
360 raise TypeError("expected bytes, not %s" % s.__class__.__name__)
361 return binascii.a2b_base64(s) 332 return binascii.a2b_base64(s)
362 333
363 def decodestring(s): 334 def decodestring(s):
364 """Legacy alias of decodebytes().""" 335 """Legacy alias of decodebytes()."""
365 import warnings 336 import warnings
366 warnings.warn("decodestring() is a deprecated alias, use decodebytes()", 337 warnings.warn("decodestring() is a deprecated alias, use decodebytes()",
367 DeprecationWarning, 2) 338 DeprecationWarning, 2)
368 return decodebytes(s) 339 return decodebytes(s)
369 340
370 341
(...skipping 29 matching lines...) Expand all
400 print(repr(s0)) 371 print(repr(s0))
401 s1 = encodebytes(s0) 372 s1 = encodebytes(s0)
402 print(repr(s1)) 373 print(repr(s1))
403 s2 = decodebytes(s1) 374 s2 = decodebytes(s1)
404 print(repr(s2)) 375 print(repr(s2))
405 assert s0 == s2 376 assert s0 == s2
406 377
407 378
408 if __name__ == '__main__': 379 if __name__ == '__main__':
409 main() 380 main()
LEFT | RIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+