diff -r 4d4277941a45 Lib/pickle.py --- a/Lib/pickle.py Fri Apr 12 23:30:59 2013 +0300 +++ b/Lib/pickle.py Sat Apr 13 16:18:15 2013 +0300 @@ -26,9 +26,10 @@ from types import FunctionType, BuiltinFunctionType from copyreg import dispatch_table from copyreg import _extension_registry, _inverted_registry, _extension_cache -import marshal +from itertools import islice import sys -import struct +from sys import maxsize +from struct import pack, unpack import re import io import codecs @@ -58,11 +59,6 @@ # there are too many issues with that. DEFAULT_PROTOCOL = 3 -# Why use struct.pack() for pickling but marshal.loads() for -# unpickling? struct.pack() is 40% faster than marshal.dumps(), but -# marshal.loads() is twice as fast as struct.unpack()! -mloads = marshal.loads - class PickleError(Exception): """A common base class for the other pickling exceptions.""" pass @@ -231,7 +227,7 @@ raise PicklingError("Pickler.__init__() was not called by " "%s.__init__()" % (self.__class__.__name__,)) if self.proto >= 2: - self.write(PROTO + bytes([self.proto])) + self.write(PROTO + pack("= 2: - self.write(obj and NEWTRUE or NEWFALSE) + self.write(NEWTRUE if obj else NEWFALSE) else: - self.write(obj and TRUE or FALSE) + self.write(TRUE if obj else FALSE) dispatch[bool] = save_bool - def save_long(self, obj, pack=struct.pack): + def save_long(self, obj): if self.bin: # If the int is small enough to fit in a signed 4-byte 2's-comp # format, we can store it more efficiently than the general @@ -461,39 +457,36 @@ # First one- and two-byte unsigned ints: if obj >= 0: if obj <= 0xff: - self.write(BININT1 + bytes([obj])) + self.write(BININT1 + pack(">8])) + self.write(BININT2 + pack("> 31 # note that Python shift sign-extends - if high_bits == 0 or high_bits == -1: - # All high bits are copies of bit 2**31, so the value - # fits in a 4-byte signed int. + if -0x80000000 <= obj <= 0x7fffffff: self.write(BININT + pack("= 2: encoded = encode_long(obj) n = len(encoded) if n < 256: - self.write(LONG1 + bytes([n]) + encoded) + self.write(LONG1 + pack("d', obj)) else: self.write(FLOAT + repr(obj).encode("ascii") + b'\n') dispatch[float] = save_float - def save_bytes(self, obj, pack=struct.pack): + def save_bytes(self, obj): if self.proto < 3: - if len(obj) == 0: + if not obj: # bytes object is empty self.save_reduce(bytes, (), obj=obj) else: self.save_reduce(codecs.encode, @@ -501,13 +494,13 @@ return n = len(obj) if n < 256: - self.write(SHORT_BINBYTES + bytes([n]) + bytes(obj)) + self.write(SHORT_BINBYTES + pack("= 2: + if n <= 3 and self.proto >= 2: for element in obj: save(element) # Subtle. Same as in the big comment below. if id(obj) in memo: get = self.get(memo[id(obj)][0]) - write(POP * n + get) + self.write(POP * n + get) else: - write(_tuplesize2code[n]) + self.write(_tuplesize2code[n]) self.memoize(obj) return # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple # has more than 3 elements. + write = self.write write(MARK) for element in obj: save(element) @@ -561,25 +551,23 @@ # could have been done in the "for element" loop instead, but # recursive tuples are a rare thing. get = self.get(memo[id(obj)][0]) - if proto: + if self.bin: write(POP_MARK + get) else: # proto 0 -- POP_MARK not available write(POP * (n+1) + get) return # No recursion. - self.write(TUPLE) + write(TUPLE) self.memoize(obj) dispatch[tuple] = save_tuple def save_list(self, obj): - write = self.write - if self.bin: - write(EMPTY_LIST) + self.write(EMPTY_LIST) else: # proto 0 -- can't use EMPTY_LIST - write(MARK + LIST) + self.write(MARK + LIST) self.memoize(obj) self._batch_appends(obj) @@ -599,17 +587,9 @@ write(APPEND) return - items = iter(items) - r = range(self._BATCHSIZE) - while items is not None: - tmp = [] - for i in r: - try: - x = next(items) - tmp.append(x) - except StopIteration: - items = None - break + it = iter(items) + while True: + tmp = list(islice(it, self._BATCHSIZE)) n = len(tmp) if n > 1: write(MARK) @@ -620,14 +600,14 @@ save(tmp[0]) write(APPEND) # else tmp is empty, and we're done + if n < self._BATCHSIZE: + return def save_dict(self, obj): - write = self.write - if self.bin: - write(EMPTY_DICT) + self.write(EMPTY_DICT) else: # proto 0 -- can't use EMPTY_DICT - write(MARK + DICT) + self.write(MARK + DICT) self.memoize(obj) self._batch_setitems(obj.items()) @@ -648,16 +628,9 @@ write(SETITEM) return - items = iter(items) - r = range(self._BATCHSIZE) - while items is not None: - tmp = [] - for i in r: - try: - tmp.append(next(items)) - except StopIteration: - items = None - break + it = iter(items) + while True: + tmp = list(islice(it, self._BATCHSIZE)) n = len(tmp) if n > 1: write(MARK) @@ -671,8 +644,10 @@ save(v) write(SETITEM) # else tmp is empty, and we're done + if n < self._BATCHSIZE: + return - def save_global(self, obj, name=None, pack=struct.pack): + def save_global(self, obj, name=None): write = self.write memo = self.memo @@ -702,9 +677,9 @@ if code: assert code > 0 if code <= 0xff: - write(EXT1 + bytes([code])) + write(EXT1 + pack(">8])) + write(EXT2 + pack("d', self.read(8))[0]) dispatch[BINFLOAT[0]] = load_binfloat def load_string(self): orig = self.readline() rep = orig[:-1] - for q in (b'"', b"'"): # double or single quote - if rep.startswith(q): - if not rep.endswith(q): - raise ValueError("insecure string pickle") - rep = rep[len(q):-len(q)] - break + # Strip outermost quotes + if rep[0] == rep[-1] and rep[0] in b'"\'': + rep = rep[1:-1] else: - raise ValueError("insecure string pickle: %r" % orig) + raise ValueError("insecure string pickle") self.append(codecs.escape_decode(rep)[0] .decode(self.encoding, self.errors)) dispatch[STRING[0]] = load_string def load_binstring(self): # Deprecated BINSTRING uses signed 32-bit length - len = mloads(b'i' + self.read(4)) + len, = unpack(' maxsize: - raise UnpicklingError("BINBYTES exceeds system's maximum size of %d bytes" % maxsize); + raise UnpicklingError("BINBYTES exceeds system's maximum size " + "of %d bytes" % maxsize) self.append(self.read(len)) dispatch[BINBYTES[0]] = load_binbytes @@ -982,23 +952,24 @@ self.append(str(self.readline()[:-1], 'raw-unicode-escape')) dispatch[UNICODE[0]] = load_unicode - def load_binunicode(self, unpack=struct.unpack, maxsize=sys.maxsize): + def load_binunicode(self): len, = unpack(' maxsize: - raise UnpicklingError("BINUNICODE exceeds system's maximum size of %d bytes" % maxsize); + raise UnpicklingError("BINUNICODE exceeds system's maximum size " + "of %d bytes" % maxsize) self.append(str(self.read(len), 'utf-8', 'surrogatepass')) dispatch[BINUNICODE[0]] = load_binunicode def load_short_binstring(self): - len = ord(self.read(1)) - data = bytes(self.read(len)) + len = self.read(1)[0] + data = self.read(len) value = str(data, self.encoding, self.errors) self.append(value) dispatch[SHORT_BINSTRING[0]] = load_short_binstring def load_short_binbytes(self): - len = ord(self.read(1)) - self.append(bytes(self.read(len))) + len = self.read(1)[0] + self.append(self.read(len)) dispatch[SHORT_BINBYTES[0]] = load_short_binbytes def load_tuple(self): @@ -1037,12 +1008,9 @@ def load_dict(self): k = self.marker() - d = {} items = self.stack[k+1:] - for i in range(0, len(items), 2): - key = items[i] - value = items[i+1] - d[key] = value + d = {items[i]: items[i+1] + for i in range(0, len(items), 2)} self.stack[k:] = [d] dispatch[DICT[0]] = load_dict @@ -1094,17 +1062,17 @@ dispatch[GLOBAL[0]] = load_global def load_ext1(self): - code = ord(self.read(1)) + code = self.read(1)[0] self.get_extension(code) dispatch[EXT1[0]] = load_ext1 def load_ext2(self): - code = mloads(b'i' + self.read(2) + b'\000\000') + code, = unpack(' maxsize: raise ValueError("negative LONG_BINPUT argument") @@ -1236,7 +1204,7 @@ state = stack.pop() inst = stack[-1] setstate = getattr(inst, "__setstate__", None) - if setstate: + if setstate is not None: setstate(state) return slotstate = None