# Reconstructed from a diff against revision 2012e85638d9.
#
# Part 1 is added to Lib/pickle.py, after the ``dispatch[STOP[0]] = load_stop``
# entry of ``_Unpickler`` and before the "Encode/decode ints" section.
# Part 2 is added to Lib/test/pickletester.py, in ``AbstractPickleTests``,
# just before ``BigmemPickleTests``.

import io

# ---------------------------------------------------------------------------
# Part 1: Lib/pickle.py -- framing streams
# ---------------------------------------------------------------------------
#
# A framing stream starts with the magic PYFRAMES, followed by an arbitrary
# number of frame blocks.  Each frame block starts with a 64-bit big-endian
# block size, followed by the actual block data.  No specific end marker is
# provided; a reader simply stops once the underlying stream is exhausted
# at a frame boundary.

PYFRAMES_MAGIC = b'PYFRAMES'


class FramingWriter(io.IOBase):
    """Write-only stream that packs written data into length-prefixed frames.

    The magic header is written to *raw* on construction.  Data passed to
    write() is buffered and emitted as one frame as soon as more than
    *framesize* bytes are pending, or when flush() is called.

    raw       -- object providing write() and writelines()
    framesize -- number of pending bytes that, once exceeded, triggers
                 emission of a frame
    """

    def __init__(self, raw, framesize=64000):
        super().__init__()
        self.raw = raw
        self.framesize = framesize
        self.blocks = []        # chunks pending for the next frame
        self.size = 0           # total number of bytes in self.blocks
        raw.write(PYFRAMES_MAGIC)

    def writable(self):
        """Return True: this stream supports write()."""
        return True

    def flush(self):
        """Emit all pending data as a single frame.

        Nothing is written when no data is pending.  IOBase.close() (and
        therefore __del__) calls flush(), so an unconditional write would
        append a spurious zero-length frame on every close.
        """
        if not self.blocks:
            return
        self.raw.write(self.size.to_bytes(8, 'big'))
        self.raw.writelines(self.blocks)
        self.size = 0
        self.blocks = []

    def write(self, data):
        """Buffer *data* and return the number of bytes accepted.

        A frame is emitted once more than ``framesize`` bytes are pending.
        """
        # Snapshot the data: the caller is free to mutate or release a
        # mutable buffer after write() returns.  For a bytes object this
        # does not copy.
        chunk = bytes(data)
        if chunk:
            self.blocks.append(chunk)
            self.size += len(chunk)
            if self.size > self.framesize:
                self.flush()
        return len(chunk)


class FramingReader(io.IOBase):
    """Read-only stream that reassembles the payload of a framing stream.

    The magic header is consumed and validated on construction; frames are
    then fetched from *raw* on demand.

    raw -- object providing read(n)

    Raises ValueError if the magic header is wrong, or later if a frame
    header or frame body is truncated.

    NOTE(review): frame sizes are taken from the stream as-is and passed to
    raw.read(); do not feed this untrusted input without a size limit.
    """

    def __init__(self, raw):
        super().__init__()
        self.raw = raw
        self.buffer = b''       # payload read from raw so far
        self.offset = 0         # index of first unconsumed byte in buffer
        if self._read_exact(len(PYFRAMES_MAGIC)) != PYFRAMES_MAGIC:
            self._framing_error()

    def readable(self):
        """Return True: this stream supports read()."""
        return True

    def _framing_error(self):
        raise ValueError("Error in framing structure")

    def _read_exact(self, n):
        """Read n bytes from raw, retrying on short reads.

        Returns fewer than n bytes only if raw runs out of data.
        """
        chunks = []
        remaining = n
        while remaining > 0:
            chunk = self.raw.read(remaining)
            if not chunk:
                break
            chunks.append(chunk)
            remaining -= len(chunk)
        return b''.join(chunks)

    def _fill(self):
        """Fill the buffer with another frame.
        Return true if more data have been read."""
        # Clear any data that have been consumed already
        if self.offset:
            self.buffer = self.buffer[self.offset:]
            self.offset = 0
        header = self._read_exact(8)
        if not header:
            # Clean end of stream at a frame boundary.
            return False
        if len(header) < 8:
            self._framing_error()
        framesize = int.from_bytes(header, 'big')
        data = self._read_exact(framesize)
        if len(data) != framesize:
            self._framing_error()
        self.buffer += data
        return True

    def peek(self, n):
        """Return up to n bytes without consuming them.

        Frames are read until n bytes are available or the stream ends, so
        fewer than n bytes are returned only at end of stream.
        """
        while self.offset + n > len(self.buffer):
            if not self._fill():
                break
        return self.buffer[self.offset:self.offset + n]

    def read(self, n=-1):
        """Read and return up to n bytes.

        If n is omitted, None or negative, read until end of stream.
        An empty bytes object is returned at end of stream.
        """
        if n is None or n < 0:
            while self._fill():
                pass
            result = self.buffer[self.offset:]
        else:
            result = self.peek(n)
        self.offset += len(result)
        return result


# ---------------------------------------------------------------------------
# Part 2: Lib/test/pickletester.py -- method added to AbstractPickleTests
# ---------------------------------------------------------------------------
#
# NOTE(review): only this method is part of the change.  The class header
# below is a bare container; per the diff hunk header the real class is
# ``AbstractPickleTests(unittest.TestCase)``.  ``pickle``, ``protocols``,
# ``self.pickler`` and ``self.unpickler`` are expected to come from
# pickletester.py and the concrete test classes -- confirm before merging.

class AbstractPickleTests:

    def test_framing(self):
        """Round-trip a list through a framing stream for every protocol."""
        data = list(range(1000))
        for proto in protocols:
            raw = io.BytesIO()
            # use small frame size to exercise framing
            framed = pickle.FramingWriter(raw, framesize=32)
            self.pickler(framed, proto).dump(data)
            framed.flush()
            raw.seek(0)
            pickled = raw.read()

            raw = io.BytesIO(pickled)
            framed = pickle.FramingReader(raw)
            obj = self.unpickler(framed).load()

            # assertEquals is a deprecated alias (removed in Python 3.12).
            self.assertEqual(obj, data)