diff -r cb392bce91e6 -r 9f1be171da08 Lib/pickle.py --- a/Lib/pickle.py Thu Apr 18 09:41:34 2013 +0200 +++ b/Lib/pickle.py Thu Apr 18 03:24:00 2013 -0700 @@ -42,17 +42,18 @@ bytes_types = (bytes, bytearray) # These are purely informational; no code uses these. -format_version = "3.0" # File format version we write +format_version = "4.0" # File format version we write compatible_formats = ["1.0", # Original protocol 0 "1.1", # Protocol 0 with INST added "1.2", # Original protocol 1 "1.3", # Protocol 1 with BINFLOAT added "2.0", # Protocol 2 "3.0", # Protocol 3 + "4.0", # Protocol 4 ] # Old format versions we can read # This is the highest protocol number we know how to read. -HIGHEST_PROTOCOL = 3 +HIGHEST_PROTOCOL = 4 # The protocol we write by default. May be less than HIGHEST_PROTOCOL. # We intentionally write a protocol that Python 2.x cannot read; @@ -164,7 +165,18 @@ BINBYTES = b'B' # push bytes; counted binary string argument SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes -__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) +# Protocol 4 +SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes +BINUNICODE8 = b'\x8d' # push very long string +BINBYTES8 = b'\x8e' # push very long bytes string +EMPTY_SET = b'\x8f' # push empty set on the stack +ADDITEM = b'\x90' # add topmost stack item to set +ADDITEMS = b'\x91' # modify set by adding topmost stack items +EMPTY_FROZENSET = b'\x92' # push empty frozenset on the stack +FROZENSET = b'\x93' # build frozenset from topmost stack items +STACK_GLOBAL = b'\x94' # same as GLOBAL but using names on the stacks + +__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)]) # Pickling machinery @@ -174,9 +186,9 @@ """This takes a binary file for writing a pickle data stream. The optional protocol argument tells the pickler to use the - given protocol; supported protocols are 0, 1, 2, 3. The default - protocol is 3; a backward-incompatible protocol designed for - Python 3.0. 
+ given protocol; supported protocols are 0, 1, 2, 3 and 4. The + default protocol is 3; a backward-incompatible protocol designed for + Python 3. Specifying a negative protocol version selects the highest protocol version supported. The higher the protocol used, the @@ -189,8 +201,8 @@ meets this interface. If fix_imports is True and protocol is less than 3, pickle will try to - map the new Python 3.x names to the old module names used in Python - 2.x, so that the pickle data stream is readable with Python 2.x. + map the new Python 3 names to the old module names used in Python 2, + so that the pickle data stream is readable with Python 2. """ if protocol is None: protocol = DEFAULT_PROTOCOL @@ -409,7 +421,13 @@ write(REDUCE) if obj is not None: - self.memoize(obj) + # If the object is already in the memo, this means it is + # recursive. In this case, throw away everything we put on the + # stack, and fetch the object back from the memo. + if id(obj) in self.memo: + write(POP + self.get(self.memo[id(obj)][0])) + else: + self.memoize(obj) # More new special cases (that work with older protocols as # well): when __reduce__ returns a tuple with 4 or 5 items, @@ -493,23 +511,32 @@ (str(obj, 'latin1'), 'latin1'), obj=obj) return n = len(obj) - if n < 256: + if n <= 0xff: self.write(SHORT_BINBYTES + pack(" 0xffffffff and self.proto >= 4: + self.write(BINBYTES8 + pack("= 4: + self.write(SHORT_BINUNICODE + pack(" 0xffffffff and self.proto >= 4: + self.write(BINUNICODE8 + pack(" 1: + write(MARK) + for item in batch: + save(item) + write(ADDITEMS) + elif n: + save(batch[0]) + write(ADDITEM) + # else batch is empty, and we're done + dispatch[set] = save_set + + def save_frozenset(self, obj): + save = self.save + write = self.write + + if self.proto < 4: + self.save_reduce(frozenset, (list(obj),), obj=obj) + return + + n = len(obj) + if n == 0: + write(EMPTY_FROZENSET) + return + + write(MARK) + for item in obj: + save(item) + + if id(obj) in self.memo: + # If the object is already
in the memo, this means it is + # recursive. In this case, throw away everything we put on the + # stack, and fetch the object back from the memo. + write(POP_MARK + self.get(self.memo[id(obj)][0])) + return + + write(FROZENSET) + self.memoize(obj) + dispatch[frozenset] = save_frozenset + def save_global(self, obj, name=None): write = self.write memo = self.memo @@ -684,7 +771,11 @@ write(EXT4 + pack("= 3. - if self.proto >= 3: + if self.proto >= 4: + self.save_str(module, memoize=False) + self.save_str(name, memoize=False) + write(STACK_GLOBAL) + elif self.proto == 3: write(GLOBAL + bytes(module, "utf-8") + b'\n' + bytes(name, "utf-8") + b'\n') else: @@ -940,6 +1031,14 @@ self.append(str(self.read(len), 'utf-8', 'surrogatepass')) dispatch[BINUNICODE[0]] = load_binunicode + def load_binunicode8(self): + len, = unpack(' maxsize: + raise UnpicklingError("BINUNICODE8 exceeds system's maximum size " + "of %d bytes" % maxsize) + self.append(str(self.read(len), 'utf-8', 'surrogatepass')) + dispatch[BINUNICODE8[0]] = load_binunicode8 + def load_short_binstring(self): len = self.read(1)[0] data = self.read(len) @@ -952,6 +1051,11 @@ self.append(self.read(len)) dispatch[SHORT_BINBYTES[0]] = load_short_binbytes + def load_short_binunicode(self): + len = ord(self.read(1)) + self.append(str(self.read(len), 'utf-8', 'surrogatepass')) + dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode + def load_tuple(self): k = self.marker() self.stack[k:] = [tuple(self.stack[k+1:])] @@ -981,6 +1085,19 @@ self.append({}) dispatch[EMPTY_DICT[0]] = load_empty_dictionary + def load_empty_set(self): + self.append(set()) + dispatch[EMPTY_SET[0]] = load_empty_set + + def load_empty_frozenset(self): + self.append(frozenset()) + dispatch[EMPTY_FROZENSET[0]] = load_empty_frozenset + + def load_frozenset(self): + k = self.marker() + self.stack[k:] = [frozenset(self.stack[k+1:])] + dispatch[FROZENSET[0]] = load_frozenset + def load_list(self): k = self.marker() self.stack[k:] = [self.stack[k+1:]] @@ 
-1041,6 +1158,14 @@ self.append(klass) dispatch[GLOBAL[0]] = load_global + def load_stack_global(self): + name = self.stack.pop() + module = self.stack.pop() + if type(name) is not str or type(module) is not str: + raise UnpicklingError + self.append(self.find_class(module, name)) + dispatch[STACK_GLOBAL[0]] = load_stack_global + def load_ext1(self): code = self.read(1)[0] self.get_extension(code) @@ -1179,6 +1304,20 @@ del stack[mark:] dispatch[SETITEMS[0]] = load_setitems + def load_additem(self): + stack = self.stack + item = stack.pop() + stack[-1].add(item) + dispatch[ADDITEM[0]] = load_additem + + def load_additems(self): + stack = self.stack + mark = self.marker() + set = stack[mark - 1] + set.update(stack[mark + 1:]) + del stack[mark:] + dispatch[ADDITEMS[0]] = load_additems + def load_build(self): stack = self.stack state = stack.pop() diff -r cb392bce91e6 -r 9f1be171da08 Lib/pickletools.py --- a/Lib/pickletools.py Thu Apr 18 09:41:34 2013 +0200 +++ b/Lib/pickletools.py Thu Apr 18 03:24:00 2013 -0700 @@ -168,6 +168,7 @@ TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int +TAKEN_FROM_ARGUMENT8U = -5 # num bytes is 8-byte unsigned little-endian int class ArgumentDescriptor(object): __slots__ = ( @@ -175,7 +176,7 @@ 'name', # length of argument, in bytes; an int; UP_TO_NEWLINE and - # TAKEN_FROM_ARGUMENT{1,4} are negative values for variable-length + # TAKEN_FROM_ARGUMENT{1,4,8} are negative values for variable-length # cases 'n', @@ -196,7 +197,8 @@ n in (UP_TO_NEWLINE, TAKEN_FROM_ARGUMENT1, TAKEN_FROM_ARGUMENT4, - TAKEN_FROM_ARGUMENT4U)) + TAKEN_FROM_ARGUMENT4U, + TAKEN_FROM_ARGUMENT8U)) self.n = n self.reader = reader @@ -288,6 +290,27 @@ doc="Four-byte unsigned integer, little-endian.") +def read_uint8(f): + r""" + >>> import io + >>> read_uint8(io.BytesIO(b'\xff\x00\x00\x00\x00\x00\x00\x00')) + 255 
+ >>> read_uint8(io.BytesIO(b'\xff' * 8)) == 2**64-1 + True + """ + + data = f.read(8) + if len(data) == 8: + return _unpack(">> import io @@ -381,6 +404,36 @@ a single blank separating the two strings. """) + +def read_string1(f): + r""" + >>> import io + >>> read_string1(io.BytesIO(b"\x00")) + '' + >>> read_string1(io.BytesIO(b"\x03abcdef")) + 'abc' + """ + + n = read_uint1(f) + assert n >= 0 + data = f.read(n) + if len(data) == n: + return data.decode("latin-1") + raise ValueError("expected %d bytes in a string1, but only %d remain" % + (n, len(data))) + +string1 = ArgumentDescriptor( + name="string1", + n=TAKEN_FROM_ARGUMENT1, + reader=read_string1, + doc="""A counted string. + + The first argument is a 1-byte unsigned int giving the number + of bytes in the string, and the second argument is that many + bytes. + """) + + def read_string4(f): r""" >>> import io @@ -415,28 +468,28 @@ """) -def read_string1(f): +def read_bytes1(f): r""" >>> import io - >>> read_string1(io.BytesIO(b"\x00")) - '' - >>> read_string1(io.BytesIO(b"\x03abcdef")) - 'abc' + >>> read_bytes1(io.BytesIO(b"\x00")) + b'' + >>> read_bytes1(io.BytesIO(b"\x03abcdef")) + b'abc' """ n = read_uint1(f) assert n >= 0 data = f.read(n) if len(data) == n: - return data.decode("latin-1") - raise ValueError("expected %d bytes in a string1, but only %d remain" % + return data + raise ValueError("expected %d bytes in a bytes1, but only %d remain" % (n, len(data))) -string1 = ArgumentDescriptor( - name="string1", +bytes1 = ArgumentDescriptor( + name="bytes1", n=TAKEN_FROM_ARGUMENT1, - reader=read_string1, - doc="""A counted string. + reader=read_bytes1, + doc="""A counted bytes string. 
The first argument is a 1-byte unsigned int giving the number of bytes in the string, and the second argument is that many @@ -505,6 +558,38 @@ """) +def read_bytes8(f): + r""" + >>> import io + >>> read_bytes8(io.BytesIO(b"\x00\x00\x00\x00\x00\x00\x00\x00abc")) + b'' + >>> read_bytes8(io.BytesIO(b"\x03\x00\x00\x00\x00\x00\x00\x00abcdef")) + b'abc' + >>> read_bytes8(io.BytesIO(b"\x00\x00\x00\x00\x00\x00\x03\x00abcdef")) + Traceback (most recent call last): + ... + ValueError: expected 844424930131968 bytes in a bytes8, but only 6 remain + """ + + n = read_uint8(f) + if n > sys.maxsize: + raise ValueError("bytes8 byte count > sys.maxsize: %d" % n) + data = f.read(n) + if len(data) == n: + return data + raise ValueError("expected %d bytes in a bytes8, but only %d remain" % + (n, len(data))) + +bytes8 = ArgumentDescriptor( + name="bytes8", + n=TAKEN_FROM_ARGUMENT8U, + reader=read_bytes8, + doc="""A counted bytes string. + + The first argument is a 8-byte little-endian unsigned int giving + the number of bytes, and the second argument is that many bytes. + """) + def read_unicodestringnl(f): r""" >>> import io @@ -530,6 +615,47 @@ escape sequences. """) + +def read_unicodestring1(f): + r""" + >>> import io + >>> s = 'abcd\uabcd' + >>> enc = s.encode('utf-8') + >>> enc + b'abcd\xea\xaf\x8d' + >>> n = bytes([len(enc)]) # little-endian 1-byte length + >>> t = read_unicodestring1(io.BytesIO(n + enc + b'junk')) + >>> s == t + True + + >>> read_unicodestring1(io.BytesIO(n + enc[:-1])) + Traceback (most recent call last): + ... 
+ ValueError: expected 7 bytes in a unicodestring1, but only 6 remain + """ + + n = read_uint1(f) + if n < 0: + raise ValueError("unicodestring1 byte count < 0: %d" % n) + data = f.read(n) + if len(data) == n: + return str(data, 'utf-8', 'surrogatepass') + raise ValueError("expected %d bytes in a unicodestring1, but only %d " + "remain" % (n, len(data))) + +unicodestring1 = ArgumentDescriptor( + name="unicodestring1", + n=TAKEN_FROM_ARGUMENT1, + reader=read_unicodestring1, + doc="""A counted Unicode string. + + The first argument is a 1-byte little-endian signed int + giving the number of bytes in the string, and the second + argument-- the UTF-8 encoding of the Unicode string -- + contains that many bytes. + """) + + def read_unicodestring4(f): r""" >>> import io @@ -570,6 +696,46 @@ """) +def read_unicodestring8(f): + r""" + >>> import io + >>> s = 'abcd\uabcd' + >>> enc = s.encode('utf-8') + >>> enc + b'abcd\xea\xaf\x8d' + >>> n = bytes([len(enc)]) + bytes(7) # little-endian 8-byte length + >>> t = read_unicodestring8(io.BytesIO(n + enc + b'junk')) + >>> s == t + True + + >>> read_unicodestring8(io.BytesIO(n + enc[:-1])) + Traceback (most recent call last): + ... + ValueError: expected 7 bytes in a unicodestring8, but only 6 remain + """ + + n = read_uint8(f) + if n < 0: + raise ValueError("unicodestring8 byte count < 0: %d" % n) + data = f.read(n) + if len(data) == n: + return str(data, 'utf-8', 'surrogatepass') + raise ValueError("expected %d bytes in a unicodestring8, but only %d " + "remain" % (n, len(data))) + +unicodestring8 = ArgumentDescriptor( + name="unicodestring8", + n=TAKEN_FROM_ARGUMENT8U, + reader=read_unicodestring8, + doc="""A counted Unicode string. + + The first argument is a 8-byte little-endian signed int + giving the number of bytes in the string, and the second + argument-- the UTF-8 encoding of the Unicode string -- + contains that many bytes. 
+ """) + + def read_decimalnl_short(f): r""" >>> import io @@ -863,6 +1029,16 @@ obtype=dict, doc="A Python dict object.") +pyset = StackObject( + name="set", + obtype=set, + doc="A Python set object.") + +pyfrozenset = StackObject( + name="frozenset", + obtype=frozenset, + doc="A Python frozenset object.") + anyobject = StackObject( name='any', obtype=object, @@ -1146,6 +1322,19 @@ literally as the string content. """), + I(name='BINBYTES8', + code='\x8e', + arg=bytes8, + stack_before=[], + stack_after=[pybytes], + proto=4, + doc="""Push a Python bytes object. + + There are two arguments: the first is a 8-byte unsigned int giving + the number of bytes in the string, and the second is that many bytes, + which are taken literally as the string content. + """), + # Ways to spell None. I(name='NONE', @@ -1194,6 +1383,19 @@ until the next newline character. """), + I(name='SHORT_BINUNICODE', + code='\x8c', + arg=unicodestring1, + stack_before=[], + stack_after=[pyunicode], + proto=4, + doc="""Push a Python Unicode string object. + + There are two arguments: the first is a 1-byte little-endian signed int + giving the number of bytes in the string. The second is that many + bytes, and is the UTF-8 encoding of the Unicode string. + """), + I(name='BINUNICODE', code='X', arg=unicodestring4, @@ -1207,6 +1409,19 @@ bytes, and is the UTF-8 encoding of the Unicode string. """), + I(name='BINUNICODE8', + code='\x8d', + arg=unicodestring8, + stack_before=[], + stack_after=[pyunicode], + proto=4, + doc="""Push a Python Unicode string object. + + There are two arguments: the first is a 8-byte little-endian signed int + giving the number of bytes in the string. The second is that many + bytes, and is the UTF-8 encoding of the Unicode string. + """), + # Ways to spell floats. I(name='FLOAT', @@ -1432,6 +1647,77 @@ 1, 2, ..., n, and in that order.
"""), + # Ways to build sets + + I(name='EMPTY_SET', + code='\x8f', + arg=None, + stack_before=[], + stack_after=[pyset], + proto=4, + doc="Push an empty set."), + + I(name='ADDITEM', + code='\x90', + arg=None, + stack_before=[pyset, anyobject], + stack_after=[pyset], + proto=4, + doc="""Add an item to an existing set. + + Stack before: ... pyset item + Stack after: ... pyset + + where pyset has been modified via pyset.add(item). + """), + + I(name='ADDITEMS', + code='\x91', + arg=None, + stack_before=[pyset, markobject, stackslice], + stack_after=[pyset], + proto=4, + doc="""Add an arbitrary number of items to an existing set. + + The slice of the stack following the topmost markobject is taken as + a sequence of items, added to the set immediately under the topmost + markobject. Everything at and after the topmost markobject is popped, + leaving the mutated set at the top of the stack. + + Stack before: ... pyset markobject item_1 ... item_n + Stack after: ... pyset + + where pyset has been modified via pyset.add(item_i) for i in + 1, 2, ..., n, and in that order. + """), + + # Ways to build frozensets + + I(name='EMPTY_FROZENSET', + code='\x92', + arg=None, + stack_before=[], + stack_after=[pyfrozenset], + proto=4, + doc="Push an empty frozenset."), + + I(name='FROZENSET', + code='\x93', + arg=None, + stack_before=[markobject, stackslice], + stack_after=[pyfrozenset], + proto=4, + doc="""Build a frozenset out of the topmost slice, after markobject. + + All the stack entries following the topmost markobject are placed into + a single Python frozenset, which single frozenset object replaces all + of the stack from the topmost markobject onward. For example, + + Stack before: ... markobject 1 2 3 + Stack after: ... frozenset({1, 2, 3}) + """), + + # Stack manipulation. I(name='POP', @@ -1618,6 +1904,15 @@ stack, so unpickling subclasses can override this form of lookup.
"""), + I(name='STACK_GLOBAL', + code='\x94', + arg=None, + stack_before=[pyunicode, pyunicode], + stack_after=[anyobject], + proto=4, + doc="""Push a global object (module.attr) on the stack. + """), + # Ways to build objects of classes pickle doesn't know about directly # (user-defined classes). I despair of documenting this accurately # and comprehensibly -- you really have to read the pickle code to diff -r cb392bce91e6 -r 9f1be171da08 Lib/test/pickletester.py --- a/Lib/test/pickletester.py Thu Apr 18 09:41:34 2013 +0200 +++ b/Lib/test/pickletester.py Thu Apr 18 03:24:00 2013 -0700 @@ -95,6 +95,9 @@ def __getinitargs__(self): return () +class H(object): + pass + import __main__ __main__.C = C C.__module__ = "__main__" @@ -102,6 +105,8 @@ D.__module__ = "__main__" __main__.E = E E.__module__ = "__main__" +__main__.H = H +H.__module__ = "__main__" class myint(int): def __init__(self, x): @@ -574,6 +579,26 @@ self.assertEqual(list(x.keys()), [1]) self.assertTrue(x[1] is x) + def test_recursive_set(self): + h = H() + y = set({h}) + h.attr = y + for proto in protocols: + s = self.dumps(y, proto) + x = self.loads(s) + self.assertIs(list(x)[0].attr, x) + self.assertEqual(len(x), 1) + + def test_recursive_frozenset(self): + h = H() + y = frozenset({h}) + h.attr = y + for proto in protocols: + s = self.dumps(y, proto) + x = self.loads(s) + self.assertIs(list(x)[0].attr, x) + self.assertEqual(len(x), 1) + def test_recursive_inst(self): i = C() i.attr = i @@ -817,7 +842,7 @@ s = self.dumps(x, proto) y = self.loads(s) self.assertEqual(x, y, (proto, x, s, y)) - expected = expected_opcode[proto, len(x)] + expected = expected_opcode[min(proto, 3), len(x)] self.assertEqual(opcode_in_pickle(expected, s), True) def test_singletons(self): @@ -842,7 +867,7 @@ s = self.dumps(x, proto) y = self.loads(s) self.assertTrue(x is y, (proto, x, s, y)) - expected = expected_opcode[proto, x] + expected = expected_opcode[min(proto, 3), x] self.assertEqual(opcode_in_pickle(expected, s), True)
def test_newobj_tuple(self): @@ -990,6 +1015,31 @@ else: self.assertTrue(num_setitems >= 2) + def test_set_chunking(self): + n = 10 # too small to chunk + x = set(range(n)) + for proto in protocols: + s = self.dumps(x, proto) + y = self.loads(s) + self.assertEqual(x, y) + num_additems = count_opcode(pickle.ADDITEMS, s) + if proto < 4: + self.assertEqual(num_additems, 0) + else: + self.assertEqual(num_additems, 1) + + n = 2500 # expect at least two chunks when proto >= 4 + x = set(range(n)) + for proto in protocols: + s = self.dumps(x, proto) + y = self.loads(s) + self.assertEqual(x, y) + num_additems = count_opcode(pickle.ADDITEMS, s) + if proto < 4: + self.assertEqual(num_additems, 0) + else: + self.assertGreaterEqual(num_additems, 2) + def test_simple_newobj(self): x = object.__new__(SimpleNewObj) # avoid __init__ x.abc = 666 @@ -1285,18 +1335,27 @@ finally: data = None - # BINUNICODE (protocols 1, 2 and 3) cannot carry more than - # 2**32 - 1 bytes of utf-8 encoded unicode. + # BINUNICODE (protocols 1, 2 and 3) cannot carry more than 2**32 - 1 bytes + # of utf-8 encoded unicode. BINUNICODE8 (protocol 4) supports these huge + # unicode strings however. 
- @bigmemtest(size=_4G, memuse=1 + ascii_char_size, dry_run=False) + @bigmemtest(size=_4G, memuse=2 + ascii_char_size, dry_run=False) def test_huge_str_64b(self, size): - data = "a" * size + data = "abcd" * (size // 4) try: for proto in protocols: if proto == 0: continue - with self.assertRaises((ValueError, OverflowError)): - self.dumps(data, protocol=proto) + if proto < 4: + with self.assertRaises((ValueError, OverflowError)): + self.dumps(data, protocol=proto) + else: + try: + pickled = self.dumps(data, protocol=proto) + self.assertTrue(b"abcd" in pickled[:15]) + self.assertTrue(b"abcd" in pickled[-15:]) + finally: + pickled = None finally: data = None @@ -1365,10 +1424,16 @@ class MyDict(dict): sample = {"a": 1, "b": 2} +class MySet(set): + sample = {"a", "b"} + +class MyFrozenSet(frozenset): + sample = frozenset({"a", "b"}) + myclasses = [MyInt, MyFloat, MyComplex, MyStr, MyUnicode, - MyTuple, MyList, MyDict] + MyTuple, MyList, MyDict, MySet, MyFrozenSet] class SlotList(MyList): @@ -1414,7 +1479,7 @@ def test_highest_protocol(self): # Of course this needs to be changed when HIGHEST_PROTOCOL changes. - self.assertEqual(pickle.HIGHEST_PROTOCOL, 3) + self.assertEqual(pickle.HIGHEST_PROTOCOL, 4) def test_callapi(self): f = io.BytesIO() diff -r cb392bce91e6 -r 9f1be171da08 Modules/_pickle.c --- a/Modules/_pickle.c Thu Apr 18 09:41:34 2013 +0200 +++ b/Modules/_pickle.c Thu Apr 18 03:24:00 2013 -0700 @@ -6,7 +6,7 @@ /* Bump this when new opcodes are added to the pickle protocol. 
*/ enum { - HIGHEST_PROTOCOL = 3, + HIGHEST_PROTOCOL = 4, DEFAULT_PROTOCOL = 3 }; @@ -71,7 +71,18 @@ /* Protocol 3 (Python 3.x) */ BINBYTES = 'B', - SHORT_BINBYTES = 'C' + SHORT_BINBYTES = 'C', + + /* Protocol 4 */ + SHORT_BINUNICODE = '\x8c', + BINUNICODE8 = '\x8d', + BINBYTES8 = '\x8e', + EMPTY_SET = '\x8f', + ADDITEM = '\x90', + ADDITEMS = '\x91', + EMPTY_FROZENSET = '\x92', + FROZENSET = '\x93', + STACK_GLOBAL = '\x94' }; /* These aren't opcodes -- they're ways to pickle bools before protocol 2 @@ -1766,14 +1777,14 @@ } else { Py_ssize_t size; - char header[5]; + char header[9]; Py_ssize_t len; size = PyBytes_GET_SIZE(obj); if (size < 0) return -1; - if (size < 256) { + if (size <= 0xff) { header[0] = SHORT_BINBYTES; header[1] = (unsigned char)size; len = 2; @@ -1786,6 +1797,14 @@ header[4] = (unsigned char)((size >> 24) & 0xff); len = 5; } + else if (self->proto >= 4) { + int i; + header[0] = BINBYTES8; + for (i = 0; i < 8; i++) { + header[i+1] = (unsigned char)((size >> (8 * i)) & 0xff); + } + len = 9; + } else { PyErr_SetString(PyExc_OverflowError, "cannot serialize a bytes object larger than 4 GiB"); @@ -1875,26 +1894,39 @@ static int write_utf8(PicklerObject *self, char *data, Py_ssize_t size) { - char pdata[5]; - -#if SIZEOF_SIZE_T > 4 - if (size > 0xffffffffUL) { - /* string too large */ + char header[9]; + Py_ssize_t len; + + if (size <= 0xff && self->proto >= 4) { + header[0] = SHORT_BINUNICODE; + header[1] = (unsigned char)(size & 0xff); + len = 2; + } + else if (size <= 0xffffffffUL) { + header[0] = BINUNICODE; + header[1] = (unsigned char)(size & 0xff); + header[2] = (unsigned char)((size >> 8) & 0xff); + header[3] = (unsigned char)((size >> 16) & 0xff); + header[4] = (unsigned char)((size >> 24) & 0xff); + len = 5; + } + else if (self->proto >= 4) { + int i; + + header[0] = BINUNICODE8; + for (i = 0; i < 8; i++) { + header[i+1] = (unsigned char)((size >> (8 * i)) & 0xff); + } + len = 9; + } + else { PyErr_SetString(PyExc_OverflowError, "cannot
serialize a string larger than 4GiB"); return -1; } -#endif - - pdata[0] = BINUNICODE; - pdata[1] = (unsigned char)(size & 0xff); - pdata[2] = (unsigned char)((size >> 8) & 0xff); - pdata[3] = (unsigned char)((size >> 16) & 0xff); - pdata[4] = (unsigned char)((size >> 24) & 0xff); - - if (_Pickler_Write(self, pdata, sizeof(pdata)) < 0) - return -1; - + + if (_Pickler_Write(self, header, len) < 0) + return -1; if (_Pickler_Write(self, data, size) < 0) return -1; @@ -1930,7 +1962,7 @@ } static int -save_unicode(PicklerObject *self, PyObject *obj) +save_unicode(PicklerObject *self, PyObject *obj, int memoize) { if (self->bin) { if (write_unicode_binary(self, obj) < 0) @@ -1960,7 +1992,7 @@ if (_Pickler_Write(self, "\n", 1) < 0) return -1; } - if (memo_put(self, obj) < 0) + if (memoize && memo_put(self, obj) < 0) return -1; return 0; @@ -2591,6 +2623,235 @@ } static int +save_set(PicklerObject *self, PyObject *obj) +{ + PyObject *item; + int i; + Py_ssize_t set_size, ppos = 0; + Py_hash_t hash; + + const char empty_set_op = EMPTY_SET; + const char mark_op = MARK; + const char additem_op = ADDITEM; + const char additems_op = ADDITEMS; + + if (self->proto < 4) { + PyObject *items; + PyObject *reduce_value; + int status; + + items = PySequence_List(obj); + if (items == NULL) { + return -1; + } + reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items); + Py_DECREF(items); + if (reduce_value == NULL) { + return -1; + } + /* save_reduce() will memoize the object automatically. */ + status = save_reduce(self, reduce_value, obj); + Py_DECREF(reduce_value); + return status; + } + + if (_Pickler_Write(self, &empty_set_op, 1) < 0) + return -1; + + if (memo_put(self, obj) < 0) + return -1; + + set_size = PySet_GET_SIZE(obj); + if (set_size == 0) + return 0; /* nothing to do */ + + /* Special-case len(obj) == 1 to save space. 
*/ + if (set_size == 1) { + _PySet_NextEntry(obj, &ppos, &item, &hash); + + if (save(self, item, 0) < 0) + return -1; + if (_Pickler_Write(self, &additem_op, 1) < 0) + return -1; + return 0; + } + + /* Write in batches of BATCHSIZE. */ + do { + i = 0; + if (_Pickler_Write(self, &mark_op, 1) < 0) + return -1; + while (_PySet_NextEntry(obj, &ppos, &item, &hash)) { + if (save(self, item, 0) < 0) + return -1; + if (++i == BATCHSIZE) + break; + } + if (_Pickler_Write(self, &additems_op, 1) < 0) + return -1; + if (PySet_GET_SIZE(obj) != set_size) { + PyErr_Format( + PyExc_RuntimeError, + "set changed size during iteration"); + return -1; + } + } while (i == BATCHSIZE); + + return 0; +} + +static int +save_frozenset(PicklerObject *self, PyObject *obj) +{ + PyObject *iter; + Py_ssize_t len; + + const char mark_op = MARK; + const char frozenset_op = FROZENSET; + const char empty_frozenset_op = EMPTY_FROZENSET; + + if (self->fast && !fast_save_enter(self, obj)) + return -1; + + if (self->proto < 4) { + PyObject *items; + PyObject *reduce_value; + int status; + + items = PySequence_List(obj); + if (items == NULL) { + return -1; + } + reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type, + items); + Py_DECREF(items); + if (reduce_value == NULL) { + return -1; + } + /* save_reduce() will memoize the object automatically. 
*/ + status = save_reduce(self, reduce_value, obj); + Py_DECREF(reduce_value); + return status; + } + + len = PySet_GET_SIZE(obj); + if (len == 0) { + if (_Pickler_Write(self, &empty_frozenset_op, 1) < 0) + return -1; + return 0; + } + + if (_Pickler_Write(self, &mark_op, 1) < 0) + return -1; + + iter = PyObject_GetIter(obj); + for (;;) { + PyObject *item; + + item = PyIter_Next(iter); + if (item == NULL) { + if (PyErr_Occurred()) { + Py_DECREF(iter); + return -1; + } + break; + } + if (save(self, item, 0) < 0) { + Py_DECREF(item); + Py_DECREF(iter); + return -1; + } + Py_DECREF(item); + } + Py_DECREF(iter); + + /* If the object is already in the memo, this means it is + recursive. In this case, throw away everything we put on the + stack, and fetch the object back from the memo. */ + if (PyMemoTable_Get(self->memo, obj)) { + const char pop_mark_op = POP_MARK; + + if (_Pickler_Write(self, &pop_mark_op, 1) < 0) + return -1; + if (memo_get(self, obj) < 0) + return -1; + return 0; + } + + if (_Pickler_Write(self, &frozenset_op, 1) < 0) + return -1; + if (memo_put(self, obj) < 0) + return -1; + + return 0; +} + +static int +fix_imports(PyObject **module_name, PyObject **global_name) +{ + PyObject *key; + PyObject *item; + + key = PyTuple_Pack(2, *module_name, *global_name); + if (key == NULL) + return -1; + item = PyDict_GetItemWithError(name_mapping_3to2, key); + Py_DECREF(key); + if (item) { + PyObject *fixed_module_name; + PyObject *fixed_global_name; + + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING values " + "should be 2-tuples, not %.200s", + Py_TYPE(item)->tp_name); + return -1; + } + fixed_module_name = PyTuple_GET_ITEM(item, 0); + fixed_global_name = PyTuple_GET_ITEM(item, 1); + if (!PyUnicode_Check(fixed_module_name) || + !PyUnicode_Check(fixed_global_name)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING values " + "should be pairs of str, not 
(%.200s, %.200s)", + Py_TYPE(fixed_module_name)->tp_name, + Py_TYPE(fixed_global_name)->tp_name); + return -1; + } + + Py_CLEAR(*module_name); + Py_CLEAR(*global_name); + Py_INCREF(fixed_module_name); + Py_INCREF(fixed_global_name); + *module_name = fixed_module_name; + *global_name = fixed_global_name; + } + else if (PyErr_Occurred()) { + return -1; + } + + item = PyDict_GetItemWithError(import_mapping_3to2, *module_name); + if (item) { + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_IMPORT_MAPPING values " + "should be strings, not %.200s", + Py_TYPE(item)->tp_name); + return -1; + } + Py_CLEAR(*module_name); + Py_INCREF(item); + *module_name = item; + } + else if (PyErr_Occurred()) { + return -1; + } + + return 0; +} + +static int save_global(PicklerObject *self, PyObject *obj, PyObject *name) { static PyObject *name_str = NULL; @@ -2715,120 +2976,79 @@ goto error; } else { - /* Generate a normal global opcode if we are using a pickle - protocol <= 2, or if the object is not registered in the - extension registry. */ - PyObject *encoded; - PyObject *(*unicode_encoder)(PyObject *); - gen_global: - if (_Pickler_Write(self, &global_op, 1) < 0) - goto error; - - /* Since Python 3.0 now supports non-ASCII identifiers, we encode both - the module name and the global name using UTF-8. We do so only when - we are using the pickle protocol newer than version 3. This is to - ensure compatibility with older Unpickler running on Python 2.x. 
*/ - if (self->proto >= 3) { - unicode_encoder = PyUnicode_AsUTF8String; + if (self->proto >= 4) { + const char stack_global_op = STACK_GLOBAL; + + if (save_unicode(self, module_name, 0 /* memoize */) < 0 || + save_unicode(self, global_name, 0 /* memoize */) < 0) + goto error; + if (_Pickler_Write(self, &stack_global_op, 1) < 0) + goto error; } else { - unicode_encoder = PyUnicode_AsASCIIString; - } - - /* For protocol < 3 and if the user didn't request against doing so, - we convert module names to the old 2.x module names. */ - if (self->fix_imports) { - PyObject *key; - PyObject *item; - - key = PyTuple_Pack(2, module_name, global_name); - if (key == NULL) + /* Generate a normal global opcode if we are using a pickle + protocol < 4, or if the object is not registered in the + extension registry. */ + PyObject *encoded; + PyObject *(*unicode_encoder)(PyObject *); + + if (_Pickler_Write(self, &global_op, 1) < 0) goto error; - item = PyDict_GetItemWithError(name_mapping_3to2, key); - Py_DECREF(key); - if (item) { - if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.REVERSE_NAME_MAPPING values " - "should be 2-tuples, not %.200s", - Py_TYPE(item)->tp_name); + + /* For protocol < 3 and if the user didn't request against doing so, + we convert module names to the old 2.x module names.
*/ + if (self->proto < 3 && self->fix_imports) { + if (fix_imports(&module_name, &global_name) < 0) { goto error; } - Py_CLEAR(module_name); - Py_CLEAR(global_name); - module_name = PyTuple_GET_ITEM(item, 0); - global_name = PyTuple_GET_ITEM(item, 1); - if (!PyUnicode_Check(module_name) || - !PyUnicode_Check(global_name)) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.REVERSE_NAME_MAPPING values " - "should be pairs of str, not (%.200s, %.200s)", - Py_TYPE(module_name)->tp_name, - Py_TYPE(global_name)->tp_name); - goto error; - } - Py_INCREF(module_name); - Py_INCREF(global_name); } - else if (PyErr_Occurred()) { + + /* Since Python 3.0 now supports non-ASCII identifiers, we encode both + the module name and the global name using UTF-8. We do so only when + we are using the pickle protocol newer than version 3. This is to + ensure compatibility with older Unpickler running on Python 2.x. */ + if (self->proto == 3) { + unicode_encoder = PyUnicode_AsUTF8String; + } + else { + unicode_encoder = PyUnicode_AsASCIIString; + } + encoded = unicode_encoder(module_name); + if (encoded == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) + PyErr_Format(PicklingError, + "can't pickle module identifier '%S' using " + "pickle protocol %i", module_name, self->proto); goto error; } - - item = PyDict_GetItemWithError(import_mapping_3to2, module_name); - if (item) { - if (!PyUnicode_Check(item)) { - PyErr_Format(PyExc_RuntimeError, - "_compat_pickle.REVERSE_IMPORT_MAPPING values " - "should be strings, not %.200s", - Py_TYPE(item)->tp_name); - goto error; - } - Py_CLEAR(module_name); - module_name = item; - Py_INCREF(module_name); - } - else if (PyErr_Occurred()) { + if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), + PyBytes_GET_SIZE(encoded)) < 0) { + Py_DECREF(encoded); goto error; } + Py_DECREF(encoded); + if(_Pickler_Write(self, "\n", 1) < 0) + goto error; + + /* Save the name of the module. 
*/ + encoded = unicode_encoder(global_name); + if (encoded == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) + PyErr_Format(PicklingError, + "can't pickle global identifier '%S' using " + "pickle protocol %i", global_name, self->proto); + goto error; + } + if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), + PyBytes_GET_SIZE(encoded)) < 0) { + Py_DECREF(encoded); + goto error; + } + Py_DECREF(encoded); + if(_Pickler_Write(self, "\n", 1) < 0) + goto error; } - - /* Save the name of the module. */ - encoded = unicode_encoder(module_name); - if (encoded == NULL) { - if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) - PyErr_Format(PicklingError, - "can't pickle module identifier '%S' using " - "pickle protocol %i", module_name, self->proto); - goto error; - } - if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), - PyBytes_GET_SIZE(encoded)) < 0) { - Py_DECREF(encoded); - goto error; - } - Py_DECREF(encoded); - if(_Pickler_Write(self, "\n", 1) < 0) - goto error; - - /* Save the name of the module. */ - encoded = unicode_encoder(global_name); - if (encoded == NULL) { - if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) - PyErr_Format(PicklingError, - "can't pickle global identifier '%S' using " - "pickle protocol %i", global_name, self->proto); - goto error; - } - if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), - PyBytes_GET_SIZE(encoded)) < 0) { - Py_DECREF(encoded); - goto error; - } - Py_DECREF(encoded); - if(_Pickler_Write(self, "\n", 1) < 0) - goto error; - /* Memoize the object. */ if (memo_put(self, obj) < 0) goto error; @@ -3118,8 +3338,23 @@ the caller do not want to memoize the object. Not particularly useful, but that is to mimic the behavior save_reduce() in pickle.py when obj is None. */ - if (obj && memo_put(self, obj) < 0) - return -1; + if (obj != NULL) { + /* If the object is already in the memo, this means it is + recursive. 
In this case, throw away everything we put on the + stack, and fetch the object back from the memo. */ + if (PyMemoTable_Get(self->memo, obj)) { + const char pop_op = POP; + + if (_Pickler_Write(self, &pop_op, 1) < 0) + return -1; + if (memo_get(self, obj) < 0) + return -1; + + return 0; + } + else if (memo_put(self, obj) < 0) + return -1; + } if (listitems && batch_list(self, listitems) < 0) return -1; @@ -3207,13 +3442,21 @@ goto done; } else if (type == &PyUnicode_Type) { - status = save_unicode(self, obj); + status = save_unicode(self, obj, 1 /* memoize */); goto done; } else if (type == &PyDict_Type) { status = save_dict(self, obj); goto done; } + else if (type == &PySet_Type) { + status = save_set(self, obj); + goto done; + } + else if (type == &PyFrozenSet_Type) { + status = save_frozenset(self, obj); + goto done; + } else if (type == &PyList_Type) { status = save_list(self, obj); goto done; @@ -3479,9 +3722,9 @@ "This takes a binary file for writing a pickle data stream.\n" "\n" "The optional protocol argument tells the pickler to use the\n" -"given protocol; supported protocols are 0, 1, 2, 3. The default\n" -"protocol is 3; a backward-incompatible protocol designed for\n" -"Python 3.0.\n" +"given protocol; supported protocols are 0, 1, 2, 3 and 4. The\n" +"default protocol is 3; a backward-incompatible protocol designed for\n" +"Python 3.\n" "\n" "Specifying a negative protocol version selects the highest\n" "protocol version supported. 
The higher the protocol used, the\n" @@ -3494,8 +3737,8 @@ "meets this interface.\n" "\n" "If fix_imports is True and protocol is less than 3, pickle will try to\n" -"map the new Python 3.x names to the old module names used in Python\n" -"2.x, so that the pickle data stream is readable with Python 2.x.\n"); +"map the new Python 3 names to the old module names used in Python 2,\n" +"so that the pickle data stream is readable with Python 2.\n"); static int Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) @@ -3988,17 +4231,15 @@ * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX. */ static Py_ssize_t -calc_binsize(char *bytes, int size) +calc_binsize(char *bytes, int nbytes) { unsigned char *s = (unsigned char *)bytes; + int i; size_t x = 0; - assert(size == 4); - - x = (size_t) s[0]; - x |= (size_t) s[1] << 8; - x |= (size_t) s[2] << 16; - x |= (size_t) s[3] << 24; + for (i = 0; i < nbytes; i++) { + x |= (size_t) s[i] << (8 * i); + } if (x > PY_SSIZE_T_MAX) return -1; @@ -4012,21 +4253,21 @@ * of x-platform bugs. */ static long -calc_binint(char *bytes, int size) +calc_binint(char *bytes, int nbytes) { unsigned char *s = (unsigned char *)bytes; - int i = size; + int i; long x = 0; - for (i = 0; i < size; i++) { - x |= (long)s[i] << (i * 8); + for (i = 0; i < nbytes; i++) { + x |= (long)s[i] << (8 * i); } /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) * is signed, so on a box with longs bigger than 4 bytes we need * to extend a BININT's sign bit to the full width. 
*/ - if (SIZEOF_LONG > 4 && size == 4) { + if (SIZEOF_LONG > 4 && nbytes == 4) { x |= -(x & (1L << 31)); } @@ -4234,26 +4475,27 @@ } static int -load_binbytes(UnpicklerObject *self) +load_counted_binbytes(UnpicklerObject *self, int nbytes) { PyObject *bytes; - Py_ssize_t x; + Py_ssize_t size; char *s; - if (_Unpickler_Read(self, &s, 4) < 0) - return -1; - - x = calc_binsize(s, 4); - if (x < 0) { + if (_Unpickler_Read(self, &s, nbytes) < 0) + return -1; + + size = calc_binsize(s, nbytes); + if (size < 0) { PyErr_Format(PyExc_OverflowError, "BINBYTES exceeds system's maximum size of %zd bytes", PY_SSIZE_T_MAX); return -1; } - if (_Unpickler_Read(self, &s, x) < 0) - return -1; - bytes = PyBytes_FromStringAndSize(s, x); + if (_Unpickler_Read(self, &s, size) < 0) + return -1; + + bytes = PyBytes_FromStringAndSize(s, size); if (bytes == NULL) return -1; @@ -4262,74 +4504,27 @@ } static int -load_short_binbytes(UnpicklerObject *self) -{ - PyObject *bytes; - Py_ssize_t x; +load_counted_binstring(UnpicklerObject *self, int nbytes) +{ + PyObject *str; + Py_ssize_t size; char *s; - if (_Unpickler_Read(self, &s, 1) < 0) - return -1; - - x = (unsigned char)s[0]; - - if (_Unpickler_Read(self, &s, x) < 0) - return -1; - - bytes = PyBytes_FromStringAndSize(s, x); - if (bytes == NULL) - return -1; - - PDATA_PUSH(self->stack, bytes, -1); - return 0; -} - -static int -load_binstring(UnpicklerObject *self) -{ - PyObject *str; - Py_ssize_t x; - char *s; - - if (_Unpickler_Read(self, &s, 4) < 0) - return -1; - - x = calc_binint(s, 4); - if (x < 0) { - PyErr_SetString(UnpicklingError, - "BINSTRING pickle has negative byte count"); - return -1; - } - - if (_Unpickler_Read(self, &s, x) < 0) - return -1; - + if (_Unpickler_Read(self, &s, nbytes) < 0) + return -1; + + size = calc_binsize(s, nbytes); + if (size < 0) { + PyErr_Format(UnpicklingError, + "BINSTRING exceeds system's maximum size of %zd bytes", + PY_SSIZE_T_MAX); + return -1; + } + + if (_Unpickler_Read(self, &s, size) < 0) + 
return -1; /* Convert Python 2.x strings to unicode. */ - str = PyUnicode_Decode(s, x, self->encoding, self->errors); - if (str == NULL) - return -1; - - PDATA_PUSH(self->stack, str, -1); - return 0; -} - -static int -load_short_binstring(UnpicklerObject *self) -{ - PyObject *str; - Py_ssize_t x; - char *s; - - if (_Unpickler_Read(self, &s, 1) < 0) - return -1; - - x = (unsigned char)s[0]; - - if (_Unpickler_Read(self, &s, x) < 0) - return -1; - - /* Convert Python 2.x strings to unicode. */ - str = PyUnicode_Decode(s, x, self->encoding, self->errors); + str = PyUnicode_Decode(s, size, self->encoding, self->errors); if (str == NULL) return -1; @@ -4358,16 +4553,16 @@ } static int -load_binunicode(UnpicklerObject *self) +load_counted_binunicode(UnpicklerObject *self, int nbytes) { PyObject *str; Py_ssize_t size; char *s; - if (_Unpickler_Read(self, &s, 4) < 0) - return -1; - - size = calc_binsize(s, 4); + if (_Unpickler_Read(self, &s, nbytes) < 0) + return -1; + + size = calc_binsize(s, nbytes); if (size < 0) { PyErr_Format(PyExc_OverflowError, "BINUNICODE exceeds system's maximum size of %zd bytes", @@ -4375,7 +4570,6 @@ return -1; } - if (_Unpickler_Read(self, &s, size) < 0) return -1; @@ -4447,6 +4641,28 @@ } static int +load_empty_set(UnpicklerObject *self) +{ + PyObject *set; + + if ((set = PySet_New(NULL)) == NULL) + return -1; + PDATA_PUSH(self->stack, set, -1); + return 0; +} + +static int +load_empty_frozenset(UnpicklerObject *self) +{ + PyObject *set; + + if ((set = PyFrozenSet_New(NULL)) == NULL) + return -1; + PDATA_PUSH(self->stack, set, -1); + return 0; +} + +static int load_list(UnpicklerObject *self) { PyObject *list; @@ -4488,6 +4704,29 @@ return 0; } +static int +load_frozenset(UnpicklerObject *self) +{ + PyObject *items; + PyObject *frozenset; + Py_ssize_t i; + + if ((i = marker(self)) < 0) + return -1; + + items = Pdata_poptuple(self->stack, i); + if (items == NULL) + return -1; + + frozenset = PyFrozenSet_New(items); + Py_DECREF(items); + if 
(frozenset == NULL) + return -1; + + PDATA_PUSH(self->stack, frozenset, -1); + return 0; +} + static PyObject * instantiate(PyObject *cls, PyObject *args) { @@ -4675,6 +4914,31 @@ } static int +load_stack_global(UnpicklerObject *self) +{ + PyObject *global; + PyObject *module_name; + PyObject *global_name; + + PDATA_POP(self->stack, global_name); + PDATA_POP(self->stack, module_name); + if (module_name == NULL || !PyUnicode_CheckExact(module_name) || + global_name == NULL || !PyUnicode_CheckExact(global_name)) { + PyErr_SetString(UnpicklingError, "STACK_GLOBAL requires str"); + Py_XDECREF(global_name); + Py_XDECREF(module_name); + return -1; + } + global = find_class(self, module_name, global_name); + Py_DECREF(global_name); + Py_DECREF(module_name); + if (global == NULL) + return -1; + PDATA_PUSH(self->stack, global, -1); + return 0; +} + +static int load_persid(UnpicklerObject *self) { PyObject *pid; @@ -5129,6 +5393,70 @@ } static int +do_additems(UnpicklerObject *self, Py_ssize_t x) +{ + PyObject *set; + Py_ssize_t len, i; + + len = Py_SIZE(self->stack); + if (x > len || x <= 0) + return stack_underflow(); + if (len == x) /* nothing to do */ + return 0; + + set = self->stack->data[x - 1]; + + if (PySet_Check(set)) { + PyObject *items; + int status; + + items = Pdata_poptuple(self->stack, x); + if (items == NULL) + return -1; + + status = _PySet_Update(set, items); + Py_DECREF(items); + return status; + } + else { + PyObject *add_func; + _Py_IDENTIFIER(add); + + add_func = _PyObject_GetAttrId(set, &PyId_add); + if (add_func == NULL) + return -1; + for (i = x; i < len; i++) { + PyObject *result; + PyObject *item; + + item = self->stack->data[i]; + result = _Unpickler_FastCall(self, add_func, item); + if (result == NULL) { + Pdata_clear(self->stack, i + 1); + Py_SIZE(self->stack) = x; + return -1; + } + Py_DECREF(result); + } + Py_SIZE(self->stack) = x; + } + + return 0; +} + +static int +load_additem(UnpicklerObject *self) +{ + return do_additems(self, 
Py_SIZE(self->stack) - 1); +} + +static int +load_additems(UnpicklerObject *self) +{ + return do_additems(self, marker(self)); +} + +static int load_build(UnpicklerObject *self) { PyObject *state, *inst, *slotstate; @@ -5362,13 +5690,16 @@ OP_ARG(LONG4, load_counted_long, 4) OP(FLOAT, load_float) OP(BINFLOAT, load_binfloat) - OP(BINBYTES, load_binbytes) - OP(SHORT_BINBYTES, load_short_binbytes) - OP(BINSTRING, load_binstring) - OP(SHORT_BINSTRING, load_short_binstring) + OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1) + OP_ARG(BINBYTES, load_counted_binbytes, 4) + OP_ARG(BINBYTES8, load_counted_binbytes, 8) + OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1) + OP_ARG(BINSTRING, load_counted_binstring, 4) OP(STRING, load_string) OP(UNICODE, load_unicode) - OP(BINUNICODE, load_binunicode) + OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1) + OP_ARG(BINUNICODE, load_counted_binunicode, 4) + OP_ARG(BINUNICODE8, load_counted_binunicode, 8) OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0) OP_ARG(TUPLE1, load_counted_tuple, 1) OP_ARG(TUPLE2, load_counted_tuple, 2) @@ -5378,10 +5709,16 @@ OP(LIST, load_list) OP(EMPTY_DICT, load_empty_dict) OP(DICT, load_dict) + OP(EMPTY_SET, load_empty_set) + OP(ADDITEM, load_additem) + OP(ADDITEMS, load_additems) + OP(EMPTY_FROZENSET, load_empty_frozenset) + OP(FROZENSET, load_frozenset) OP(OBJ, load_obj) OP(INST, load_inst) OP(NEWOBJ, load_newobj) OP(GLOBAL, load_global) + OP(STACK_GLOBAL, load_stack_global) OP(APPEND, load_append) OP(APPENDS, load_appends) OP(BUILD, load_build)