diff -r aac6b313ef5f Lib/pickletools.py --- a/Lib/pickletools.py Sat Nov 24 20:42:59 2012 +0100 +++ b/Lib/pickletools.py Sun Nov 25 14:09:40 2012 +0200 @@ -13,6 +13,7 @@ import codecs import pickle import re +import sys __all__ = ['dis', 'genops', 'optimize'] @@ -265,6 +266,28 @@ doc="Four-byte signed integer, little-endian, 2's complement.") +def read_uint4(f): + r""" + >>> import io + >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00')) + 255 + >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31 + True + """ + + data = f.read(4) + if len(data) == 4: + return _unpack(">> import io @@ -421,6 +444,67 @@ """) +def read_bytes1(f): + r""" + >>> import io + >>> read_bytes1(io.BytesIO(b"\x00")) + b'' + >>> read_bytes1(io.BytesIO(b"\x03abcdef")) + b'abc' + """ + + n = read_uint1(f) + assert n >= 0 + data = f.read(n) + if len(data) == n: + return data + raise ValueError("expected %d bytes in a bytes1, but only %d remain" % + (n, len(data))) + +bytes1 = ArgumentDescriptor( + name="bytes1", + n=TAKEN_FROM_ARGUMENT1, + reader=read_bytes1, + doc="""A counted bytes. + + The first argument is a 1-byte unsigned int giving the number + of bytes, and the second argument is that many bytes. + """) + + +def read_bytes4(f): + r""" + >>> import io + >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc")) + b'' + >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef")) + b'abc' + >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef")) + Traceback (most recent call last): + ... + ValueError: expected 50331648 bytes in a bytes4, but only 6 remain + """ + + n = read_uint4(f) + if n > sys.maxsize: + raise ValueError("bytes4 byte count > maxsize: %d" % n) + data = f.read(n) + if len(data) == n: + return data + raise ValueError("expected %d bytes in a bytes4, but only %d remain" % + (n, len(data))) + +bytes4 = ArgumentDescriptor( + name="bytes4", + n=TAKEN_FROM_ARGUMENT4, + reader=read_bytes4, + doc="""A counted bytes. 
+ + The first argument is a 4-byte little-endian unsigned int giving + the number of bytes, and the second argument is that many bytes. + """) + + def read_unicodestringnl(f): r""" >>> import io @@ -464,9 +548,9 @@ ValueError: expected 7 bytes in a unicodestring4, but only 6 remain """ - n = read_int4(f) - if n < 0: - raise ValueError("unicodestring4 byte count < 0: %d" % n) + n = read_uint4(f) + if n > sys.maxsize: + raise ValueError("unicodestring4 byte count > maxsize: %d" % n) data = f.read(n) if len(data) == n: return str(data, 'utf-8', 'surrogatepass') @@ -1038,28 +1122,28 @@ I(name='BINBYTES', code='B', - arg=string4, + arg=bytes4, stack_before=[], stack_after=[pybytes], proto=3, doc="""Push a Python bytes object. - There are two arguments: the first is a 4-byte little-endian signed int - giving the number of bytes in the string, and the second is that many - bytes, which are taken literally as the bytes content. + There are two arguments: the first is a 4-byte little-endian unsigned int + giving the number of bytes, and the second is that many bytes, which are + taken literally as the bytes content. """), I(name='SHORT_BINBYTES', code='C', - arg=string1, + arg=bytes1, stack_before=[], stack_after=[pybytes], proto=3, - doc="""Push a Python string object. + doc="""Push a Python bytes object. There are two arguments: the first is a 1-byte unsigned int giving - the number of bytes in the string, and the second is that many bytes, - which are taken literally as the string content. + the number of bytes, and the second is that many bytes, which are taken + literally as the bytes content. """), # Ways to spell None. @@ -1118,7 +1202,7 @@ proto=1, doc="""Push a Python Unicode string object. - There are two arguments: the first is a 4-byte little-endian signed int + There are two arguments: the first is a 4-byte little-endian unsigned int giving the number of bytes in the string. The second is that many bytes, and is the UTF-8 encoding of the Unicode string. 
"""), @@ -1422,13 +1506,13 @@ I(name='LONG_BINGET', code='j', - arg=int4, + arg=uint4, stack_before=[], stack_after=[anyobject], proto=1, doc="""Read an object from the memo and push it on the stack. - The index of the memo object to push is given by the 4-byte signed + The index of the memo object to push is given by the 4-byte unsigned little-endian integer following. """), @@ -1459,14 +1543,14 @@ I(name='LONG_BINPUT', code='r', - arg=int4, + arg=uint4, stack_before=[], stack_after=[], proto=1, doc="""Store the stack top into the memo. The stack is not popped. The index of the memo location to write into is given by the 4-byte - signed little-endian integer following. + unsigned little-endian integer following. """), # Access the extension registry (predefined objects). Akin to the GET