diff -r 9304172ee5eb Lib/plistlib.py --- a/Lib/plistlib.py Thu Mar 29 09:18:14 2012 +0200 +++ b/Lib/plistlib.py Sat Mar 31 10:52:11 2012 +0300 @@ -52,14 +52,18 @@ __all__ = [ "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes", - "Plist", "Data", "Dict" + "Plist", "Data", "Dict", + "InvalidFileException", ] # Note: the Plist and Dict classes have been deprecated. import binascii import datetime +import json from io import BytesIO +import os import re +import struct def readPlist(pathOrFile): @@ -72,8 +76,15 @@ if isinstance(pathOrFile, str): pathOrFile = open(pathOrFile, 'rb') didOpen = True - p = PlistParser() - rootObject = p.parse(pathOrFile) + header = pathOrFile.read(8) + pathOrFile.seek(0) + if header == b'6xBB4xL4xL4xL', trailer) + in_file.seek(offset_table_offset) + object_offsets = [] + offset_format = '>' + {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[offset_size] * num_objects + ref_format = {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[ref_size] + object_offsets = struct.unpack(offset_format, in_file.read(offset_size * num_objects)) + atoms = {0: None, 8: False, 9: True, 15: ''} + objects = {} + + def getSize(token_l): + """ return the size of the next object.""" + if token_l == 0xF: + m = in_file.read(1)[0] & 0x3 + return int.from_bytes(in_file.read(1 << m), 'big') + return token_l + + def readRefs(n): + return struct.unpack('>' + ref_format * n, in_file.read(n * ref_size)) + + def getObject(ref): + if ref not in objects: + objects[ref] = readNextObject(object_offsets[ref]) + return objects[ref] + + def readNextObject(offset): + """ read the object at offset. May recursively read sub-objects (content of an array/dict/set) """ + in_file.seek(offset) + token = in_file.read(1)[0] + token_h, token_l = token & 0xF0, token & 0x0F #high and low parts + if token_h == 0x00: + if token_l in atoms: + return atoms[token_l] + elif token_h == 0x10: #int + return int.from_bytes(in_file.read(2 << token_l), 'big') + elif token_h == 0x20: #real + if token_l == 2: + return struct.unpack('>f', in_file.read(4))[0] + elif token_l == 3: + return struct.unpack('>d', in_file.read(8))[0] + elif token_h == 0x30: #date + if token_l == 0x03: + f = struct.unpack('>d', in_file.read(8))[0] + return datetime.datetime.utcfromtimestamp(f + MAC_OS_X_TIME_OFFSET) + elif token_h == 0x40: #data + s = getSize(token_l) + return in_file.read(s) + elif token_h == 0x50: #ascii string + s = getSize(token_l) + return in_file.read(s).decode('ascii') + elif token_h == 0x60: #unicode string + s = getSize(token_l) + return in_file.read(s * 2).decode('utf-16be') + elif token_h == 0x80: #uid + return in_file.read(token_l + 1) + elif token_h == 0xA0: #array + s = getSize(token_l) + obj_refs = readRefs(s) + return [getObject(x) for x in obj_refs] + elif token_h == 0xC0: #set + s = getSize(token_l) + obj_refs = readRefs(s) + return set(getObject(x) for x in obj_refs) + elif token_h == 0xD0: #dict + s = getSize(token_l) + key_refs = readRefs(s) + obj_refs = readRefs(s) + return {getObject(k): getObject(o) for k, o in zip(key_refs, obj_refs)} + raise InvalidFileException() + + return getObject(top_object) + # Contents should conform to a subset of ISO 8601 # (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. Smaller units may be omitted with # a loss of precision)