Message 157152 - Python tracker

➜

This issue tracker has been migrated to GitHub, and is currently read-only.
For more information, see the GitHub FAQs in Python's Developer Guide.

Author	d9pouces
Recipients	d9pouces, ronaldoussoren
Date	2012-03-30.21:56:16
SpamBayes Score	-1.0
Marked as misclassified	Yes
Message-id	<1333144578.81.0.343123708942.issue14455@psf.upfronthosting.co.za>
In-reply-to

Content
Hi, Plist files have actually three flavors : XML ones, binary ones, and now (starting from Mac OS X 10.7 Lion) json one. The plistlib.readPlist function can only read XML plist files and thus cannot read binary and json ones. The binary format is open and described by Apple (http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c). Here is the diff (from Python 2.7 implementation of plistlib) to transparently read both binary and json formats. API of plistlib remains unchanged, since format detection is done by plistlib.readPlist. An InvalidFileException is raised in case of malformed binary file. 57,58c57 < "Plist", "Data", "Dict", < "InvalidFileException", --- > "Plist", "Data", "Dict" 64d62 < import json 66d63 < import os 68d64 < import struct 81,89c77,78 < header = pathOrFile.read(8) < pathOrFile.seek(0) < if header == '<?xml ve' or header[2:] == '<?xml ': #XML plist file, without or with BOM < p = PlistParser() < rootObject = p.parse(pathOrFile) < elif header == 'bplist00': #binary plist file < rootObject = readBinaryPlistFile(pathOrFile) < else: #json plist file < rootObject = json.load(pathOrFile) --- > p = PlistParser() > rootObject = p.parse(pathOrFile) 195,285d183 < < # timestamp 0 of binary plists corresponds to 1/1/2001 (year of Mac OS X 10.0), instead of 1/1/1970. 
< MAC_OS_X_TIME_OFFSET = (31 * 365 + 8) * 86400 < < class InvalidFileException(ValueError): < def __str__(self): < return "Invalid file" < def __unicode__(self): < return "Invalid file" < < def readBinaryPlistFile(in_file): < """ < Read a binary plist file, following the description of the binary format: http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c < Raise InvalidFileException in case of error, otherwise return the root object, as usual < """ < in_file.seek(-32, os.SEEK_END) < trailer = in_file.read(32) < if len(trailer) != 32: < return InvalidFileException() < offset_size, ref_size, num_objects, top_object, offset_table_offset = struct.unpack('>6xBB4xL4xL4xL', trailer) < in_file.seek(offset_table_offset) < object_offsets = [] < offset_format = '>' + {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[offset_size] * num_objects < ref_format = {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[ref_size] < int_format = {0: (1, '>B'), 1: (2, '>H'), 2: (4, '>L'), 3: (8, '>Q'), } < object_offsets = struct.unpack(offset_format, in_file.read(offset_size * num_objects)) < def getSize(token_l): < """ return the size of the next object.""" < if token_l == 0xF: < m = ord(in_file.read(1)) & 0x3 < s, f = int_format[m] < return struct.unpack(f, in_file.read(s))[0] < return token_l < def readNextObject(offset): < """ read the object at offset. 
May recursively read sub-objects (content of an array/dict/set) """ < in_file.seek(offset) < token = in_file.read(1) < token_h, token_l = ord(token) & 0xF0, ord(token) & 0x0F #high and low parts < if token == '\x00': < return None < elif token == '\x08': < return False < elif token == '\x09': < return True < elif token == '\x0f': < return '' < elif token_h == 0x10: #int < result = 0 < for k in xrange((2 << token_l) - 1): < result = (result << 8) + ord(in_file.read(1)) < return result < elif token_h == 0x20: #real < if token_l == 2: < return struct.unpack('>f', in_file.read(4))[0] < elif token_l == 3: < return struct.unpack('>d', in_file.read(8))[0] < elif token_h == 0x30: #date < f = struct.unpack('>d', in_file.read(8))[0] < return datetime.datetime.utcfromtimestamp(f + MAC_OS_X_TIME_OFFSET) < elif token_h == 0x80: #data < s = getSize(token_l) < return in_file.read(s) < elif token_h == 0x50: #ascii string < s = getSize(token_l) < return in_file.read(s) < elif token_h == 0x60: #unicode string < s = getSize(token_l) < return in_file.read(s * 2).decode('utf-16be') < elif token_h == 0x80: #uid < return in_file.read(token_l + 1) < elif token_h == 0xA0: #array < s = getSize(token_l) < obj_refs = struct.unpack('>' + ref_format * s, in_file.read(s * ref_size)) < return map(lambda x: readNextObject(object_offsets[x]), obj_refs) < elif token_h == 0xC0: #set < s = getSize(token_l) < obj_refs = struct.unpack('>' + ref_format * s, in_file.read(s * ref_size)) < return set(map(lambda x: readNextObject(object_offsets[x]), obj_refs)) < elif token_h == 0xD0: #dict < result = {} < s = getSize(token_l) < key_refs = struct.unpack('>' + ref_format * s, in_file.read(s * ref_size)) < obj_refs = struct.unpack('>' + ref_format * s, in_file.read(s * ref_size)) < for k, o in zip(key_refs, obj_refs): < key = readNextObject(object_offsets[k]) < obj = readNextObject(object_offsets[o]) < result[key] = obj < return result < raise InvalidFileException() < return 
readNextObject(object_offsets[top_object]) <

Hi,

Plist files actually come in three flavors: XML, binary, and now (starting with Mac OS X 10.7 Lion) JSON. The plistlib.readPlist function can only read XML plist files and thus cannot read binary or JSON ones.

The binary format is open and described by Apple (http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c).

Here is the diff (relative to the Python 2.7 implementation of plistlib) to transparently read both the binary and JSON formats.

The API of plistlib remains unchanged, since format detection is done by plistlib.readPlist.
An InvalidFileException is raised in case of a malformed binary file.


57,58c57
<     "Plist", "Data", "Dict",
<     "InvalidFileException",
---
>     "Plist", "Data", "Dict"
64d62
< import json
66d63
< import os
68d64
< import struct
81,89c77,78
<     header = pathOrFile.read(8)
<     pathOrFile.seek(0)
<     if header == '<?xml ve' or header[2:] == '<?xml ': #XML plist file, without or with BOM 
<         p = PlistParser()
<         rootObject = p.parse(pathOrFile)
<     elif header == 'bplist00': #binary plist file
<         rootObject =  readBinaryPlistFile(pathOrFile)
<     else: #json plist file
<         rootObject = json.load(pathOrFile)
---
>     p = PlistParser()
>     rootObject = p.parse(pathOrFile)
195,285d183
< 
< # timestamp 0 of binary plists corresponds to 1/1/2001 (year of Mac OS X 10.0), instead of 1/1/1970.
< MAC_OS_X_TIME_OFFSET = (31 * 365 + 8) * 86400
< 
< class InvalidFileException(ValueError):
<     def __str__(self):
<         return "Invalid file"
<     def __unicode__(self):
<         return "Invalid file"
< 
< def readBinaryPlistFile(in_file):
<     """
<     Read a binary plist file, following the description of the binary format: http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c
<     Raise InvalidFileException in case of error, otherwise return the root object, as usual
<     """
<     in_file.seek(-32, os.SEEK_END)
<     trailer = in_file.read(32)
<     if len(trailer) != 32:
<         return InvalidFileException()
<     offset_size, ref_size, num_objects, top_object, offset_table_offset = struct.unpack('>6xBB4xL4xL4xL', trailer)
<     in_file.seek(offset_table_offset)
<     object_offsets = []
<     offset_format = '>' + {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[offset_size] * num_objects
<     ref_format = {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[ref_size]
<     int_format = {0: (1, '>B'), 1: (2, '>H'), 2: (4, '>L'), 3: (8, '>Q'), }
<     object_offsets = struct.unpack(offset_format, in_file.read(offset_size * num_objects))
<     def getSize(token_l):
<         """ return the size of the next object."""
<         if token_l == 0xF:
<             m = ord(in_file.read(1)) & 0x3
<             s, f = int_format[m]
<             return struct.unpack(f, in_file.read(s))[0]
<         return token_l
<     def readNextObject(offset):
<         """ read the object at offset. May recursively read sub-objects (content of an array/dict/set) """
<         in_file.seek(offset)
<         token = in_file.read(1)
<         token_h, token_l = ord(token) & 0xF0, ord(token) & 0x0F #high and low parts 
<         if token == '\x00':
<             return None
<         elif token == '\x08':
<             return False
<         elif token == '\x09':
<             return True
<         elif token == '\x0f':
<             return ''
<         elif token_h == 0x10: #int
<             result = 0
<             for k in xrange((2 << token_l) - 1):
<                 result = (result << 8) + ord(in_file.read(1))
<             return result
<         elif token_h == 0x20: #real
<             if token_l == 2:
<                 return struct.unpack('>f', in_file.read(4))[0]
<             elif token_l == 3:
<                 return struct.unpack('>d', in_file.read(8))[0]
<         elif token_h == 0x30: #date
<             f = struct.unpack('>d', in_file.read(8))[0]
<             return datetime.datetime.utcfromtimestamp(f + MAC_OS_X_TIME_OFFSET)
<         elif token_h == 0x80: #data
<             s = getSize(token_l)
<             return in_file.read(s)
<         elif token_h == 0x50: #ascii string
<             s = getSize(token_l)
<             return in_file.read(s)
<         elif token_h == 0x60: #unicode string
<             s = getSize(token_l)
<             return in_file.read(s * 2).decode('utf-16be')
<         elif token_h == 0x80: #uid
<             return in_file.read(token_l + 1)
<         elif token_h == 0xA0: #array
<             s = getSize(token_l)
<             obj_refs = struct.unpack('>' + ref_format * s, in_file.read(s * ref_size))
<             return map(lambda x: readNextObject(object_offsets[x]), obj_refs)
<         elif token_h == 0xC0: #set
<             s = getSize(token_l)
<             obj_refs = struct.unpack('>' + ref_format * s, in_file.read(s * ref_size))
<             return set(map(lambda x: readNextObject(object_offsets[x]), obj_refs))
<         elif token_h == 0xD0: #dict
<             result = {}
<             s = getSize(token_l)
<             key_refs = struct.unpack('>' + ref_format * s, in_file.read(s * ref_size))
<             obj_refs = struct.unpack('>' + ref_format * s, in_file.read(s * ref_size))
<             for k, o in zip(key_refs, obj_refs):
<                 key = readNextObject(object_offsets[k])
<                 obj = readNextObject(object_offsets[o])
<                 result[key] = obj
<             return result
<         raise InvalidFileException()
<     return readNextObject(object_offsets[top_object])
<

History
Date	User	Action	Args
2012-03-30 21:56:18	d9pouces	set	recipients: + d9pouces, ronaldoussoren
2012-03-30 21:56:18	d9pouces	set	messageid: <1333144578.81.0.343123708942.issue14455@psf.upfronthosting.co.za>
2012-03-30 21:56:18	d9pouces	link	issue14455 messages
2012-03-30 21:56:17	d9pouces	create