diff -r 387dcd8d7dec Lib/plistlib.py --- a/Lib/plistlib.py Fri Mar 30 21:50:40 2012 -0700 +++ b/Lib/plistlib.py Sun Apr 08 10:13:53 2012 +0200 @@ -57,12 +57,16 @@ # Note: the Plist and Dict classes have been deprecated. import binascii +import codecs import datetime from io import BytesIO +import json +import os import re +import struct -def readPlist(pathOrFile): +def readPlist(pathOrFile, fmt=None): """Read a .plist file. 'pathOrFile' may either be a file name or a (readable) file object. Return the unpacked root object (which usually is a dictionary). @@ -72,15 +76,30 @@ if isinstance(pathOrFile, str): pathOrFile = open(pathOrFile, 'rb') didOpen = True - p = PlistParser() - rootObject = p.parse(pathOrFile) + header = pathOrFile.read(8) + pathOrFile.seek(0) + def check_xml_header(header): + for bom in b'', codecs.BOM_UTF8, codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE, codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE: + for start in b'', b'") - writer.writeValue(rootObject) - writer.writeln("") + if fmt == None or fmt == 'xml1': + writer = PlistWriter(pathOrFile) + writer.writeln("") + writer.writeValue(rootObject) + writer.writeln("") + elif fmt == 'binary1': + p = BinaryPlistParser() + p.write(rootObject, pathOrFile) + else: + t = json.dumps(rootObject, ensure_ascii=False) + pathOrFile.write(t.encode('utf-8')) finally: if didOpen: pathOrFile.close() -def readPlistFromBytes(data): +def readPlistFromBytes(data, fmt=None): """Read a plist data from a bytes object. Return the root object. """ - return readPlist(BytesIO(data)) + return readPlist(BytesIO(data), fmt=fmt) -def writePlistToBytes(rootObject): +def writePlistToBytes(rootObject, fmt=None): """Return 'rootObject' as a plist-formatted bytes object. """ f = BytesIO() - writePlist(rootObject, f) + writePlist(rootObject, f, fmt=fmt) return f.getvalue() @@ -446,3 +472,218 @@ self.addObject(Data.fromBase64(self.getData().encode("utf-8"))) def end_date(self): self.addObject(_dateFromString(self.getData())) + + + +class InvalidFileException(ValueError): + def __str__(self): + return "Invalid file" + def __unicode__(self): + return "Invalid file" + + +class BinaryPlistParser(object): + """ + Read or write a binary plist file, following the description of the binary format: http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c + Raise InvalidFileException in case of error, otherwise return the root object, as usual + """ + _atoms = {0: None, 8: False, 9: True, 15: ''} + + def write(self, rootObject, outFile): + self._outFile = outFile + self._outFile.write(b'bplist00') #header + numObjects = self.countObjects(rootObject) + self._objectOffsets = [] + if numObjects < 1 << 8: self._refSize = 1 + elif numObjects < 1 << 16: self._refSize = 2 + elif numObjects < 1 << 32: self._refSize = 4 + else: self._refSize = 8 + self._refFormat = {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[self._refSize] + self._currentRef = 0 + topObject = self._writeNextObject(rootObject) + offsetTableOffset = self._outFile.tell() + if offsetTableOffset < 1 << 8: offsetSize = 1 + elif offsetTableOffset < 1 << 16: offsetSize = 2 + elif offsetTableOffset < 1 << 32: offsetSize = 4 + else: offsetSize = 8 + offsetFormat = '>' + {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[offsetSize] * numObjects + self._outFile.write(struct.pack(offsetFormat, *self._objectOffsets)) + trailer = offsetSize, self._refSize, numObjects, topObject, offsetTableOffset + self._outFile.write(struct.pack('>6xBB4xL4xL4xL', *trailer)) + + def _putSize(self, token, size): + if size < 15: + self._outFile.write(struct.pack('>B', token | size)) + elif size < 1 << 8: + self._outFile.write(struct.pack('>BBB', token | 0xF, 0x10, size)) + elif size < 1 << 16: + self._outFile.write(struct.pack('>BBH', token | 0xF, 0x11, size)) + elif size < 1 << 32: + self._outFile.write(struct.pack('>BBL', token | 0xF, 0x12, size)) + else: + self._outFile.write(struct.pack('>BBQ', token | 0xF, 0x13, size)) + + def _addRef(self): + self._objectOffsets.append(self._outFile.tell()) + self._currentRef += 1 + return self._currentRef - 1 + + def _writeNextObject(self, rootObject): + if rootObject == None: + currentRef = self._addRef() + self._outFile.write(b'\x00') + elif isinstance(rootObject, bool) and rootObject == False: + currentRef = self._addRef() + self._outFile.write(b'\x08') + elif isinstance(rootObject, bool) and rootObject == True: + currentRef = self._addRef() + self._outFile.write(b'\x09') + elif isinstance(rootObject, int): + currentRef = self._addRef() + if rootObject < 1 << 8: + self._outFile.write(struct.pack('>BB', 0x10, rootObject)) + elif rootObject < 1 << 16: + self._outFile.write(struct.pack('>BH', 0x11, rootObject)) + elif rootObject < 1 << 32: + self._outFile.write(struct.pack('>BL', 0x12, rootObject)) + else: + self._outFile.write(struct.pack('>BQ', 0x13, rootObject)) + elif isinstance(rootObject, float): + currentRef = self._addRef() + self._outFile.write(struct.pack('>Bd', 0x23, rootObject)) + elif isinstance(rootObject, datetime.datetime): + currentRef = self._addRef() + f = (rootObject - datetime.datetime(2001, 1, 1)).total_seconds() + self._outFile.write(struct.pack('>Bd', 0x33, f)) + elif isinstance(rootObject, Data): + currentRef = self._addRef() + self._putSize(0x40, len(rootObject.data)) + self._outFile.write(rootObject.data) + elif isinstance(rootObject, str): + currentRef = self._addRef() + try: + t = rootObject.encode('ascii') + self._putSize(0x50, len(rootObject)) + except: + t = rootObject.encode('utf-16be') + self._putSize(0x60, len(rootObject)) + self._outFile.write(t) + elif isinstance(rootObject, list) or isinstance(rootObject, tuple): + refs = [] + for v in rootObject: + refs.append(self._writeNextObject(v)) + s = len(rootObject) + currentRef = self._addRef() + self._putSize(0xA0, s) + self._outFile.write(struct.pack('>' + self._refFormat * s, *refs)) + elif isinstance(rootObject, set) or isinstance(rootObject, frozenset): + refs = [refs.append(self._writeNextObject(v)) for v in rootObject] + s = len(rootObject) + currentRef = self._addRef() + self._putSize(0xC0, s) + self._outFile.write(struct.pack('>' + self._refFormat * s, *refs)) + elif isinstance(rootObject, dict): + keyRefs, obj_refs = [], [] + for k in sorted(rootObject.keys()): #sorted is required to pass test_bytes + keyRefs.append(self._writeNextObject(k)) + obj_refs.append(self._writeNextObject(rootObject[k])) + s = len(rootObject) + currentRef = self._addRef() + self._putSize(0xD0, s) + self._outFile.write(struct.pack('>' + self._refFormat * s, *keyRefs)) + self._outFile.write(struct.pack('>' + self._refFormat * s, *obj_refs)) + else: + raise InvalidFileException() + return currentRef + + def countObjects(self, rootObject): + """ recursive function to count the total number of objects in the rootObject""" + total = 1 + if isinstance(rootObject, dict): + for k, v in rootObject.items(): + if not isinstance(k, str): + raise InvalidFileException() + total += 1 + self.countObjects(v) + elif isinstance(rootObject, list) or isinstance(rootObject, tuple): + for v in rootObject: + total += self.countObjects(v) + elif isinstance(rootObject, set) or isinstance(rootObject, frozenset): + for v in rootObject: + total += self.countObjects(v) + return total + + def read(self, inFile): + self._inFile = inFile + self._inFile.seek(-32, os.SEEK_END) # go to the trailer + trailer = self._inFile.read(32) + if len(trailer) != 32: + return InvalidFileException() + offsetSize, self._refSize, numObjects, topObject, offsetTableOffset = struct.unpack('>6xBB4xL4xL4xL', trailer) + self._inFile.seek(offsetTableOffset) # go to the table with offsets of all objects + offsetFormat = '>' + {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[offsetSize] * numObjects + self._refFormat = {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[self._refSize] + self._objectOffsets = struct.unpack(offsetFormat, self._inFile.read(offsetSize * numObjects)) + return self._readNextObject(self._objectOffsets[topObject]) + + def _getSize(self, tokenL): + """ return the size of the next object.""" + if tokenL == 0xF: + m = int.from_bytes(self._inFile.read(1), byteorder='big') & 0x3 + int_format = {0: (1, '>B'), 1: (2, '>H'), 2: (4, '>L'), 3: (8, '>Q'), }# {log2(bit_number): (bit_number, pattern to use with struct.unpack)} + s, f = int_format[m] + return struct.unpack(f, self._inFile.read(s))[0] + return tokenL + + def _readRefs(self, n): + return struct.unpack('>' + self._refFormat * n, self._inFile.read(n * self._refSize)) + + def _readNextObject(self, offset): + """ read the object at offset. May recursively read sub-objects (content of an array/dict/set) """ + self._inFile.seek(offset) + token = self._inFile.read(1)[0] + tokenH, tokenL = token & 0xF0, token & 0x0F #high and low parts + if token == 0x00: + return None + elif token == 0x08: + return False + elif token == 0x09: + return True + elif token == 0x0f: + return '' + #if tokenH == 0x00 and token in self._atoms: + #return self._atoms[token] + elif tokenH == 0x10: #int + return int.from_bytes(self._inFile.read(1 << tokenL), 'big') + elif tokenH == 0x20 and tokenL == 0x02: #real + return struct.unpack('>f', self._inFile.read(4))[0] + elif tokenH == 0x20 and tokenL == 0x03: #real + return struct.unpack('>d', self._inFile.read(8))[0] + elif tokenH == 0x30 and tokenL == 0x03: #date + f = struct.unpack('>d', self._inFile.read(8))[0] + return datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400) # timestamp 0 of binary plists corresponds to 1/1/2001 (year of Mac OS X 10.0), instead of 1/1/1970. + elif tokenH == 0x40: #data + s = self._getSize(tokenL) + return Data(self._inFile.read(s)) + elif tokenH == 0x50: #ascii string + s = self._getSize(tokenL) + return self._inFile.read(s).decode('ascii') + elif tokenH == 0x60: #unicode string + s = self._getSize(tokenL) + return self._inFile.read(s * 2).decode('utf-16be') + elif tokenH == 0x80: #uid + return self._inFile.read(tokenL + 1) + elif tokenH == 0xA0: #array + s = self._getSize(tokenL) + obj_refs = self._readRefs(s) + return [self._readNextObject(self._objectOffsets[x]) for x in obj_refs] + elif tokenH == 0xC0: #set + s = self._getSize(tokenL) + obj_refs = self._readRefs(s) + return set(self._readNextObject(self._objectOffsets[x]) for x in obj_refs) + elif tokenH == 0xD0: #dict + s = self._getSize(tokenL) + key_refs = self._readRefs(s) + obj_refs = self._readRefs(s) + return {self._readNextObject(self._objectOffsets[k]): self._readNextObject(self._objectOffsets[o]) for k, o in zip(key_refs, obj_refs)} + raise InvalidFileException() + diff -r 387dcd8d7dec Lib/test/test_plistlib.py --- a/Lib/test/test_plistlib.py Fri Mar 30 21:50:40 2012 -0700 +++ b/Lib/test/test_plistlib.py Sun Apr 08 10:13:53 2012 +0200 @@ -8,7 +8,7 @@ # This test data was generated through Cocoa's NSDictionary class -TESTDATA = b""" +TESTDATA_XML1 = b""" @@ -85,7 +85,8 @@ """.replace(b" " * 8, b"\t") # Apple as well as plistlib.py output hard tabs - +TESTDATA_BINARY1 = b'bplist00\xda\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x13\x1c\x1dVaFloatg\x00\xc5\x00b\x00e\x00n\x00r\x00a\x00aXsomeDataUanIntWaStringUaDateZnestedDataUaList\\someMoreDataUaDict#?\xe0\x00\x00\x00\x00\x00\x00_\x10\x17That was a unicode key.M\x11\x02\xd8VDoodah3A\x9c\xb9}\xf4\x00\x00\x00\xa1\x12O\x10\xfa\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\xa5\x14\x15\x16\x17\x18QAQB\x10\x0c#@@@\x00\x00\x00\x00\x00\xa3\x19\x1a\x1b\x10\x01\x10\x02\x10\x03O\x10\xfa\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\xd5\x1e\x1f !"#+,-.ZdeeperDict]anotherString[aFalseValueZaTrueValue]aUnicodeValue\xd3$%&\'()QaQbQc\x10\x11#@@@\x00\x00\x00\x00\x00\xa3\x19\x1a*Ttext_\x10\x15\x08\tk\x00M\x00\xe4\x00s\x00s\x00i\x00g\x00,\x00 \x00M\x00a\x00\xdf\x00\x08\x00\x1d\x00$\x003\x00<\x00B\x00J\x00P\x00[\x00a\x00n\x00t\x00}\x00\x97\x00\xa5\x00\xa8\x00\xaf\x00\xb8\x00\xba\x01\xb7\x01\xbd\x01\xbf\x01\xc1\x01\xc3\x01\xcc\x01\xd0\x01\xd2\x01\xd4\x01\xd6\x02\xd3\x02\xde\x02\xe9\x02\xf7\x03\x03\x03\x0e\x03\x1c\x03#\x03%\x03\'\x03)\x03+\x034\x038\x03=\x03U\x03V\x03W\x00\x00\x00\x00\x00\x00\x02\x01\x00\x00\x00\x00\x00\x00\x00/\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03n' +TESTDATA_JSON = b'{"aFloat":0.5,"\xc3\x85benraa":"That was a unicode key.","anInt":728,"aString":"Doodah","aList":["A","B",12,32.5,[1,2,3]],"aDict":{"aTrueValue":true,"deeperDict":{"c":[1,2,"text"],"a":17,"b":32.5},"aFalseValue":false,"aUnicodeValue":"M\xc3\xa4ssig, Ma\xc3\x9f","anotherString":""}}' class TestPlistlib(unittest.TestCase): @@ -95,7 +96,7 @@ except: pass - def _create(self): + def _create(self, fmt=None): pl = dict( aString="Doodah", aList=["A", "B", 12, 32.5, [1, 2, 3]], @@ -108,12 +109,13 @@ aFalseValue=False, deeperDict=dict(a=17, b=32.5, c=[1, 2, "text"]), ), - someData = plistlib.Data(b""), - someMoreData = plistlib.Data(b"\0\1\2\3" * 10), - nestedData = [plistlib.Data(b"\0\1\2\3" * 10)], - aDate = datetime.datetime(2004, 10, 26, 10, 33, 33), ) pl['\xc5benraa'] = "That was a unicode key." + if fmt != 'json': + pl['aDate'] = datetime.datetime(2004, 10, 26, 10, 33, 33) + pl['someData'] = plistlib.Data(b"") + pl['someMoreData'] = plistlib.Data(b"\0\1\2\3" * 10) + pl['nestedData'] = [plistlib.Data(b"\0\1\2\3" * 10)] return pl def test_create(self): @@ -122,38 +124,44 @@ self.assertEqual(pl["aDict"]["aFalseValue"], False) def test_io(self): - pl = self._create() - plistlib.writePlist(pl, support.TESTFN) - pl2 = plistlib.readPlist(support.TESTFN) - self.assertEqual(dict(pl), dict(pl2)) + for fmt in ('xml1', 'json', 'binary1'): + pl = self._create(fmt=fmt) + plistlib.writePlist(pl, support.TESTFN, fmt=fmt) + pl2 = plistlib.readPlist(support.TESTFN) + self.assertEqual(dict(pl), dict(pl2)) def test_bytes(self): - pl = self._create() - data = plistlib.writePlistToBytes(pl) - pl2 = plistlib.readPlistFromBytes(data) - self.assertEqual(dict(pl), dict(pl2)) - data2 = plistlib.writePlistToBytes(pl2) - self.assertEqual(data, data2) + for fmt in ('xml1', 'json', 'binary1'): + pl = self._create(fmt=fmt) + data = plistlib.writePlistToBytes(pl, fmt=fmt) + plistlib.writePlist(pl, 'test_output.xml', fmt='binary1') + pl2 = plistlib.readPlistFromBytes(data) + self.assertEqual(dict(pl), dict(pl2)) + data2 = plistlib.writePlistToBytes(pl2, fmt=fmt) + self.assertEqual(data, data2) def test_appleformatting(self): - pl = plistlib.readPlistFromBytes(TESTDATA) - data = plistlib.writePlistToBytes(pl) - self.assertEqual(data, TESTDATA, + pl = plistlib.readPlistFromBytes(TESTDATA_XML1) + data = plistlib.writePlistToBytes(pl, fmt='xml1') + self.assertEqual(data, TESTDATA_XML1, "generated data was not identical to Apple's output") + pl = plistlib.readPlistFromBytes(TESTDATA_JSON) def test_appleformattingfromliteral(self): - pl = self._create() - pl2 = plistlib.readPlistFromBytes(TESTDATA) - self.assertEqual(dict(pl), dict(pl2), - "generated data was not identical to Apple's output") + for fmt, data in ('xml1', TESTDATA_XML1), ('binary1', TESTDATA_BINARY1), ('json', TESTDATA_JSON): + pl = self._create(fmt=fmt) + pl2 = plistlib.readPlistFromBytes(data) + self.assertEqual(dict(pl), dict(pl2), + "generated data was not identical to Apple's output") def test_bytesio(self): from io import BytesIO - b = BytesIO() - pl = self._create() - plistlib.writePlist(pl, b) - pl2 = plistlib.readPlist(BytesIO(b.getvalue())) - self.assertEqual(dict(pl), dict(pl2)) + for fmt in ('xml1', 'json', 'binary1'): + b = BytesIO() + pl = self._create(fmt=fmt) + plistlib.writePlist(pl, b, fmt=fmt) + pl2 = plistlib.readPlist(BytesIO(b.getvalue())) + self.assertEqual(dict(pl), dict(pl2)) def test_controlcharacters(self): for i in range(128): @@ -161,19 +169,20 @@ testString = "string containing %s" % c if i >= 32 or c in "\r\n\t": # \r, \n and \t are the only legal control chars in XML - plistlib.writePlistToBytes(testString) + plistlib.writePlistToBytes(testString, fmt='xml1') else: self.assertRaises(ValueError, plistlib.writePlistToBytes, testString) def test_nondictroot(self): - test1 = "abc" - test2 = [1, 2, 3, "abc"] - result1 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test1)) - result2 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test2)) - self.assertEqual(test1, result1) - self.assertEqual(test2, result2) + for fmt in ('xml1', 'json', 'binary1'): + test1 = "abc" + test2 = [1, 2, 3, "abc"] + result1 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test1, fmt=fmt)) + result2 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test2, fmt=fmt)) + self.assertEqual(test1, result1) + self.assertEqual(test2, result2) def test_invalidarray(self): for i in ["key inside an array",