diff -r 387dcd8d7dec Lib/plistlib.py
--- a/Lib/plistlib.py Fri Mar 30 21:50:40 2012 -0700
+++ b/Lib/plistlib.py Sun Apr 08 10:13:53 2012 +0200
@@ -57,12 +57,16 @@
# Note: the Plist and Dict classes have been deprecated.
import binascii
+import codecs
import datetime
from io import BytesIO
+import json
+import os
import re
+import struct
-def readPlist(pathOrFile):
+def readPlist(pathOrFile, fmt=None):
"""Read a .plist file. 'pathOrFile' may either be a file name or a
(readable) file object. Return the unpacked root object (which
usually is a dictionary).
@@ -72,15 +76,30 @@
if isinstance(pathOrFile, str):
pathOrFile = open(pathOrFile, 'rb')
didOpen = True
- p = PlistParser()
- rootObject = p.parse(pathOrFile)
+ header = pathOrFile.read(8)
+ pathOrFile.seek(0)
+ def check_xml_header(header):
+ for bom in b'', codecs.BOM_UTF8, codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE, codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE:
+ for start in b'', b'")
- writer.writeValue(rootObject)
- writer.writeln("")
+ if fmt == None or fmt == 'xml1':
+ writer = PlistWriter(pathOrFile)
+ writer.writeln("")
+ writer.writeValue(rootObject)
+ writer.writeln("")
+ elif fmt == 'binary1':
+ p = BinaryPlistParser()
+ p.write(rootObject, pathOrFile)
+ else:
+ t = json.dumps(rootObject, ensure_ascii=False)
+ pathOrFile.write(t.encode('utf-8'))
finally:
if didOpen:
pathOrFile.close()
-def readPlistFromBytes(data):
+def readPlistFromBytes(data, fmt=None):
"""Read a plist data from a bytes object. Return the root object.
"""
- return readPlist(BytesIO(data))
+ # Wrap the bytes in a file object; 'fmt' is forwarded to readPlist() unchanged.
+ return readPlist(BytesIO(data), fmt=fmt)
-def writePlistToBytes(rootObject):
+def writePlistToBytes(rootObject, fmt=None):
"""Return 'rootObject' as a plist-formatted bytes object.
"""
f = BytesIO()
- writePlist(rootObject, f)
+ # Serialize into the in-memory buffer; 'fmt' is forwarded to writePlist() unchanged.
+ writePlist(rootObject, f, fmt=fmt)
return f.getvalue()
@@ -446,3 +472,218 @@
self.addObject(Data.fromBase64(self.getData().encode("utf-8")))
def end_date(self):
self.addObject(_dateFromString(self.getData()))
+
+
+
+class InvalidFileException(ValueError):
+    """Raised when a property list cannot be read or written.
+
+    'message' is optional and defaults to "Invalid file", so raising the
+    exception without arguments produces the same text as before.
+    """
+
+    def __init__(self, message="Invalid file"):
+        # Python 3 never calls __unicode__; storing the text in the
+        # standard exception arguments is enough for str() to work.
+        ValueError.__init__(self, message)
+
+
+class BinaryPlistParser(object):
+    """Read or write a binary property list (the "bplist00" format).
+
+    The layout follows Apple's reference implementation:
+    http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c
+
+    A file is made of an 8 byte header, the serialized objects, a table
+    with the file offset of every object and a 32 byte trailer describing
+    that table.  Containers refer to their members by index into the table.
+
+    read() returns the root object and raises InvalidFileException for
+    malformed input.  write() raises InvalidFileException for objects it
+    cannot store and OverflowError for integers that do not fit the format.
+    """
+
+    # Objects that consist of the marker byte only.  0x0F is returned as
+    # an empty string.
+    _atoms = {0: None, 8: False, 9: True, 15: ''}
+
+    # struct codes of the unsigned integers, keyed by their width in bytes.
+    _sizeFormats = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
+
+    # Trailer: 6 bytes that are ignored here, the width of an offset table
+    # entry, the width of an object reference, then three 64 bit fields:
+    # number of objects, index of the root object, offset of the table.
+    _trailerFormat = '>6xBBQQQ'
+
+    # Dates are stored as seconds relative to this instant.
+    _epoch = datetime.datetime(2001, 1, 1)
+
+    @staticmethod
+    def _intSize(value):
+        """Return the smallest width (1, 2, 4 or 8 bytes) that holds 'value'."""
+        for size in (1, 2, 4):
+            if value < 1 << (8 * size):
+                return size
+        return 8
+
+    def write(self, rootObject, outFile):
+        """Serialize 'rootObject' to the binary file object 'outFile'.
+
+        Object offsets are taken from outFile.tell() and read back as
+        absolute positions, so the plist has to start at position 0.
+        If an unsupported object is met, InvalidFileException is raised
+        after part of the output may already have been written.
+        """
+        self._outFile = outFile
+        outFile.write(b'bplist00')  # header
+        # The object count fixes the width of every object reference, so
+        # it has to be known before the first container is written.
+        numObjects = self.countObjects(rootObject)
+        self._objectOffsets = []
+        self._refSize = self._intSize(numObjects)
+        self._refFormat = self._sizeFormats[self._refSize]
+        self._currentRef = 0
+        topObject = self._writeNextObject(rootObject)
+        offsetTableOffset = outFile.tell()
+        offsetSize = self._intSize(offsetTableOffset)
+        offsetFormat = '>' + self._sizeFormats[offsetSize] * numObjects
+        outFile.write(struct.pack(offsetFormat, *self._objectOffsets))
+        outFile.write(struct.pack(self._trailerFormat, offsetSize,
+                                  self._refSize, numObjects, topObject,
+                                  offsetTableOffset))
+
+    def _putSize(self, token, size):
+        """Write the marker byte 'token' combined with the length 'size'.
+
+        Lengths below 15 live in the low nibble of the marker.  Larger
+        ones set the nibble to 0xF and follow as an integer object.
+        """
+        if size < 15:
+            self._outFile.write(struct.pack('>B', token | size))
+        elif size < 1 << 8:
+            self._outFile.write(struct.pack('>BBB', token | 0xF, 0x10, size))
+        elif size < 1 << 16:
+            self._outFile.write(struct.pack('>BBH', token | 0xF, 0x11, size))
+        elif size < 1 << 32:
+            self._outFile.write(struct.pack('>BBL', token | 0xF, 0x12, size))
+        else:
+            self._outFile.write(struct.pack('>BBQ', token | 0xF, 0x13, size))
+
+    def _addRef(self):
+        """Record the current file position as the start of a new object
+        and return the reference (table index) assigned to it."""
+        self._objectOffsets.append(self._outFile.tell())
+        self._currentRef += 1
+        return self._currentRef - 1
+
+    def _packInt(self, value):
+        """Return the serialized form of the integer 'value'.
+
+        1, 2 and 4 byte integers are unsigned.  8 byte integers are
+        signed, which makes them the only form able to hold a negative
+        number; 2**63 .. 2**64 - 1 therefore need the 16 byte form.
+        Raise OverflowError for anything outside -2**63 .. 2**64 - 1.
+        """
+        if value < 0:
+            if value < -(1 << 63):
+                raise OverflowError(value)
+            return struct.pack('>Bq', 0x13, value)
+        if value < 1 << 8:
+            return struct.pack('>BB', 0x10, value)
+        if value < 1 << 16:
+            return struct.pack('>BH', 0x11, value)
+        if value < 1 << 32:
+            return struct.pack('>BL', 0x12, value)
+        if value < 1 << 63:
+            return struct.pack('>Bq', 0x13, value)
+        if value < 1 << 64:
+            return b'\x14' + value.to_bytes(16, 'big', signed=True)
+        raise OverflowError(value)
+
+    def _writeContainer(self, token, count, refs):
+        """Write a container marker with 'count' entries followed by the
+        member references 'refs'; return the container's own reference."""
+        currentRef = self._addRef()
+        self._putSize(token, count)
+        self._outFile.write(
+            struct.pack('>' + self._refFormat * len(refs), *refs))
+        return currentRef
+
+    def _writeNextObject(self, rootObject):
+        """Write 'rootObject' with everything it contains; return its reference.
+
+        Members are written before their container, so the outermost
+        object receives the highest reference.
+        """
+        write = self._outFile.write
+        if rootObject is None:
+            currentRef = self._addRef()
+            write(b'\x00')
+        elif isinstance(rootObject, bool):
+            # Has to come before the int test: bool is a subclass of int.
+            currentRef = self._addRef()
+            write(b'\x09' if rootObject else b'\x08')
+        elif isinstance(rootObject, int):
+            # Pack first so a value out of range does not consume a reference.
+            packed = self._packInt(rootObject)
+            currentRef = self._addRef()
+            write(packed)
+        elif isinstance(rootObject, float):
+            currentRef = self._addRef()
+            write(struct.pack('>Bd', 0x23, rootObject))
+        elif isinstance(rootObject, datetime.datetime):
+            currentRef = self._addRef()
+            seconds = (rootObject - self._epoch).total_seconds()
+            write(struct.pack('>Bd', 0x33, seconds))
+        elif isinstance(rootObject, Data):
+            currentRef = self._addRef()
+            self._putSize(0x40, len(rootObject.data))
+            write(rootObject.data)
+        elif isinstance(rootObject, str):
+            try:
+                encoded = rootObject.encode('ascii')
+                token, count = 0x50, len(encoded)
+            except UnicodeEncodeError:
+                encoded = rootObject.encode('utf-16be')
+                # The length counts UTF-16 code units, not characters:
+                # the reader fetches twice that many bytes.
+                token, count = 0x60, len(encoded) // 2
+            currentRef = self._addRef()
+            self._putSize(token, count)
+            write(encoded)
+        elif isinstance(rootObject, (list, tuple)):
+            refs = [self._writeNextObject(v) for v in rootObject]
+            currentRef = self._writeContainer(0xA0, len(refs), refs)
+        elif isinstance(rootObject, (set, frozenset)):
+            refs = [self._writeNextObject(v) for v in rootObject]
+            currentRef = self._writeContainer(0xC0, len(refs), refs)
+        elif isinstance(rootObject, dict):
+            keyRefs, objRefs = [], []
+            # Sorting the keys makes the output reproducible.
+            for key in sorted(rootObject):
+                keyRefs.append(self._writeNextObject(key))
+                objRefs.append(self._writeNextObject(rootObject[key]))
+            # All key references come first, then all value references.
+            currentRef = self._writeContainer(0xD0, len(rootObject),
+                                              keyRefs + objRefs)
+        else:
+            raise InvalidFileException()
+        return currentRef
+
+    def countObjects(self, rootObject):
+        """Return the number of objects write() will emit for 'rootObject'.
+
+        Every container, member, dictionary key and dictionary value
+        counts as one object.  Raise InvalidFileException if a dictionary
+        key is not a string.
+        """
+        if isinstance(rootObject, dict):
+            total = 1
+            for key, value in rootObject.items():
+                if not isinstance(key, str):
+                    raise InvalidFileException()
+                total += 1 + self.countObjects(value)
+            return total
+        if isinstance(rootObject, (list, tuple, set, frozenset)):
+            return 1 + sum(self.countObjects(v) for v in rootObject)
+        return 1
+
+    def read(self, inFile):
+        """Parse the seekable binary file object 'inFile'; return the root object.
+
+        Raise InvalidFileException if the trailer, the offset table or
+        any object cannot be decoded.
+        """
+        self._inFile = inFile
+        try:
+            inFile.seek(-32, os.SEEK_END)  # go to the trailer
+            trailer = inFile.read(32)
+            if len(trailer) != 32:
+                raise InvalidFileException()
+            (offsetSize, self._refSize, numObjects, topObject,
+             offsetTableOffset) = struct.unpack(self._trailerFormat, trailer)
+            # go to the table with the offsets of all objects
+            inFile.seek(offsetTableOffset)
+            offsetFormat = '>' + self._sizeFormats[offsetSize] * numObjects
+            self._refFormat = self._sizeFormats[self._refSize]
+            self._objectOffsets = struct.unpack(
+                offsetFormat, inFile.read(offsetSize * numObjects))
+            return self._readNextObject(self._objectOffsets[topObject])
+        except InvalidFileException:
+            raise
+        except (OSError, IndexError, KeyError, struct.error,
+                OverflowError, ValueError):
+            # Unsupported widths, references outside the table, short
+            # reads and undecodable text all mean the file is damaged.
+            raise InvalidFileException()
+
+    def _readExact(self, count):
+        """Read exactly 'count' bytes; raise InvalidFileException if the
+        file ends early."""
+        data = self._inFile.read(count)
+        if len(data) != count:
+            raise InvalidFileException()
+        return data
+
+    def _getSize(self, tokenL):
+        """Return the length of the object whose marker nibble is 'tokenL'.
+
+        0xF means that the real length follows as an integer object; the
+        low two bits of its marker give log2 of its width in bytes.
+        """
+        if tokenL != 0xF:
+            return tokenL
+        width = 1 << (self._readExact(1)[0] & 0x3)
+        return struct.unpack('>' + self._sizeFormats[width],
+                             self._readExact(width))[0]
+
+    def _readRefs(self, n):
+        """Read 'n' object references from the current position."""
+        return struct.unpack('>' + self._refFormat * n,
+                             self._inFile.read(n * self._refSize))
+
+    def _readNextObject(self, offset):
+        """Return the object stored at 'offset'.
+
+        Members of arrays, sets and dictionaries are read recursively.
+        """
+        self._inFile.seek(offset)
+        token = self._inFile.read(1)[0]
+        tokenH, tokenL = token & 0xF0, token & 0x0F  # high and low nibble
+        if token in self._atoms:
+            return self._atoms[token]
+        if tokenH == 0x10:  # int; forms of 8 bytes and more are signed
+            return int.from_bytes(self._readExact(1 << tokenL), 'big',
+                                  signed=tokenL >= 3)
+        if tokenH == 0x20 and tokenL == 0x02:  # real, single precision
+            return struct.unpack('>f', self._inFile.read(4))[0]
+        if tokenH == 0x20 and tokenL == 0x03:  # real, double precision
+            return struct.unpack('>d', self._inFile.read(8))[0]
+        if tokenH == 0x30 and tokenL == 0x03:  # date
+            seconds = struct.unpack('>d', self._inFile.read(8))[0]
+            # Plain datetime arithmetic mirrors the writer and does not
+            # depend on the platform's timestamp range.
+            return self._epoch + datetime.timedelta(seconds=seconds)
+        if tokenH == 0x40:  # data
+            return Data(self._readExact(self._getSize(tokenL)))
+        if tokenH == 0x50:  # ascii string
+            return self._readExact(self._getSize(tokenL)).decode('ascii')
+        if tokenH == 0x60:  # unicode string, length in UTF-16 code units
+            return self._readExact(self._getSize(tokenL) * 2).decode('utf-16be')
+        if tokenH == 0x80:  # uid, returned as raw bytes
+            return self._readExact(tokenL + 1)
+        if tokenH == 0xA0:  # array
+            refs = self._readRefs(self._getSize(tokenL))
+            return [self._readNextObject(self._objectOffsets[r]) for r in refs]
+        if tokenH == 0xC0:  # set
+            refs = self._readRefs(self._getSize(tokenL))
+            return set(self._readNextObject(self._objectOffsets[r])
+                       for r in refs)
+        if tokenH == 0xD0:  # dict: all key references, then all value references
+            size = self._getSize(tokenL)
+            keyRefs = self._readRefs(size)
+            objRefs = self._readRefs(size)
+            return {self._readNextObject(self._objectOffsets[k]):
+                    self._readNextObject(self._objectOffsets[o])
+                    for k, o in zip(keyRefs, objRefs)}
+        raise InvalidFileException()
+
diff -r 387dcd8d7dec Lib/test/test_plistlib.py
--- a/Lib/test/test_plistlib.py Fri Mar 30 21:50:40 2012 -0700
+++ b/Lib/test/test_plistlib.py Sun Apr 08 10:13:53 2012 +0200
@@ -8,7 +8,7 @@
# This test data was generated through Cocoa's NSDictionary class
-TESTDATA = b"""
+TESTDATA_XML1 = b"""
@@ -85,7 +85,8 @@
""".replace(b" " * 8, b"\t") # Apple as well as plistlib.py output hard tabs
-
+TESTDATA_BINARY1 = b'bplist00\xda\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x13\x1c\x1dVaFloatg\x00\xc5\x00b\x00e\x00n\x00r\x00a\x00aXsomeDataUanIntWaStringUaDateZnestedDataUaList\\someMoreDataUaDict#?\xe0\x00\x00\x00\x00\x00\x00_\x10\x17That was a unicode key.M\x11\x02\xd8VDoodah3A\x9c\xb9}\xf4\x00\x00\x00\xa1\x12O\x10\xfa\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\xa5\x14\x15\x16\x17\x18QAQB\x10\x0c#@@@\x00\x00\x00\x00\x00\xa3\x19\x1a\x1b\x10\x01\x10\x02\x10\x03O\x10\xfa\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\xd5\x1e\x1f !"#+,-.ZdeeperDict]anotherString[aFalseValueZaTrueValue]aUnicodeValue\xd3$%&\'()QaQbQc\x10\x11#@@@\x00\x00\x00\x00\x00\xa3\x19\x1a*Ttext_\x10\x15\x08\tk\x00M\x00\xe4\x00s\x00s\x00i\x00g\x00,\x00 \x00M\x00a\x00\xdf\x00\x08\x00\x1d\x00$\x003\x00<\x00B\x00J\x00P\x00[\x00a\x00n\x00t\x00}\x00\x97\x00\xa5\x00\xa8\x00\xaf\x00\xb8\x00\xba\x01\xb7\x01\xbd\x01\xbf\x01\xc1\x01\xc3\x01\xcc\x01\xd0\x01\xd2\x01\xd4\x01\xd6\x02\xd3\x02\xde\x02\xe9\x02\xf7\x03\x03\x03\x0e\x03\x1c\x03#\x03%\x03\'\x03)\x03+\x034\x038\x03=\x03U\x03V\x03W\x00\x00\x00\x00\x00\x00\x02\x01\x00\x00\x00\x00\x00\x00\x00/\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03n'
+TESTDATA_JSON = b'{"aFloat":0.5,"\xc3\x85benraa":"That was a unicode key.","anInt":728,"aString":"Doodah","aList":["A","B",12,32.5,[1,2,3]],"aDict":{"aTrueValue":true,"deeperDict":{"c":[1,2,"text"],"a":17,"b":32.5},"aFalseValue":false,"aUnicodeValue":"M\xc3\xa4ssig, Ma\xc3\x9f","anotherString":""}}'
class TestPlistlib(unittest.TestCase):
@@ -95,7 +96,7 @@
except:
pass
- def _create(self):
+ def _create(self, fmt=None):
pl = dict(
aString="Doodah",
aList=["A", "B", 12, 32.5, [1, 2, 3]],
@@ -108,12 +109,13 @@
aFalseValue=False,
deeperDict=dict(a=17, b=32.5, c=[1, 2, "text"]),
),
- someData = plistlib.Data(b""),
- someMoreData = plistlib.Data(b"\0\1\2\3" * 10),
- nestedData = [plistlib.Data(b"\0\1\2\3" * 10)],
- aDate = datetime.datetime(2004, 10, 26, 10, 33, 33),
)
pl['\xc5benraa'] = "That was a unicode key."
+ if fmt != 'json':
+ pl['aDate'] = datetime.datetime(2004, 10, 26, 10, 33, 33)
+ pl['someData'] = plistlib.Data(b"")
+ pl['someMoreData'] = plistlib.Data(b"\0\1\2\3" * 10)
+ pl['nestedData'] = [plistlib.Data(b"\0\1\2\3" * 10)]
return pl
def test_create(self):
@@ -122,38 +124,44 @@
self.assertEqual(pl["aDict"]["aFalseValue"], False)
def test_io(self):
- pl = self._create()
- plistlib.writePlist(pl, support.TESTFN)
- pl2 = plistlib.readPlist(support.TESTFN)
- self.assertEqual(dict(pl), dict(pl2))
+ # Write and re-read a file once per format; readPlist() is called without fmt.
+ for fmt in ('xml1', 'json', 'binary1'):
+ pl = self._create(fmt=fmt)
+ plistlib.writePlist(pl, support.TESTFN, fmt=fmt)
+ pl2 = plistlib.readPlist(support.TESTFN)
+ self.assertEqual(dict(pl), dict(pl2))
def test_bytes(self):
- pl = self._create()
- data = plistlib.writePlistToBytes(pl)
- pl2 = plistlib.readPlistFromBytes(data)
- self.assertEqual(dict(pl), dict(pl2))
- data2 = plistlib.writePlistToBytes(pl2)
- self.assertEqual(data, data2)
+        # For every format: serialize, parse the result back, then check
+        # that serializing the parsed object gives identical bytes.
+        for fmt in ('xml1', 'json', 'binary1'):
+            pl = self._create(fmt=fmt)
+            data = plistlib.writePlistToBytes(pl, fmt=fmt)
+            pl2 = plistlib.readPlistFromBytes(data)
+            self.assertEqual(dict(pl), dict(pl2))
+            data2 = plistlib.writePlistToBytes(pl2, fmt=fmt)
+            self.assertEqual(data, data2)
def test_appleformatting(self):
- pl = plistlib.readPlistFromBytes(TESTDATA)
- data = plistlib.writePlistToBytes(pl)
- self.assertEqual(data, TESTDATA,
+ # The XML sample must survive a read/write cycle byte for byte.
+ pl = plistlib.readPlistFromBytes(TESTDATA_XML1)
+ data = plistlib.writePlistToBytes(pl, fmt='xml1')
+ self.assertEqual(data, TESTDATA_XML1,
"generated data was not identical to Apple's output")
+ # The JSON sample is only parsed here; nothing is asserted about the result.
+ pl = plistlib.readPlistFromBytes(TESTDATA_JSON)
def test_appleformattingfromliteral(self):
- pl = self._create()
- pl2 = plistlib.readPlistFromBytes(TESTDATA)
- self.assertEqual(dict(pl), dict(pl2),
- "generated data was not identical to Apple's output")
+ # Each reference sample must decode to the object _create() builds for its format.
+ for fmt, data in ('xml1', TESTDATA_XML1), ('binary1', TESTDATA_BINARY1), ('json', TESTDATA_JSON):
+ pl = self._create(fmt=fmt)
+ pl2 = plistlib.readPlistFromBytes(data)
+ self.assertEqual(dict(pl), dict(pl2),
+ "generated data was not identical to Apple's output")
def test_bytesio(self):
from io import BytesIO
- b = BytesIO()
- pl = self._create()
- plistlib.writePlist(pl, b)
- pl2 = plistlib.readPlist(BytesIO(b.getvalue()))
- self.assertEqual(dict(pl), dict(pl2))
+ # Same round trip as test_io, but through in-memory file objects; a fresh buffer is used per format.
+ for fmt in ('xml1', 'json', 'binary1'):
+ b = BytesIO()
+ pl = self._create(fmt=fmt)
+ plistlib.writePlist(pl, b, fmt=fmt)
+ pl2 = plistlib.readPlist(BytesIO(b.getvalue()))
+ self.assertEqual(dict(pl), dict(pl2))
def test_controlcharacters(self):
for i in range(128):
@@ -161,19 +169,20 @@
testString = "string containing %s" % c
if i >= 32 or c in "\r\n\t":
# \r, \n and \t are the only legal control chars in XML
- plistlib.writePlistToBytes(testString)
+ plistlib.writePlistToBytes(testString, fmt='xml1')
else:
self.assertRaises(ValueError,
plistlib.writePlistToBytes,
testString)
def test_nondictroot(self):
- test1 = "abc"
- test2 = [1, 2, 3, "abc"]
- result1 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test1))
- result2 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test2))
- self.assertEqual(test1, result1)
- self.assertEqual(test2, result2)
+ # A root object that is not a dictionary (a str, a list) must round-trip in every format.
+ for fmt in ('xml1', 'json', 'binary1'):
+ test1 = "abc"
+ test2 = [1, 2, 3, "abc"]
+ result1 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test1, fmt=fmt))
+ result2 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test2, fmt=fmt))
+ self.assertEqual(test1, result1)
+ self.assertEqual(test2, result2)
def test_invalidarray(self):
for i in ["key inside an array",