diff -r 387dcd8d7dec Lib/plistlib.py
--- a/Lib/plistlib.py	Fri Mar 30 21:50:40 2012 -0700
+++ b/Lib/plistlib.py	Sun Apr 08 10:13:53 2012 +0200
@@ -57,12 +57,16 @@
 # Note: the Plist and Dict classes have been deprecated.
 
 import binascii
+import codecs
 import datetime
 from io import BytesIO
+import json
+import os
 import re
+import struct
 
 
-def readPlist(pathOrFile):
+def readPlist(pathOrFile, fmt=None):
     """Read a .plist file. 'pathOrFile' may either be a file name or a
     (readable) file object. Return the unpacked root object (which
     usually is a dictionary).
@@ -72,15 +76,30 @@
         if isinstance(pathOrFile, str):
             pathOrFile = open(pathOrFile, 'rb')
             didOpen = True
-        p = PlistParser()
-        rootObject = p.parse(pathOrFile)
+        # 'fmt' picks the parser: 'xml1', 'binary1', or any other value for JSON.
+        # When fmt is None the format is sniffed from the start of the file,
+        # and input that is neither XML nor binary is parsed as JSON.
+        header = pathOrFile.read(32)  # room for a 4-byte BOM plus 7 UTF-32 characters
+        pathOrFile.seek(0)
+        def check_xml_header(header):
+            for bom, enc in (b'', 'utf-8'), (codecs.BOM_UTF8, 'utf-8'), (codecs.BOM_UTF16_BE, 'utf-16-be'), (codecs.BOM_UTF16_LE, 'utf-16-le'), (codecs.BOM_UTF32_BE, 'utf-32-be'), (codecs.BOM_UTF32_LE, 'utf-32-le'):
+                for start in '<?xml v', '<plist>', '<plist ':
+                    if header.startswith(bom + start.encode(enc)):
+                        return True
+            return False
+        if fmt == 'xml1' or (fmt is None and check_xml_header(header)):
+            rootObject = PlistParser().parse(pathOrFile)
+        elif fmt == 'binary1' or (fmt is None and header.startswith(b'bplist00')): #binary plist file
+            rootObject = BinaryPlistParser().read(pathOrFile)
+        else:
+            rootObject = json.loads(pathOrFile.read().decode('utf-8'))
     finally:
         if didOpen:
             pathOrFile.close()
     return rootObject
 
 
-def writePlist(rootObject, pathOrFile):
+def writePlist(rootObject, pathOrFile, fmt=None):
     """Write 'rootObject' to a .plist file. 'pathOrFile' may either be a
     file name or a (writable) file object.
     """
@@ -89,26 +108,33 @@
         if isinstance(pathOrFile, str):
             pathOrFile = open(pathOrFile, 'wb')
             didOpen = True
-        writer = PlistWriter(pathOrFile)
-        writer.writeln("<plist version=\"1.0\">")
-        writer.writeValue(rootObject)
-        writer.writeln("</plist>")
+        # fmt=None and 'xml1' write XML, 'binary1' writes a binary plist,
+        # and any other value serialises the object as UTF-8 encoded JSON.
+        if fmt in (None, 'xml1'):
+            xmlWriter = PlistWriter(pathOrFile)
+            xmlWriter.writeln("<plist version=\"1.0\">")
+            xmlWriter.writeValue(rootObject)
+            xmlWriter.writeln("</plist>")
+        elif fmt == 'binary1':
+            BinaryPlistParser().write(rootObject, pathOrFile)
+        else:
+            pathOrFile.write(json.dumps(rootObject, ensure_ascii=False).encode('utf-8'))
     finally:
         if didOpen:
             pathOrFile.close()
 
 
-def readPlistFromBytes(data):
+def readPlistFromBytes(data, fmt=None):
     """Read a plist data from a bytes object. Return the root object.
     """
-    return readPlist(BytesIO(data))
+    return readPlist(BytesIO(data), fmt=fmt)  # fmt=None leaves format detection to readPlist
 
 
-def writePlistToBytes(rootObject):
+def writePlistToBytes(rootObject, fmt=None):
     """Return 'rootObject' as a plist-formatted bytes object.
     """
     f = BytesIO()
-    writePlist(rootObject, f)
+    writePlist(rootObject, f, fmt=fmt)  # fmt=None makes writePlist emit XML
     return f.getvalue()
 
 
@@ -446,3 +472,218 @@
         self.addObject(Data.fromBase64(self.getData().encode("utf-8")))
     def end_date(self):
         self.addObject(_dateFromString(self.getData()))
+
+
+
+class InvalidFileException(ValueError):  # raised by BinaryPlistParser for data it cannot read or write
+    def __str__(self):
+        return "Invalid file"
+    def __unicode__(self):  # Python 2 text hook; Python 3's str() only consults __str__
+        return "Invalid file"
+
+
+class BinaryPlistParser(object):
+    """
+    Read or write a binary plist file, following the description of the binary format: http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c
+    read() returns the root object and write() serialises one; InvalidFileException signals data that cannot be handled.
+    """
+    _atoms = {0: None, 8: False, 9: True, 15: ''}  # marker byte -> constant value; not referenced by the methods of this class
+
+    def write(self, rootObject, outFile):
+        """Serialise rootObject to outFile, a binary file object that supports write() and tell()."""
+        def byteWidth(value): # narrowest of 1, 2, 4 or 8 bytes whose unsigned range contains value
+            return next((w for w in (1, 2, 4) if value < 1 << (8 * w)), 8)
+        self._outFile = outFile
+        self._outFile.write(b'bplist00') #header
+        # the object count fixes the width of every object reference, so it is needed up front
+        numObjects = self.countObjects(rootObject)
+        self._objectOffsets = []
+        self._refSize = byteWidth(numObjects)
+        self._refFormat = {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[self._refSize]
+        self._currentRef = 0
+        topObject = self._writeNextObject(rootObject)
+        # the offset table follows the last object: one entry per object, in reference order
+        offsetTableOffset = self._outFile.tell()
+        offsetSize = byteWidth(offsetTableOffset)
+        offsetFormat = '>' + {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[offsetSize] * numObjects
+        self._outFile.write(struct.pack(offsetFormat, *self._objectOffsets))
+        # 32-byte trailer: 6 unused bytes, two size bytes, then three 64-bit big-endian fields
+        trailer = offsetSize, self._refSize, numObjects, topObject, offsetTableOffset
+        self._outFile.write(struct.pack('>6xBBQQQ', *trailer))
+
+    def _putSize(self, token, size):
+        """Write the marker byte for an object of type 'token' whose length or count is 'size'."""
+        if size < 15:
+            packed = struct.pack('>B', token | size) # small sizes live in the marker's low nibble
+        else:
+            # low nibble 0xF: the size follows as an int object (marker 0x10-0x13, then 1/2/4/8 bytes)
+            for exponent, code in enumerate('BHLQ'):
+                if code == 'Q' or size < 1 << (8 << exponent):
+                    break
+            packed = struct.pack('>BB' + code, token | 0xF, 0x10 | exponent, size)
+        self._outFile.write(packed)
+
+    def _addRef(self):  # note the current file position as the next object's offset and return that object's index
+        self._objectOffsets.append(self._outFile.tell())
+        ref, self._currentRef = self._currentRef, self._currentRef + 1
+        return ref
+
+    def _writeNextObject(self, rootObject):
+        """Write rootObject (members first for containers) and return its reference number; raise InvalidFileException for unsupported types."""
+        if rootObject is None:
+            currentRef = self._addRef()
+            self._outFile.write(b'\x00')
+        elif isinstance(rootObject, bool): # must precede the int test: bool is a subclass of int
+            currentRef = self._addRef()
+            self._outFile.write(b'\x09' if rootObject else b'\x08')
+        elif isinstance(rootObject, int):
+            currentRef = self._addRef()
+            if rootObject < 0: # negative values are stored as 8-byte two's complement
+                self._outFile.write(struct.pack('>Bq', 0x13, rootObject))
+            elif rootObject < 1 << 8:
+                self._outFile.write(struct.pack('>BB', 0x10, rootObject))
+            elif rootObject < 1 << 16:
+                self._outFile.write(struct.pack('>BH', 0x11, rootObject))
+            elif rootObject < 1 << 32:
+                self._outFile.write(struct.pack('>BL', 0x12, rootObject))
+            else:
+                self._outFile.write(struct.pack('>BQ', 0x13, rootObject))
+        elif isinstance(rootObject, float):
+            currentRef = self._addRef()
+            self._outFile.write(struct.pack('>Bd', 0x23, rootObject))
+        elif isinstance(rootObject, datetime.datetime):
+            currentRef = self._addRef()
+            f = (rootObject - datetime.datetime(2001, 1, 1)).total_seconds()
+            self._outFile.write(struct.pack('>Bd', 0x33, f))
+        elif isinstance(rootObject, Data):
+            currentRef = self._addRef()
+            self._putSize(0x40, len(rootObject.data))
+            self._outFile.write(rootObject.data)
+        elif isinstance(rootObject, str):
+            currentRef = self._addRef()
+            try:
+                t = rootObject.encode('ascii')
+                self._putSize(0x50, len(t))
+            except UnicodeEncodeError:
+                t = rootObject.encode('utf-16be')
+                self._putSize(0x60, len(t) // 2) # size counts UTF-16 code units, not code points
+            self._outFile.write(t)
+        elif isinstance(rootObject, (list, tuple)):
+            refs = []
+            for v in rootObject:
+                refs.append(self._writeNextObject(v))
+            s = len(rootObject)
+            currentRef = self._addRef()
+            self._putSize(0xA0, s)
+            self._outFile.write(struct.pack('>' + self._refFormat * s, *refs))
+        elif isinstance(rootObject, (set, frozenset)):
+            refs = [self._writeNextObject(v) for v in rootObject]
+            s = len(rootObject)
+            currentRef = self._addRef()
+            self._putSize(0xC0, s)
+            self._outFile.write(struct.pack('>' + self._refFormat * s, *refs))
+        elif isinstance(rootObject, dict):
+            keyRefs, obj_refs = [], []
+            for k in sorted(rootObject.keys()): #sorted is required to pass test_bytes
+                keyRefs.append(self._writeNextObject(k))
+                obj_refs.append(self._writeNextObject(rootObject[k]))
+            s = len(rootObject)
+            currentRef = self._addRef()
+            self._putSize(0xD0, s)
+            self._outFile.write(struct.pack('>' + self._refFormat * s, *keyRefs))
+            self._outFile.write(struct.pack('>' + self._refFormat * s, *obj_refs))
+        else:
+            raise InvalidFileException()
+        return currentRef
+
+    def countObjects(self, rootObject):
+        """Recursively count rootObject plus every key and value nested inside it.
+
+        Raise InvalidFileException if a dict has a key that is not a str.
+        """
+        count = 1 # the object itself
+        if isinstance(rootObject, dict):
+            for key, value in rootObject.items():
+                if not isinstance(key, str):
+                    raise InvalidFileException()
+                count += 1 + self.countObjects(value) # one for the key, plus the value's subtree
+        elif isinstance(rootObject, (list, tuple, set, frozenset)):
+            for item in rootObject:
+                count += self.countObjects(item)
+        return count
+
+    def read(self, inFile):
+        """Parse the binary plist in inFile (a seekable binary file object) and return its root object."""
+        self._inFile = inFile
+        self._inFile.seek(-32, os.SEEK_END) # go to the trailer
+        trailer = self._inFile.read(32)
+        if len(trailer) != 32: raise InvalidFileException() # too short to hold a trailer
+        offsetSize, self._refSize, numObjects, topObject, offsetTableOffset = struct.unpack('>6xBBQQQ', trailer)
+        self._inFile.seek(offsetTableOffset) # go to the table with offsets of all objects
+        offsetFormat = '>' + {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[offsetSize] * numObjects
+        self._refFormat = {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[self._refSize]
+        self._objectOffsets = struct.unpack(offsetFormat, self._inFile.read(offsetSize * numObjects))
+        return self._readNextObject(self._objectOffsets[topObject])
+
+    def _getSize(self, tokenL):
+        """Return the size of the next object: tokenL itself, or the integer that follows when tokenL is 0xF."""
+        if tokenL != 0xF:
+            return tokenL
+        # the low two bits of the following marker byte give log2 of the integer's width in bytes
+        exponent = int.from_bytes(self._inFile.read(1), byteorder='big') & 0x3
+        width_code = 'BHLQ'[exponent]
+        return struct.unpack('>' + width_code, self._inFile.read(1 << exponent))[0]
+
+    def _readRefs(self, n):  # read n consecutive object references from the current file position
+        return struct.unpack('>%d%s' % (n, self._refFormat), self._inFile.read(n * self._refSize))
+
+    def _readNextObject(self, offset):
+        """Read and return the object stored at offset, recursively reading the members of an array/set/dict. Raise InvalidFileException for an unknown marker byte."""
+        self._inFile.seek(offset)
+        token = self._inFile.read(1)[0]
+        tokenH, tokenL = token & 0xF0, token & 0x0F #high and low parts
+        if token == 0x00:
+            return None
+        elif token == 0x08:
+            return False
+        elif token == 0x09:
+            return True
+        elif token == 0x0f:
+            return ''
+        elif tokenH == 0x10: #int
+            # tokenL is log2 of the byte count; the format treats integers of 8 bytes or more as signed
+            return int.from_bytes(self._inFile.read(1 << tokenL), 'big', signed=tokenL >= 3)
+        elif tokenH == 0x20 and tokenL == 0x02: #real
+            return struct.unpack('>f', self._inFile.read(4))[0]
+        elif tokenH == 0x20 and tokenL == 0x03: #real
+            return struct.unpack('>d', self._inFile.read(8))[0]
+        elif tokenH == 0x30 and tokenL == 0x03: #date
+            f = struct.unpack('>d', self._inFile.read(8))[0]
+            # seconds since 1/1/2001 (year of Mac OS X 10.0); plain datetime arithmetic, so dates before 1970 work too
+            return datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=f)
+        elif tokenH == 0x40: #data
+            s = self._getSize(tokenL)
+            return Data(self._inFile.read(s))
+        elif tokenH == 0x50: #ascii string
+            s = self._getSize(tokenL)
+            return self._inFile.read(s).decode('ascii')
+        elif tokenH == 0x60: #unicode string
+            s = self._getSize(tokenL)
+            return self._inFile.read(s * 2).decode('utf-16be')
+        elif tokenH == 0x80: #uid
+            return self._inFile.read(tokenL + 1)
+        elif tokenH == 0xA0: #array
+            s = self._getSize(tokenL)
+            obj_refs = self._readRefs(s)
+            return [self._readNextObject(self._objectOffsets[x]) for x in obj_refs]
+        elif tokenH == 0xC0: #set
+            s = self._getSize(tokenL)
+            obj_refs = self._readRefs(s)
+            return set(self._readNextObject(self._objectOffsets[x]) for x in obj_refs)
+        elif tokenH == 0xD0: #dict
+            s = self._getSize(tokenL)
+            key_refs = self._readRefs(s)
+            obj_refs = self._readRefs(s)
+            return {self._readNextObject(self._objectOffsets[k]): self._readNextObject(self._objectOffsets[o]) for k, o in zip(key_refs, obj_refs)}
+        raise InvalidFileException()
+
diff -r 387dcd8d7dec Lib/test/test_plistlib.py
--- a/Lib/test/test_plistlib.py	Fri Mar 30 21:50:40 2012 -0700
+++ b/Lib/test/test_plistlib.py	Sun Apr 08 10:13:53 2012 +0200
@@ -8,7 +8,7 @@
 
 
 # This test data was generated through Cocoa's NSDictionary class
-TESTDATA = b"""<?xml version="1.0" encoding="UTF-8"?>
+TESTDATA_XML1 = b"""<?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" \
 "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
 <plist version="1.0">
@@ -85,7 +85,8 @@
 </dict>
 </plist>
 """.replace(b" " * 8, b"\t")  # Apple as well as plistlib.py output hard tabs
-
+TESTDATA_BINARY1 = b'bplist00\xda\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x13\x1c\x1dVaFloatg\x00\xc5\x00b\x00e\x00n\x00r\x00a\x00aXsomeDataUanIntWaStringUaDateZnestedDataUaList\\someMoreDataUaDict#?\xe0\x00\x00\x00\x00\x00\x00_\x10\x17That was a unicode key.M<binary gunk>\x11\x02\xd8VDoodah3A\x9c\xb9}\xf4\x00\x00\x00\xa1\x12O\x10\xfa<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03\xa5\x14\x15\x16\x17\x18QAQB\x10\x0c#@@@\x00\x00\x00\x00\x00\xa3\x19\x1a\x1b\x10\x01\x10\x02\x10\x03O\x10\xfa<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03<lots of binary gunk>\x00\x01\x02\x03\xd5\x1e\x1f !"#+,-.ZdeeperDict]anotherString[aFalseValueZaTrueValue]aUnicodeValue\xd3$%&\'()QaQbQc\x10\x11#@@@\x00\x00\x00\x00\x00\xa3\x19\x1a*Ttext_\x10\x15<hello & \'hi\' there!>\x08\tk\x00M\x00\xe4\x00s\x00s\x00i\x00g\x00,\x00 \x00M\x00a\x00\xdf\x00\x08\x00\x1d\x00$\x003\x00<\x00B\x00J\x00P\x00[\x00a\x00n\x00t\x00}\x00\x97\x00\xa5\x00\xa8\x00\xaf\x00\xb8\x00\xba\x01\xb7\x01\xbd\x01\xbf\x01\xc1\x01\xc3\x01\xcc\x01\xd0\x01\xd2\x01\xd4\x01\xd6\x02\xd3\x02\xde\x02\xe9\x02\xf7\x03\x03\x03\x0e\x03\x1c\x03#\x03%\x03\'\x03)\x03+\x034\x038\x03=\x03U\x03V\x03W\x00\x00\x00\x00\x00\x00\x02\x01\x00\x00\x00\x00\x00\x00\x00/\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03n'
+TESTDATA_JSON = b'{"aFloat":0.5,"\xc3\x85benraa":"That was a unicode key.","anInt":728,"aString":"Doodah","aList":["A","B",12,32.5,[1,2,3]],"aDict":{"aTrueValue":true,"deeperDict":{"c":[1,2,"text"],"a":17,"b":32.5},"aFalseValue":false,"aUnicodeValue":"M\xc3\xa4ssig, Ma\xc3\x9f","anotherString":"<hello & \'hi\' there!>"}}'
 
 class TestPlistlib(unittest.TestCase):
 
@@ -95,7 +96,7 @@
         except:
             pass
 
-    def _create(self):
+    def _create(self, fmt=None):
         pl = dict(
             aString="Doodah",
             aList=["A", "B", 12, 32.5, [1, 2, 3]],
@@ -108,12 +109,13 @@
                 aFalseValue=False,
                 deeperDict=dict(a=17, b=32.5, c=[1, 2, "text"]),
             ),
-            someData = plistlib.Data(b"<binary gunk>"),
-            someMoreData = plistlib.Data(b"<lots of binary gunk>\0\1\2\3" * 10),
-            nestedData = [plistlib.Data(b"<lots of binary gunk>\0\1\2\3" * 10)],
-            aDate = datetime.datetime(2004, 10, 26, 10, 33, 33),
         )
         pl['\xc5benraa'] = "That was a unicode key."
+        if fmt != 'json':  # json.dumps cannot serialise datetime or Data values
+            pl.update(aDate=datetime.datetime(2004, 10, 26, 10, 33, 33),
+                      someData=plistlib.Data(b"<binary gunk>"),
+                      someMoreData=plistlib.Data(b"<lots of binary gunk>\0\1\2\3" * 10),
+                      nestedData=[plistlib.Data(b"<lots of binary gunk>\0\1\2\3" * 10)])
         return pl
 
     def test_create(self):
@@ -122,38 +124,44 @@
         self.assertEqual(pl["aDict"]["aFalseValue"], False)
 
     def test_io(self):
-        pl = self._create()
-        plistlib.writePlist(pl, support.TESTFN)
-        pl2 = plistlib.readPlist(support.TESTFN)
-        self.assertEqual(dict(pl), dict(pl2))
+        for fmt in ('xml1', 'json', 'binary1'):
+            original = self._create(fmt=fmt)
+            plistlib.writePlist(original, support.TESTFN, fmt=fmt)
+            reloaded = plistlib.readPlist(support.TESTFN)  # no fmt given: the reader has to detect the format
+            self.assertEqual(dict(original), dict(reloaded))
 
     def test_bytes(self):
-        pl = self._create()
-        data = plistlib.writePlistToBytes(pl)
-        pl2 = plistlib.readPlistFromBytes(data)
-        self.assertEqual(dict(pl), dict(pl2))
-        data2 = plistlib.writePlistToBytes(pl2)
-        self.assertEqual(data, data2)
+        for fmt in ('xml1', 'json', 'binary1'):
+            pl = self._create(fmt=fmt)
+            data = plistlib.writePlistToBytes(pl, fmt=fmt)
+            self.assertIsInstance(data, bytes)  # every format must produce bytes; nothing is written to disk here
+            pl2 = plistlib.readPlistFromBytes(data)
+            self.assertEqual(dict(pl), dict(pl2))
+            data2 = plistlib.writePlistToBytes(pl2, fmt=fmt)
+            self.assertEqual(data, data2)
 
     def test_appleformatting(self):
-        pl = plistlib.readPlistFromBytes(TESTDATA)
-        data = plistlib.writePlistToBytes(pl)
-        self.assertEqual(data, TESTDATA,
+        pl = plistlib.readPlistFromBytes(TESTDATA_XML1)
+        data = plistlib.writePlistToBytes(pl, fmt='xml1')
+        self.assertEqual(data, TESTDATA_XML1,
                          "generated data was not identical to Apple's output")
+        self.assertEqual(plistlib.readPlistFromBytes(TESTDATA_JSON)["aString"], "Doodah")  # the JSON sample must parse too
 
     def test_appleformattingfromliteral(self):
-        pl = self._create()
-        pl2 = plistlib.readPlistFromBytes(TESTDATA)
-        self.assertEqual(dict(pl), dict(pl2),
-                         "generated data was not identical to Apple's output")
+        for fmt, sample in (('xml1', TESTDATA_XML1), ('binary1', TESTDATA_BINARY1), ('json', TESTDATA_JSON)):
+            expected = self._create(fmt=fmt)
+            parsed = plistlib.readPlistFromBytes(sample)
+            self.assertEqual(dict(expected), dict(parsed),
+                             "generated data was not identical to Apple's output")
 
     def test_bytesio(self):
         from io import BytesIO
-        b = BytesIO()
-        pl = self._create()
-        plistlib.writePlist(pl, b)
-        pl2 = plistlib.readPlist(BytesIO(b.getvalue()))
-        self.assertEqual(dict(pl), dict(pl2))
+        for fmt in ('xml1', 'json', 'binary1'):
+            sink = BytesIO()
+            original = self._create(fmt=fmt)
+            plistlib.writePlist(original, sink, fmt=fmt)
+            reloaded = plistlib.readPlist(BytesIO(sink.getvalue()))  # fresh stream positioned at the start
+            self.assertEqual(dict(original), dict(reloaded))
 
     def test_controlcharacters(self):
         for i in range(128):
@@ -161,19 +169,20 @@
             testString = "string containing %s" % c
             if i >= 32 or c in "\r\n\t":
                 # \r, \n and \t are the only legal control chars in XML
-                plistlib.writePlistToBytes(testString)
+                plistlib.writePlistToBytes(testString, fmt='xml1')  # legal characters must serialise without raising
             else:
                 self.assertRaises(ValueError,
                                   plistlib.writePlistToBytes,
                                   testString)
 
     def test_nondictroot(self):
-        test1 = "abc"
-        test2 = [1, 2, 3, "abc"]
-        result1 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test1))
-        result2 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test2))
-        self.assertEqual(test1, result1)
-        self.assertEqual(test2, result2)
+        for fmt in ('xml1', 'json', 'binary1'):
+            text, seq = "abc", [1, 2, 3, "abc"]  # roots that are not dictionaries
+            textBytes = plistlib.writePlistToBytes(text, fmt=fmt)
+            textBack = plistlib.readPlistFromBytes(textBytes)
+            seqBack = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(seq, fmt=fmt))
+            self.assertEqual(text, textBack)
+            self.assertEqual(seq, seqBack)
 
     def test_invalidarray(self):
         for i in ["<key>key inside an array</key>",