diff --git a/Doc/library/plistlib.rst b/Doc/library/plistlib.rst --- a/Doc/library/plistlib.rst +++ b/Doc/library/plistlib.rst @@ -45,35 +45,114 @@ This module defines the following functions: -.. function:: readPlist(pathOrFile) +.. function:: readPlist(pathOrFile, \*, fmt=None, bytes_for_data=False, dict_type=dict) Read a plist file. *pathOrFile* may either be a file name or a (readable and binary) file object. Return the unpacked root object (which usually is a dictionary). - The XML data is parsed using the Expat parser from :mod:`xml.parsers.expat` - -- see its documentation for possible exceptions on ill-formed XML. - Unknown elements will simply be ignored by the plist parser. + The *fmt* is the format of the file and the following values are valid: + * :data:`None`: Autodetect the file format -.. function:: writePlist(rootObject, pathOrFile) + * :data:`FMT_XML`: XML file format + + * :data:`FMT_BINARY`: Binary plist format + + If *bytes_for_data* is false (the default) binary data will be returned as instances + of :class:`Data`, otherwise it is returned as instances of :class:`bytes`. + + The *dict_type* is the type used for dictionaries that are read from the plist + file. The exact structure of the plist can be recovered by using + :class:`collections.OrderedDict` (although the order of keys shouldn't be + important in plist files). + + XML data for the "xml1" format is parsed using the Expat parser + from :mod:`xml.parsers.expat` -- see its documentation for possible + exceptions on ill-formed XML. Unknown elements will simply be ignored + by the plist parser. + + The parser for the binary format raises :exc:`InvalidFileException` + when the file cannot be parsed. + + .. versionchanged:: 3.4 + Added the *fmt*, *bytes_for_data* and *dict_type* arguments. + + +.. function:: writePlist(rootObject, pathOrFile, \*, fmt=FMT_XML, sort_keys=True, skipkeys=False) Write *rootObject* to a plist file. 
*pathOrFile* may either be a file name or a (writable and binary) file object. + The *fmt* argument specifies the format of the plist file and can be + one of the following values: + + * :data:`FMT_XML`: XML formatted plist file + + * :data:`FMT_BINARY`: Binary formatted plist file + + When *sort_keys* is true (the default) the keys for dictionaries will be written + to the plist in sorted order, otherwise they will be written in the iteration + order of the dictionary. + + When *skipkeys* is false (the default) the function raises :exc:`TypeError` when a key of + a dictionary is not a string, otherwise such keys are skipped. + A :exc:`TypeError` will be raised if the object is of an unsupported type or a container that contains objects of unsupported types. + .. versionchanged:: 3.4 + Added the *fmt*, *sort_keys* and *skipkeys* arguments. -.. function:: readPlistFromBytes(data) + +.. function:: readPlistFromBytes(data, \*, fmt=None, bytes_for_data=False, dict_type=dict) Read a plist data from a bytes object. Return the root object. + The *fmt* is the format of the data and the following values are valid: -.. function:: writePlistToBytes(rootObject) + * :data:`None`: Autodetect the file format + + * :data:`FMT_XML`: XML file format + + * :data:`FMT_BINARY`: Binary plist format + + If *bytes_for_data* is false (the default) binary data will be returned as instances + of :class:`Data`, otherwise it is returned as instances of :class:`bytes`. + + The *dict_type* is the type used for dictionaries that are read from the plist + file. The exact structure of the plist can be recovered by using + :class:`collections.OrderedDict` (although the order of keys shouldn't be + important in plist files). + + A :exc:`TypeError` will be raised if the object is of an unsupported type or + a container that contains objects of unsupported types. + + .. versionchanged:: 3.4 + Added the *fmt*, *bytes_for_data* and *dict_type* arguments. + + +.. 
function:: writePlistToBytes(rootObject, \*, fmt=FMT_XML, sort_keys=True, skipkeys=False) Return *rootObject* as a plist-formatted bytes object. + The *fmt* argument specifies the format of the plist file and can be + one of the following values: + + * :data:`FMT_XML`: XML formatted plist file + + * :data:`FMT_BINARY`: Binary formatted plist file + + When *sort_keys* is true (the default) the keys for dictionaries will be written + to the plist in sorted order, otherwise they will be written in the iteration + order of the dictionary. + + When *skipkeys* is false (the default) the function raises :exc:`TypeError` when a key of + a dictionary is not a string, otherwise such keys are skipped. + + .. versionchanged:: 3.4 + Added the *fmt*, *sort_keys* and *skipkeys* arguments. + The following class is available: @@ -86,6 +165,21 @@ It has one attribute, :attr:`data`, that can be used to retrieve the Python bytes object stored in it. +The following constants are available: + + +.. data:: FMT_XML + + The XML format for plist files. + + .. versionadded:: 3.4 + + +.. data:: FMT_BINARY + + The binary format for plist files. + + .. versionadded:: 3.4 Examples -------- diff --git a/Lib/plistlib.py b/Lib/plistlib.py --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -48,216 +48,30 @@ pl = readPlist(pathOrFile) print pl["aKey"] """ - - __all__ = [ "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes", - "Plist", "Data", "Dict" + "Plist", "Data", "Dict", "FMT_XML", "FMT_BINARY", ] # Note: the Plist and Dict classes have been deprecated. import binascii +import codecs import datetime from io import BytesIO +import os import re +import struct +import itertools +import uuid +FMT_XML="xml1" +FMT_BINARY="binary1" -def readPlist(pathOrFile): - """Read a .plist file. 'pathOrFile' may either be a file name or a - (readable) file object. Return the unpacked root object (which - usually is a dictionary). 
- """ - didOpen = False - try: - if isinstance(pathOrFile, str): - pathOrFile = open(pathOrFile, 'rb') - didOpen = True - p = PlistParser() - rootObject = p.parse(pathOrFile) - finally: - if didOpen: - pathOrFile.close() - return rootObject - - -def writePlist(rootObject, pathOrFile): - """Write 'rootObject' to a .plist file. 'pathOrFile' may either be a - file name or a (writable) file object. - """ - didOpen = False - try: - if isinstance(pathOrFile, str): - pathOrFile = open(pathOrFile, 'wb') - didOpen = True - writer = PlistWriter(pathOrFile) - writer.writeln("") - writer.writeValue(rootObject) - writer.writeln("") - finally: - if didOpen: - pathOrFile.close() - - -def readPlistFromBytes(data): - """Read a plist data from a bytes object. Return the root object. - """ - return readPlist(BytesIO(data)) - - -def writePlistToBytes(rootObject): - """Return 'rootObject' as a plist-formatted bytes object. - """ - f = BytesIO() - writePlist(rootObject, f) - return f.getvalue() - - -class DumbXMLWriter: - def __init__(self, file, indentLevel=0, indent="\t"): - self.file = file - self.stack = [] - self.indentLevel = indentLevel - self.indent = indent - - def beginElement(self, element): - self.stack.append(element) - self.writeln("<%s>" % element) - self.indentLevel += 1 - - def endElement(self, element): - assert self.indentLevel > 0 - assert self.stack.pop() == element - self.indentLevel -= 1 - self.writeln("" % element) - - def simpleElement(self, element, value=None): - if value is not None: - value = _escape(value) - self.writeln("<%s>%s" % (element, value, element)) - else: - self.writeln("<%s/>" % element) - - def writeln(self, line): - if line: - # plist has fixed encoding of utf-8 - if isinstance(line, str): - line = line.encode('utf-8') - self.file.write(self.indentLevel * self.indent) - self.file.write(line) - self.file.write(b'\n') - - -# Contents should conform to a subset of ISO 8601 -# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. 
Smaller units may be omitted with -# a loss of precision) -_dateParser = re.compile(r"(?P\d\d\d\d)(?:-(?P\d\d)(?:-(?P\d\d)(?:T(?P\d\d)(?::(?P\d\d)(?::(?P\d\d))?)?)?)?)?Z", re.ASCII) - -def _dateFromString(s): - order = ('year', 'month', 'day', 'hour', 'minute', 'second') - gd = _dateParser.match(s).groupdict() - lst = [] - for key in order: - val = gd[key] - if val is None: - break - lst.append(int(val)) - return datetime.datetime(*lst) - -def _dateToString(d): - return '%04d-%02d-%02dT%02d:%02d:%02dZ' % ( - d.year, d.month, d.day, - d.hour, d.minute, d.second - ) - - -# Regex to find any control chars, except for \t \n and \r -_controlCharPat = re.compile( - r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f" - r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]") - -def _escape(text): - m = _controlCharPat.search(text) - if m is not None: - raise ValueError("strings can't contains control characters; " - "use plistlib.Data instead") - text = text.replace("\r\n", "\n") # convert DOS line endings - text = text.replace("\r", "\n") # convert Mac line endings - text = text.replace("&", "&") # escape '&' - text = text.replace("<", "<") # escape '<' - text = text.replace(">", ">") # escape '>' - return text - - -PLISTHEADER = b"""\ - - -""" - -class PlistWriter(DumbXMLWriter): - - def __init__(self, file, indentLevel=0, indent=b"\t", writeHeader=1): - if writeHeader: - file.write(PLISTHEADER) - DumbXMLWriter.__init__(self, file, indentLevel, indent) - - def writeValue(self, value): - if isinstance(value, str): - self.simpleElement("string", value) - elif isinstance(value, bool): - # must switch for bool before int, as bool is a - # subclass of int... 
- if value: - self.simpleElement("true") - else: - self.simpleElement("false") - elif isinstance(value, int): - self.simpleElement("integer", "%d" % value) - elif isinstance(value, float): - self.simpleElement("real", repr(value)) - elif isinstance(value, dict): - self.writeDict(value) - elif isinstance(value, Data): - self.writeData(value) - elif isinstance(value, datetime.datetime): - self.simpleElement("date", _dateToString(value)) - elif isinstance(value, (tuple, list)): - self.writeArray(value) - else: - raise TypeError("unsupported type: %s" % type(value)) - - def writeData(self, data): - self.beginElement("data") - self.indentLevel -= 1 - maxlinelength = max(16, 76 - len(self.indent.replace(b"\t", b" " * 8) * - self.indentLevel)) - for line in data.asBase64(maxlinelength).split(b"\n"): - if line: - self.writeln(line) - self.indentLevel += 1 - self.endElement("data") - - def writeDict(self, d): - if d: - self.beginElement("dict") - items = sorted(d.items()) - for key, value in items: - if not isinstance(key, str): - raise TypeError("keys must be strings") - self.simpleElement("key", key) - self.writeValue(value) - self.endElement("dict") - else: - self.simpleElement("dict") - - def writeArray(self, array): - if array: - self.beginElement("array") - for value in array: - self.writeValue(value) - self.endElement("array") - else: - self.simpleElement("array") - +class InvalidFileException(ValueError): + def __str__(self): + return "Invalid file" + def __unicode__(self): + return "Invalid file" class _InternalDict(dict): @@ -290,84 +104,13 @@ warn("Attribute access from plist dicts is deprecated, use d[key] " "notation instead", DeprecationWarning, 2) -class Dict(_InternalDict): - - def __init__(self, **kwargs): - from warnings import warn - warn("The plistlib.Dict class is deprecated, use builtin dict instead", - DeprecationWarning, 2) - super().__init__(**kwargs) - - -class Plist(_InternalDict): - - """This class has been deprecated. 
Use readPlist() and writePlist() - functions instead, together with regular dict objects. - """ - - def __init__(self, **kwargs): - from warnings import warn - warn("The Plist class is deprecated, use the readPlist() and " - "writePlist() functions instead", DeprecationWarning, 2) - super().__init__(**kwargs) - - def fromFile(cls, pathOrFile): - """Deprecated. Use the readPlist() function instead.""" - rootObject = readPlist(pathOrFile) - plist = cls() - plist.update(rootObject) - return plist - fromFile = classmethod(fromFile) - - def write(self, pathOrFile): - """Deprecated. Use the writePlist() function instead.""" - writePlist(self, pathOrFile) - - -def _encodeBase64(s, maxlinelength=76): - # copied from base64.encodebytes(), with added maxlinelength argument - maxbinsize = (maxlinelength//4)*3 - pieces = [] - for i in range(0, len(s), maxbinsize): - chunk = s[i : i + maxbinsize] - pieces.append(binascii.b2a_base64(chunk)) - return b''.join(pieces) - -class Data: - - """Wrapper for binary data.""" - - def __init__(self, data): - if not isinstance(data, bytes): - raise TypeError("data must be as bytes") - self.data = data - - @classmethod - def fromBase64(cls, data): - # base64.decodebytes just calls binascii.a2b_base64; - # it seems overkill to use both base64 and binascii. 
- return cls(binascii.a2b_base64(data)) - - def asBase64(self, maxlinelength=76): - return _encodeBase64(self.data, maxlinelength) - - def __eq__(self, other): - if isinstance(other, self.__class__): - return self.data == other.data - elif isinstance(other, str): - return self.data == other - else: - return id(self) == id(other) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, repr(self.data)) - class PlistParser: - - def __init__(self): + def __init__(self, bytes_for_data, dict_type): self.stack = [] self.currentKey = None self.root = None + self.bytes_for_data = bytes_for_data + self.dict_type = dict_type def parse(self, fileobj): from xml.parsers.expat import ParserCreate @@ -416,7 +159,7 @@ # element handlers def begin_dict(self, attrs): - d = _InternalDict() + d = self.dict_type() self.addObject(d) self.stack.append(d) def end_dict(self): @@ -449,6 +192,683 @@ def end_string(self): self.addObject(self.getData()) def end_data(self): - self.addObject(Data.fromBase64(self.getData().encode("utf-8"))) + if self.bytes_for_data: + # decode the base64 text straight to bytes instead of wrapping it in Data + self.addObject(binascii.a2b_base64(self.getData().encode("utf-8"))) + else: + self.addObject(Data.fromBase64(self.getData().encode("utf-8"))) def end_date(self): self.addObject(_dateFromString(self.getData())) + + +class _BinaryPlistParser(object): + """ + Read or write a binary plist file, following the description of the binary format. 
+ Raise InvalidFileException in case of error, otherwise return the root object, as usual + + see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c + """ + def __init__(self, bytes_for_data, dict_type): + self.bytes_for_data = bytes_for_data + self.dict_type = dict_type + + def parse(self, inFile): + return self.read(inFile) + + def read(self, inFile): + self._inFile = inFile + self._inFile.seek(-32, os.SEEK_END) # go to the trailer + trailer = self._inFile.read(32) + if len(trailer) != 32: + # raise (not return) so callers never receive the exception object as the parsed root + raise InvalidFileException() + offsetSize, self._refSize, numObjects, topObject, offsetTableOffset = struct.unpack('>6xBB4xL4xL4xL', trailer) + self._inFile.seek(offsetTableOffset) # go to the table with offsets of all objects + offsetFormat = '>' + {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[offsetSize] * numObjects + self._refFormat = {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[self._refSize] + self._objectOffsets = struct.unpack(offsetFormat, self._inFile.read(offsetSize * numObjects)) + return self._readNextObject(self._objectOffsets[topObject]) + + def _getSize(self, tokenL): + """ return the size of the next object.""" + if tokenL == 0xF: + m = int.from_bytes(self._inFile.read(1), byteorder='big') & 0x3 + int_format = {0: (1, '>B'), 1: (2, '>H'), 2: (4, '>L'), 3: (8, '>Q'), }# {log2(bit_number): (bit_number, pattern to use with struct.unpack)} + s, f = int_format[m] + return struct.unpack(f, self._inFile.read(s))[0] + return tokenL + + def _readRefs(self, n): + return struct.unpack('>' + self._refFormat * n, self._inFile.read(n * self._refSize)) + + def _readNextObject(self, offset): + """ read the object at offset. 
May recursively read sub-objects (content of an array/dict/set) """ + self._inFile.seek(offset) + token = self._inFile.read(1)[0] + tokenH, tokenL = token & 0xF0, token & 0x0F #high and low parts + if token == 0x00: + return None + elif token == 0x08: + return False + elif token == 0x09: + return True + # XXX: Missing: url (0xc, 0xd) + elif token == 0x0e: + # 16-byte UUID + # XXX: Not sure if UUID is stored a little or big endian, Apple's external representation in + # lowlevel API's is an array of 16 bytes. + b = self._inFile.read(16) + return uuid.UUID(bytes=b) + + elif token == 0x0f: + # XXX: This appears to be wrong, documentation says 'fill byte' + return b'' + + elif tokenH == 0x10: #int + return int.from_bytes(self._inFile.read(1 << tokenL), 'big') + elif tokenH == 0x20 and tokenL == 0x02: #real + return struct.unpack('>f', self._inFile.read(4))[0] + elif tokenH == 0x20 and tokenL == 0x03: #real + return struct.unpack('>d', self._inFile.read(8))[0] + elif tokenH == 0x30 and tokenL == 0x03: #date + f = struct.unpack('>d', self._inFile.read(8))[0] + return datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400) # timestamp 0 of binary plists corresponds to 1/1/2001 (year of Mac OS X 10.0), instead of 1/1/1970. + elif tokenH == 0x40: #data + s = self._getSize(tokenL) + if self.bytes_for_data: + return self._inFile.read(s) + else: + return Data(self._inFile.read(s)) + elif tokenH == 0x50: #ascii string + s = self._getSize(tokenL) + result = self._inFile.read(s).decode('ascii') + return result + elif tokenH == 0x60: #unicode string + s = self._getSize(tokenL) + return self._inFile.read(s * 2).decode('utf-16be') + elif tokenH == 0x80: #uid + # XXX: UID is an object reference... 
+ return self._inFile.read(tokenL + 1) + elif tokenH == 0xA0: #array + s = self._getSize(tokenL) + obj_refs = self._readRefs(s) + return [self._readNextObject(self._objectOffsets[x]) for x in obj_refs] + + # The referenced source code for Apple's CFBinaryPlist says that this + # code is not actually implemented, it is unclear what this is intended to + # be... + #elif tokenH == 0xB0: #ordset + # s = self._getSize(tokenL) + # obj_refs = self._readRefs(s) + + elif tokenH == 0xC0: #set + s = self._getSize(tokenL) + obj_refs = self._readRefs(s) + return set(self._readNextObject(self._objectOffsets[x]) for x in obj_refs) + elif tokenH == 0xD0: #dict + s = self._getSize(tokenL) + key_refs = self._readRefs(s) + obj_refs = self._readRefs(s) + result = self.dict_type() + for k, o in zip(key_refs, obj_refs): + result[self._readNextObject(self._objectOffsets[k])] = self._readNextObject(self._objectOffsets[o]) + return result + + raise InvalidFileException() + +class _BinaryPlistWriter (object): + def __init__(self, file, sort_keys, skipkeys): + self._outFile = file + self._sort_keys = sort_keys + self._skipkeys = skipkeys + + def _flatten(self, rootObject): + if isinstance(rootObject, (str, int, float, datetime.datetime, bytes)) and not isinstance(rootObject, bool): + if (type(rootObject), rootObject) in self._objtable: + return + + elif isinstance(rootObject, Data): + if (type(rootObject.data), rootObject.data) in self._objtable: + return + + refnum = len(self._objlist) + self._objlist.append(rootObject) + try: + if isinstance(rootObject, Data): + self._objtable[(type(rootObject.data), rootObject.data)] = refnum + else: + self._objtable[(type(rootObject), rootObject)] = refnum + except TypeError: + self._objidtable[id(rootObject)] = refnum + + if isinstance(rootObject, dict): + keys = [] + values = [] + if self._sort_keys: + for k, v in sorted(rootObject.items()): + keys.append(k) + values.append(v) + else: + for k, v in rootObject.items(): + keys.append(k) + 
values.append(v) + + for o in itertools.chain(keys, values): + self._flatten(o) + + elif isinstance(rootObject, (list, tuple, set)): + for o in rootObject: + self._flatten(o) + + def _getrefnum(self, rootObject): + try: + if isinstance(rootObject, Data): + return self._objtable[(type(rootObject.data), rootObject.data)] + else: + return self._objtable[(type(rootObject), rootObject)] + except TypeError: + return self._objidtable[id(rootObject)] + + def write(self, rootObject): + self._objtable = {} + self._objidtable = {} + self._objlist = [] + + self._flatten(rootObject) + + self._outFile.write(b'bplist00') #header + numObjects = len(self._objlist) + self._objectOffsets = [0]*numObjects + if numObjects < 1 << 8: + self._refSize = 1 + elif numObjects < 1 << 16: + self._refSize = 2 + elif numObjects < 1 << 32: + self._refSize = 4 + else: + self._refSize = 8 + + self._refFormat = {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[self._refSize] + for obj in self._objlist: + self._writeNextObject(obj) + + topObject = self._getrefnum(rootObject) + assert topObject == 0 + offsetTableOffset = self._outFile.tell() + if offsetTableOffset < 1 << 8: + offsetSize = 1 + elif offsetTableOffset < 1 << 16: + offsetSize = 2 + elif offsetTableOffset < 1 << 32: + offsetSize = 4 + else: + offsetSize = 8 + + offsetFormat = '>' + {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[offsetSize] * numObjects + self._outFile.write(struct.pack(offsetFormat, *self._objectOffsets)) + sortVersion = 0 + trailer = sortVersion, offsetSize, self._refSize, numObjects, topObject, offsetTableOffset + # uint8 unused[5] + # uint8 sortVersion + # uint8 offsetIntSize + # uint8 objectRefSize + # uint64 numObjects + # uint64 topObject + # uint64 offsetTableOffset + self._outFile.write(struct.pack('>5xBBBQQQ', *trailer)) + + def _putSize(self, token, size): + if size < 15: + self._outFile.write(struct.pack('>B', token | size)) + elif size < 1 << 8: + self._outFile.write(struct.pack('>BBB', token | 0xF, 0x10, size)) + elif size < 1 << 16: + 
self._outFile.write(struct.pack('>BBH', token | 0xF, 0x11, size)) + elif size < 1 << 32: + self._outFile.write(struct.pack('>BBL', token | 0xF, 0x12, size)) + else: + self._outFile.write(struct.pack('>BBQ', token | 0xF, 0x13, size)) + + def _writeNextObject(self, rootObject): + ref = self._getrefnum(rootObject) + self._objectOffsets[ref] = self._outFile.tell() + if rootObject is None: + self._outFile.write(b'\x00') + + elif isinstance(rootObject, bool) and rootObject == False: + self._outFile.write(b'\x08') + + elif isinstance(rootObject, bool) and rootObject == True: + self._outFile.write(b'\x09') + + elif isinstance(rootObject, uuid.UUID): + # XXX: See comment in reader + self._outFile.write(b'\x0e') + self._outFile.write(rootObject.bytes) + + elif isinstance(rootObject, int): + if rootObject < 1 << 8: + self._outFile.write(struct.pack('>BB', 0x10, rootObject)) + elif rootObject < 1 << 16: + self._outFile.write(struct.pack('>BH', 0x11, rootObject)) + elif rootObject < 1 << 32: + self._outFile.write(struct.pack('>BL', 0x12, rootObject)) + else: + self._outFile.write(struct.pack('>BQ', 0x13, rootObject)) + + elif isinstance(rootObject, float): + self._outFile.write(struct.pack('>Bd', 0x23, rootObject)) + + elif isinstance(rootObject, datetime.datetime): + f = (rootObject - datetime.datetime(2001, 1, 1)).total_seconds() + self._outFile.write(struct.pack('>Bd', 0x33, f)) + + elif isinstance(rootObject, Data): + self._putSize(0x40, len(rootObject.data)) + self._outFile.write(rootObject.data) + + elif isinstance(rootObject, bytes): + # raw bytes are written the same way as a Data payload + self._putSize(0x40, len(rootObject)) + self._outFile.write(rootObject) + + elif isinstance(rootObject, str): + try: + t = rootObject.encode('ascii') + self._putSize(0x50, len(rootObject)) + except UnicodeEncodeError: + t = rootObject.encode('utf-16be') + # size is counted in 16-bit units, which differs from len() for non-BMP text + self._putSize(0x60, len(t) // 2) + self._outFile.write(t) + + elif isinstance(rootObject, list) or isinstance(rootObject, tuple): + refs = [ self._getrefnum(o) for o in rootObject ] + s = 
len(refs) + self._putSize(0xA0, s) + self._outFile.write(struct.pack('>' + self._refFormat * s, *refs)) + + elif isinstance(rootObject, set) or isinstance(rootObject, frozenset): + refs = [self._getrefnum(o) for o in rootObject] + s = len(refs) + self._putSize(0xC0, s) + self._outFile.write(struct.pack('>' + self._refFormat * s, *refs)) + + elif isinstance(rootObject, dict): + keyRefs, valRefs = [], [] + + if self._sort_keys: + rootItems = sorted(rootObject.items()) + else: + rootItems = rootObject.items() + + s = len(rootItems) + + for k, v in rootItems: + # XXX: check Apple binary plist behavior when key is not a string + # if not isinstance(k, str): + # if self._skipkeys: + # continue + # raise TypeError("keys must be strings") + keyRefs.append(self._getrefnum(k)) + valRefs.append(self._getrefnum(v)) + + self._putSize(0xD0, s) + self._outFile.write(struct.pack('>' + self._refFormat * s, *keyRefs)) + self._outFile.write(struct.pack('>' + self._refFormat * s, *valRefs)) + + else: + # unsupported value types are reported as TypeError, like the XML writer does + raise TypeError("unsupported type: %s" % type(rootObject)) + +class DumbXMLWriter: + def __init__(self, file, indentLevel=0, indent="\t"): + self.file = file + self.stack = [] + self.indentLevel = indentLevel + self.indent = indent + + def beginElement(self, element): + self.stack.append(element) + self.writeln("<%s>" % element) + self.indentLevel += 1 + + def endElement(self, element): + assert self.indentLevel > 0 + assert self.stack.pop() == element + self.indentLevel -= 1 + self.writeln("</%s>" % element) + + def simpleElement(self, element, value=None): + if value is not None: + value = _escape(value) + self.writeln("<%s>%s</%s>" % (element, value, element)) + else: + self.writeln("<%s/>" % element) + + def writeln(self, line): + if line: + # plist has fixed encoding of utf-8 + if isinstance(line, str): + line = line.encode('utf-8') + self.file.write(self.indentLevel * self.indent) + self.file.write(line) + self.file.write(b'\n') + +class PlistWriter(DumbXMLWriter): + 
+ def __init__(self, file, indentLevel=0, indent=b"\t", writeHeader=1, sort_keys=True, skipkeys=False): + if writeHeader: + file.write(PLISTHEADER) + DumbXMLWriter.__init__(self, file, indentLevel, indent) + self._sort_keys = sort_keys + self._skipkeys = skipkeys + + def write(self, rootObject): + self.writeln("") + self.writeValue(rootObject) + self.writeln("") + + def writeValue(self, value): + if isinstance(value, str): + self.simpleElement("string", value) + elif isinstance(value, bool): + # must switch for bool before int, as bool is a + # subclass of int... + if value: + self.simpleElement("true") + else: + self.simpleElement("false") + elif isinstance(value, int): + self.simpleElement("integer", "%d" % value) + elif isinstance(value, float): + self.simpleElement("real", repr(value)) + elif isinstance(value, dict): + self.writeDict(value) + elif isinstance(value, Data): + self.writeData(value) + elif isinstance(value, bytes): + self.writeBytes(value) + elif isinstance(value, datetime.datetime): + self.simpleElement("date", _dateToString(value)) + elif isinstance(value, (tuple, list)): + self.writeArray(value) + else: + raise TypeError("unsupported type: %s" % type(value)) + + def writeData(self, data): + self.writeBytes(data.data) + + def writeBytes(self, data): + self.beginElement("data") + self.indentLevel -= 1 + maxlinelength = max(16, 76 - len(self.indent.replace(b"\t", b" " * 8) * + self.indentLevel)) + for line in _encodeBase64(data, maxlinelength).split(b"\n"): + if line: + self.writeln(line) + self.indentLevel += 1 + self.endElement("data") + + def writeDict(self, d): + if d: + self.beginElement("dict") + if self._sort_keys: + items = sorted(d.items()) + else: + items = d.items() + + for key, value in items: + if not isinstance(key, str): + if self._skipkeys: + continue + raise TypeError("keys must be strings") + self.simpleElement("key", key) + self.writeValue(value) + self.endElement("dict") + else: + self.simpleElement("dict") + + def 
writeArray(self, array): + if array: + self.beginElement("array") + for value in array: + self.writeValue(value) + self.endElement("array") + else: + self.simpleElement("array") + +def _is_fmt_xml(header): + header = header[:7] + for bom in b'', codecs.BOM_UTF8, codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE, codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE: + for start in b'', b'\d\d\d\d)(?:-(?P\d\d)(?:-(?P\d\d)(?:T(?P\d\d)(?::(?P\d\d)(?::(?P\d\d))?)?)?)?)?Z", re.ASCII) + +def _dateFromString(s): + order = ('year', 'month', 'day', 'hour', 'minute', 'second') + gd = _dateParser.match(s).groupdict() + lst = [] + for key in order: + val = gd[key] + if val is None: + break + lst.append(int(val)) + return datetime.datetime(*lst) + +def _dateToString(d): + return '%04d-%02d-%02dT%02d:%02d:%02dZ' % ( + d.year, d.month, d.day, + d.hour, d.minute, d.second + ) + + +# Regex to find any control chars, except for \t \n and \r +_controlCharPat = re.compile( + r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f" + r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]") + +def _escape(text): + m = _controlCharPat.search(text) + if m is not None: + raise ValueError("strings can't contains control characters; " + "use plistlib.Data instead") + text = text.replace("\r\n", "\n") # convert DOS line endings + text = text.replace("\r", "\n") # convert Mac line endings + text = text.replace("&", "&") # escape '&' + text = text.replace("<", "<") # escape '<' + text = text.replace(">", ">") # escape '>' + return text + + +PLISTHEADER = b"""\ + + +""" + + + + +class Dict(_InternalDict): + + def __init__(self, **kwargs): + from warnings import warn + warn("The plistlib.Dict class is deprecated, use builtin dict instead", + DeprecationWarning, 2) + super().__init__(**kwargs) + + +class Plist(_InternalDict): + + """This class has been deprecated. Use readPlist() and writePlist() + functions instead, together with regular dict objects. 
+ """ + + def __init__(self, **kwargs): + from warnings import warn + warn("The Plist class is deprecated, use the readPlist() and " + "writePlist() functions instead", DeprecationWarning, 2) + super().__init__(**kwargs) + + def fromFile(cls, pathOrFile): + """Deprecated. Use the readPlist() function instead.""" + rootObject = readPlist(pathOrFile) + plist = cls() + plist.update(rootObject) + return plist + fromFile = classmethod(fromFile) + + def write(self, pathOrFile): + """Deprecated. Use the writePlist() function instead.""" + writePlist(self, pathOrFile) + + +def _encodeBase64(s, maxlinelength=76): + # copied from base64.encodebytes(), with added maxlinelength argument + maxbinsize = (maxlinelength//4)*3 + pieces = [] + for i in range(0, len(s), maxbinsize): + chunk = s[i : i + maxbinsize] + pieces.append(binascii.b2a_base64(chunk)) + return b''.join(pieces) + +class Data: + + """Wrapper for binary data.""" + + def __init__(self, data): + if not isinstance(data, bytes): + raise TypeError("data must be as bytes") + self.data = data + + @classmethod + def fromBase64(cls, data): + # base64.decodebytes just calls binascii.a2b_base64; + # it seems overkill to use both base64 and binascii. 
+ return cls(binascii.a2b_base64(data)) + + def asBase64(self, maxlinelength=76): + return _encodeBase64(self.data, maxlinelength) + + def __eq__(self, other): + if isinstance(other, self.__class__): + return self.data == other.data + elif isinstance(other, str): + return self.data == other + else: + return id(self) == id(other) + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, repr(self.data)) + + +# similar interface as the pickle and json modules: + +def dump(value, fp, *, fmt=FMT_XML, skipkeys=False, sort_keys=True): + """Write *value* as a plist to the writable binary file object *fp*.""" + if not hasattr(fp, 'write'): + raise TypeError("fp must be a file-like object") + + writePlist(value, fp, fmt=fmt, sort_keys=sort_keys, skipkeys=skipkeys) + +def dumps(value, *, fmt=FMT_XML, skipkeys=False, sort_keys=True): + """Return *value* serialized as plist-formatted bytes.""" + fp = BytesIO() + dump(value, fp, fmt=fmt, skipkeys=skipkeys, sort_keys=sort_keys) + return fp.getvalue() + +def load(fp, *, fmt=None, dict_type=dict): + """Read a plist from the readable binary file object *fp* and return its root object.""" + if not hasattr(fp, 'read'): + raise TypeError("fp must be a file-like object") + return readPlist(fp, fmt=fmt, bytes_for_data=True, dict_type=dict_type) + +def loads(value, *, fmt=None, dict_type=dict): + """Read a plist from the bytes object *value* and return its root object.""" + fp = BytesIO(value) + return load(fp, fmt=fmt, dict_type=dict_type) diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -7,89 +7,14 @@ from test import support -# This test data was generated through Cocoa's NSDictionary class -TESTDATA = b""" - - - - aDate - 2004-10-26T10:33:33Z - aDict - - aFalseValue - - aTrueValue - - aUnicodeValue - M\xc3\xa4ssig, Ma\xc3\x9f - anotherString - <hello & 'hi' there!> - deeperDict - - a - 17 - b - 32.5 - c - - 1 - 2 - text - - - - aFloat - 0.5 - aList - - A - B - 12 - 32.5 - - 1 - 2 - 3 - - - aString - Doodah - anEmptyDict - - anEmptyList - - anInt - 728 - nestedData - - - PGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAzxsb3RzIG9mIGJpbmFyeSBndW5r - PgABAgM8bG90cyBvZiBiaW5hcnkgZ3Vuaz4AAQIDPGxvdHMgb2YgYmluYXJ5 - IGd1bms+AAECAzxsb3RzIG9mIGJpbmFyeSBndW5rPgABAgM8bG90cyBvZiBi - aW5hcnkgZ3Vuaz4AAQIDPGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAzxsb3Rz 
IG9mIGJpbmFyeSBndW5rPgABAgM8bG90cyBvZiBiaW5hcnkgZ3Vuaz4AAQID - PGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAw== - - - someData - - PGJpbmFyeSBndW5rPg== - - someMoreData - - PGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAzxsb3RzIG9mIGJpbmFyeSBndW5rPgABAgM8 - bG90cyBvZiBiaW5hcnkgZ3Vuaz4AAQIDPGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAzxs - b3RzIG9mIGJpbmFyeSBndW5rPgABAgM8bG90cyBvZiBiaW5hcnkgZ3Vuaz4AAQIDPGxv - dHMgb2YgYmluYXJ5IGd1bms+AAECAzxsb3RzIG9mIGJpbmFyeSBndW5rPgABAgM8bG90 - cyBvZiBiaW5hcnkgZ3Vuaz4AAQIDPGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAw== - - \xc3\x85benraa - That was a unicode key. - - -""".replace(b" " * 8, b"\t") # Apple as well as plistlib.py output hard tabs +ALL_FORMATS=(plistlib.FMT_XML, plistlib.FMT_BINARY) +# The testdata is generated using Mac/Tools/plistlib_generate_testdata.py +# (which using PyObjC to control the Cocoa classes for generating plists) +TESTDATA={ + plistlib.FMT_XML: b'\n\n\n\n\taDate\n\t2004-10-26T10:33:33Z\n\taDict\n\t\n\t\taFalseValue\n\t\t\n\t\taTrueValue\n\t\t\n\t\taUnicodeValue\n\t\tM\xc3\xa4ssig, Ma\xc3\x9f\n\t\tanotherString\n\t\t<hello & \'hi\' 
there!>\n\t\tdeeperDict\n\t\t\n\t\t\ta\n\t\t\t17\n\t\t\tb\n\t\t\t32.5\n\t\t\tc\n\t\t\t\n\t\t\t\t1\n\t\t\t\t2\n\t\t\t\ttext\n\t\t\t\n\t\t\n\t\n\taFloat\n\t0.5\n\taList\n\t\n\t\tA\n\t\tB\n\t\t12\n\t\t32.5\n\t\t\n\t\t\t1\n\t\t\t2\n\t\t\t3\n\t\t\n\t\n\taString\n\tDoodah\n\tanEmptyDict\n\t\n\tanEmptyList\n\t\n\tanInt\n\t728\n\tnestedData\n\t\n\t\t\n\t\tPGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAzxsb3RzIG9mIGJpbmFyeSBndW5r\n\t\tPgABAgM8bG90cyBvZiBiaW5hcnkgZ3Vuaz4AAQIDPGxvdHMgb2YgYmluYXJ5\n\t\tIGd1bms+AAECAzxsb3RzIG9mIGJpbmFyeSBndW5rPgABAgM8bG90cyBvZiBi\n\t\taW5hcnkgZ3Vuaz4AAQIDPGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAzxsb3Rz\n\t\tIG9mIGJpbmFyeSBndW5rPgABAgM8bG90cyBvZiBiaW5hcnkgZ3Vuaz4AAQID\n\t\tPGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAw==\n\t\t\n\t\n\tsomeData\n\t\n\tPGJpbmFyeSBndW5rPg==\n\t\n\tsomeMoreData\n\t\n\tPGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAzxsb3RzIG9mIGJpbmFyeSBndW5rPgABAgM8\n\tbG90cyBvZiBiaW5hcnkgZ3Vuaz4AAQIDPGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAzxs\n\tb3RzIG9mIGJpbmFyeSBndW5rPgABAgM8bG90cyBvZiBiaW5hcnkgZ3Vuaz4AAQIDPGxv\n\tdHMgb2YgYmluYXJ5IGd1bms+AAECAzxsb3RzIG9mIGJpbmFyeSBndW5rPgABAgM8bG90\n\tcyBvZiBiaW5hcnkgZ3Vuaz4AAQIDPGxvdHMgb2YgYmluYXJ5IGd1bms+AAECAw==\n\t\n\t\xc3\x85benraa\n\tThat was a unicode key.\n\n\n', + plistlib.FMT_BINARY: b'bplist00\xdc\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e"#)*+,-/.0UaDateUaDictVaFloatUaListWaString[anEmptyDict[anEmptyListUanIntZnestedDataXsomeData\\someMoreDatag\x00\xc5\x00b\x00e\x00n\x00r\x00a\x00a3A\x9c\xb9}\xf4\x00\x00\x00\xd5\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18[aFalseValueZaTrueValue]aUnicodeValue]anotherStringZdeeperDict\x08\tk\x00M\x00\xe4\x00s\x00s\x00i\x00g\x00,\x00 \x00M\x00a\x00\xdf_\x10\x15\xd3\x19\x1a\x1b\x1c\x1d\x1eQaQbQc\x10\x11#@@@\x00\x00\x00\x00\x00\xa3\x1f !\x10\x01\x10\x02Ttext#?\xe0\x00\x00\x00\x00\x00\x00\xa5$%&\x1d\'QAQB\x10\x0c\xa3\x1f 
(\x10\x03VDoodah\xd0\xa0\x11\x02\xd8\xa1.O\x10\xfa\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03M_\x10\x17That was a unicode key.\x00\x08\x00!\x00\'\x00-\x004\x00:\x00B\x00N\x00Z\x00`\x00k\x00t\x00\x81\x00\x90\x00\x99\x00\xa4\x00\xb0\x00\xbb\x00\xc9\x00\xd7\x00\xe2\x00\xe3\x00\xe4\x00\xfb\x01\x13\x01\x1a\x01\x1c\x01\x1e\x01 \x01"\x01+\x01/\x011\x013\x018\x01A\x01G\x01I\x01K\x01M\x01Q\x01S\x01Z\x01[\x01\\\x01_\x01a\x02^\x02l\x00\x00\x00\x00\x00\x00\x02\x01\x00\x00\x00\x00\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x86', +} class TestPlistlib(unittest.TestCase): @@ -99,7 +24,7 @@ except: pass - def _create(self): + def _create(self, fmt=None): pl = dict( aString="Doodah", aList=["A", "B", 12, 32.5, [1, 2, 3]], @@ -154,24 +79,29 @@ self.assertEqual(plistlib.readPlistFromBytes(plistlib.writePlistToBytes(data)), data) def test_appleformatting(self): - pl = plistlib.readPlistFromBytes(TESTDATA) - data = plistlib.writePlistToBytes(pl) - self.assertEqual(data, TESTDATA, - "generated data was not identical to Apple's output") + for fmt in ALL_FORMATS: + with self.subTest(fmt=fmt): + pl = plistlib.readPlistFromBytes(TESTDATA[fmt]) + data = plistlib.writePlistToBytes(pl, fmt=fmt) + self.assertEqual(data, TESTDATA[fmt], "generated data was not identical to Apple's output") def test_appleformattingfromliteral(self): - pl = self._create() - pl2 = plistlib.readPlistFromBytes(TESTDATA) - self.assertEqual(dict(pl), dict(pl2), - "generated data was not identical to Apple's output") + self.maxDiff = None + for fmt in ALL_FORMATS: + with self.subTest(fmt=fmt): + pl = self._create(fmt=fmt) + pl2 = plistlib.readPlistFromBytes(TESTDATA[fmt]) + self.assertEqual(dict(pl), dict(pl2), "generated data was not identical to Apple's output") def test_bytesio(self): from io import BytesIO - b = BytesIO() - pl = self._create() - plistlib.writePlist(pl, b) 
- pl2 = plistlib.readPlist(BytesIO(b.getvalue())) - self.assertEqual(dict(pl), dict(pl2)) + for fmt in ALL_FORMATS: + with self.subTest(fmt=fmt): + b = BytesIO() + pl = self._create(fmt=fmt) + plistlib.writePlist(pl, b, fmt=fmt) + pl2 = plistlib.readPlist(BytesIO(b.getvalue())) + self.assertEqual(dict(pl), dict(pl2)) def test_controlcharacters(self): for i in range(128): @@ -179,19 +109,21 @@ testString = "string containing %s" % c if i >= 32 or c in "\r\n\t": # \r, \n and \t are the only legal control chars in XML - plistlib.writePlistToBytes(testString) + plistlib.writePlistToBytes(testString, fmt=plistlib.FMT_XML) else: self.assertRaises(ValueError, plistlib.writePlistToBytes, testString) def test_nondictroot(self): - test1 = "abc" - test2 = [1, 2, 3, "abc"] - result1 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test1)) - result2 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test2)) - self.assertEqual(test1, result1) - self.assertEqual(test2, result2) + for fmt in ALL_FORMATS: + with self.subTest(fmt=fmt): + test1 = "abc" + test2 = [1, 2, 3, "abc"] + result1 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test1, fmt=fmt)) + result2 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test2, fmt=fmt)) + self.assertEqual(test1, result1) + self.assertEqual(test2, result2) def test_invalidarray(self): for i in ["key inside an array",