diff --git a/Doc/library/plistlib.rst b/Doc/library/plistlib.rst --- a/Doc/library/plistlib.rst +++ b/Doc/library/plistlib.rst @@ -45,35 +45,108 @@ This module defines the following functions: -.. function:: readPlist(pathOrFile) +.. function:: readPlist(pathOrFile[, fmt[, bytes_for_data]]) Read a plist file. *pathOrFile* may either be a file name or a (readable and binary) file object. Return the unpacked root object (which usually is a dictionary). - The XML data is parsed using the Expat parser from :mod:`xml.parsers.expat` - -- see its documentation for possible exceptions on ill-formed XML. - Unknown elements will simply be ignored by the plist parser. + The *fmt* argument is the format of the file; the following values are valid: + * :data:`None`: Autodetect the file format -.. function:: writePlist(rootObject, pathOrFile) + * "xml1": XML file format + + * "binary1": Binary plist format + + If *bytes_for_data* is false (the default) binary data will be returned as instances + of :class:`Data`, otherwise it is returned as instances of :class:`bytes`. + + XML data for the "xml1" format is parsed using the Expat parser + from :mod:`xml.parsers.expat` -- see its documentation for possible + exceptions on ill-formed XML. Unknown elements will simply be ignored + by the plist parser. + + The parser for the binary format raises :exc:`InvalidFileException` + when the file cannot be parsed. + + .. versionchanged:: 3.4 + + Added the *fmt* and *bytes_for_data* arguments. + + +.. function:: writePlist(rootObject, pathOrFile[, fmt[, sort_keys[, skipkeys]]]) Write *rootObject* to a plist file. *pathOrFile* may either be a file name or a (writable and binary) file object. 
+ The *fmt* argument specifies the format of the plist file and can be + one of the following values: + + * "xml1" (the default): XML formatted plist file + + * "binary1": Binary formatted plist file + + When *sort_keys* is true (the default) the keys for dictionaries will be written + to the plist in sorted order, otherwise they will be written in the iteration + order of the dictionary. + + When *skipkeys* is false (the default) the function raises :exc:`TypeError` when a key of + a dictionary is not a string, otherwise such keys are skipped. + A :exc:`TypeError` will be raised if the object is of an unsupported type or a container that contains objects of unsupported types. + .. versionchanged:: 3.4 -.. function:: readPlistFromBytes(data) + Added the *fmt*, *sort_keys* and *skipkeys* arguments. + + +.. function:: readPlistFromBytes(data[, fmt[, bytes_for_data]]) Read a plist data from a bytes object. Return the root object. + The *fmt* argument is the format of the data; the following values are valid: -.. function:: writePlistToBytes(rootObject) + * :data:`None`: Autodetect the file format + + * "xml1": XML file format + + * "binary1": Binary plist format + + If *bytes_for_data* is false (the default) binary data will be returned as instances + of :class:`Data`, otherwise it is returned as instances of :class:`bytes`. + + A :exc:`TypeError` will be raised if the object is of an unsupported type or + a container that contains objects of unsupported types. + + .. versionchanged:: 3.4 + + Added the *fmt* and *bytes_for_data* arguments. + + +.. function:: writePlistToBytes(rootObject[, fmt[, sort_keys[, skipkeys]]]) Return *rootObject* as a plist-formatted bytes object. 
+ The *fmt* argument specifies the format of the plist file and can be + one of the following values: + + * "xml1" (the default): XML formatted plist file + + * "binary1": Binary formatted plist file + + When *sort_keys* is true (the default) the keys for dictionaries will be written + to the plist in sorted order, otherwise they will be written in the iteration + order of the dictionary. + + When *skipkeys* is false (the default) the function raises :exc:`TypeError` when a key of + a dictionary is not a string, otherwise such keys are skipped. + + .. versionchanged:: 3.4 + + Added the *fmt*, *sort_keys* and *skipkeys* arguments. + The following class is available: @@ -113,3 +186,12 @@ pl = readPlist(pathOrFile) print(pl["aKey"]) + + +.. seealso:: + + * The closest to a specification of the binary plist format + is the + `CF-Lite source code for CFBinaryPlist <http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c>`_ + + * `The plist(5) manual page `_ diff --git a/Lib/plistlib.py b/Lib/plistlib.py --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -52,322 +52,33 @@ __all__ = [ "readPlist", "writePlist", "readPlistFromBytes", "writePlistToBytes", - "Plist", "Data", "Dict" + "Plist", "Data", "Dict", "FMT_XML", "FMT_BINARY", ] # Note: the Plist and Dict classes have been deprecated. import binascii +import codecs import datetime from io import BytesIO +import os import re +import struct +FMT_XML="xml1" +FMT_BINARY="binary1" -def readPlist(pathOrFile): - """Read a .plist file. 'pathOrFile' may either be a file name or a - (readable) file object. Return the unpacked root object (which - usually is a dictionary). - """ - didOpen = False - try: - if isinstance(pathOrFile, str): - pathOrFile = open(pathOrFile, 'rb') - didOpen = True - p = PlistParser() - rootObject = p.parse(pathOrFile) - finally: - if didOpen: - pathOrFile.close() - return rootObject - - -def writePlist(rootObject, pathOrFile): - """Write 'rootObject' to a .plist file. 'pathOrFile' may either be a - file name or a (writable) file object. 
- """ - didOpen = False - try: - if isinstance(pathOrFile, str): - pathOrFile = open(pathOrFile, 'wb') - didOpen = True - writer = PlistWriter(pathOrFile) - writer.writeln("") - writer.writeValue(rootObject) - writer.writeln("") - finally: - if didOpen: - pathOrFile.close() - - -def readPlistFromBytes(data): - """Read a plist data from a bytes object. Return the root object. - """ - return readPlist(BytesIO(data)) - - -def writePlistToBytes(rootObject): - """Return 'rootObject' as a plist-formatted bytes object. - """ - f = BytesIO() - writePlist(rootObject, f) - return f.getvalue() - - -class DumbXMLWriter: - def __init__(self, file, indentLevel=0, indent="\t"): - self.file = file - self.stack = [] - self.indentLevel = indentLevel - self.indent = indent - - def beginElement(self, element): - self.stack.append(element) - self.writeln("<%s>" % element) - self.indentLevel += 1 - - def endElement(self, element): - assert self.indentLevel > 0 - assert self.stack.pop() == element - self.indentLevel -= 1 - self.writeln("" % element) - - def simpleElement(self, element, value=None): - if value is not None: - value = _escape(value) - self.writeln("<%s>%s" % (element, value, element)) - else: - self.writeln("<%s/>" % element) - - def writeln(self, line): - if line: - # plist has fixed encoding of utf-8 - if isinstance(line, str): - line = line.encode('utf-8') - self.file.write(self.indentLevel * self.indent) - self.file.write(line) - self.file.write(b'\n') - - -# Contents should conform to a subset of ISO 8601 -# (in particular, YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'. 
Smaller units may be omitted with -# a loss of precision) -_dateParser = re.compile(r"(?P\d\d\d\d)(?:-(?P\d\d)(?:-(?P\d\d)(?:T(?P\d\d)(?::(?P\d\d)(?::(?P\d\d))?)?)?)?)?Z", re.ASCII) - -def _dateFromString(s): - order = ('year', 'month', 'day', 'hour', 'minute', 'second') - gd = _dateParser.match(s).groupdict() - lst = [] - for key in order: - val = gd[key] - if val is None: - break - lst.append(int(val)) - return datetime.datetime(*lst) - -def _dateToString(d): - return '%04d-%02d-%02dT%02d:%02d:%02dZ' % ( - d.year, d.month, d.day, - d.hour, d.minute, d.second - ) - - -# Regex to find any control chars, except for \t \n and \r -_controlCharPat = re.compile( - r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f" - r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]") - -def _escape(text): - m = _controlCharPat.search(text) - if m is not None: - raise ValueError("strings can't contains control characters; " - "use plistlib.Data instead") - text = text.replace("\r\n", "\n") # convert DOS line endings - text = text.replace("\r", "\n") # convert Mac line endings - text = text.replace("&", "&") # escape '&' - text = text.replace("<", "<") # escape '<' - text = text.replace(">", ">") # escape '>' - return text - - -PLISTHEADER = b"""\ - - -""" - -class PlistWriter(DumbXMLWriter): - - def __init__(self, file, indentLevel=0, indent=b"\t", writeHeader=1): - if writeHeader: - file.write(PLISTHEADER) - DumbXMLWriter.__init__(self, file, indentLevel, indent) - - def writeValue(self, value): - if isinstance(value, str): - self.simpleElement("string", value) - elif isinstance(value, bool): - # must switch for bool before int, as bool is a - # subclass of int... 
- if value: - self.simpleElement("true") - else: - self.simpleElement("false") - elif isinstance(value, int): - self.simpleElement("integer", "%d" % value) - elif isinstance(value, float): - self.simpleElement("real", repr(value)) - elif isinstance(value, dict): - self.writeDict(value) - elif isinstance(value, Data): - self.writeData(value) - elif isinstance(value, datetime.datetime): - self.simpleElement("date", _dateToString(value)) - elif isinstance(value, (tuple, list)): - self.writeArray(value) - else: - raise TypeError("unsupported type: %s" % type(value)) - - def writeData(self, data): - self.beginElement("data") - self.indentLevel -= 1 - maxlinelength = max(16, 76 - len(self.indent.replace(b"\t", b" " * 8) * - self.indentLevel)) - for line in data.asBase64(maxlinelength).split(b"\n"): - if line: - self.writeln(line) - self.indentLevel += 1 - self.endElement("data") - - def writeDict(self, d): - if d: - self.beginElement("dict") - items = sorted(d.items()) - for key, value in items: - if not isinstance(key, str): - raise TypeError("keys must be strings") - self.simpleElement("key", key) - self.writeValue(value) - self.endElement("dict") - else: - self.simpleElement("dict") - - def writeArray(self, array): - if array: - self.beginElement("array") - for value in array: - self.writeValue(value) - self.endElement("array") - else: - self.simpleElement("array") - - -class _InternalDict(dict): - - # This class is needed while Dict is scheduled for deprecation: - # we only need to warn when a *user* instantiates Dict or when - # the "attribute notation for dict keys" is used. 
- - def __getattr__(self, attr): - try: - value = self[attr] - except KeyError: - raise AttributeError(attr) - from warnings import warn - warn("Attribute access from plist dicts is deprecated, use d[key] " - "notation instead", DeprecationWarning, 2) - return value - - def __setattr__(self, attr, value): - from warnings import warn - warn("Attribute access from plist dicts is deprecated, use d[key] " - "notation instead", DeprecationWarning, 2) - self[attr] = value - - def __delattr__(self, attr): - try: - del self[attr] - except KeyError: - raise AttributeError(attr) - from warnings import warn - warn("Attribute access from plist dicts is deprecated, use d[key] " - "notation instead", DeprecationWarning, 2) - -class Dict(_InternalDict): - - def __init__(self, **kwargs): - from warnings import warn - warn("The plistlib.Dict class is deprecated, use builtin dict instead", - DeprecationWarning, 2) - super().__init__(**kwargs) - - -class Plist(_InternalDict): - - """This class has been deprecated. Use readPlist() and writePlist() - functions instead, together with regular dict objects. - """ - - def __init__(self, **kwargs): - from warnings import warn - warn("The Plist class is deprecated, use the readPlist() and " - "writePlist() functions instead", DeprecationWarning, 2) - super().__init__(**kwargs) - - def fromFile(cls, pathOrFile): - """Deprecated. Use the readPlist() function instead.""" - rootObject = readPlist(pathOrFile) - plist = cls() - plist.update(rootObject) - return plist - fromFile = classmethod(fromFile) - - def write(self, pathOrFile): - """Deprecated. 
Use the writePlist() function instead.""" - writePlist(self, pathOrFile) - - -def _encodeBase64(s, maxlinelength=76): - # copied from base64.encodebytes(), with added maxlinelength argument - maxbinsize = (maxlinelength//4)*3 - pieces = [] - for i in range(0, len(s), maxbinsize): - chunk = s[i : i + maxbinsize] - pieces.append(binascii.b2a_base64(chunk)) - return b''.join(pieces) - -class Data: - - """Wrapper for binary data.""" - - def __init__(self, data): - if not isinstance(data, bytes): - raise TypeError("data must be as bytes") - self.data = data - - @classmethod - def fromBase64(cls, data): - # base64.decodebytes just calls binascii.a2b_base64; - # it seems overkill to use both base64 and binascii. - return cls(binascii.a2b_base64(data)) - - def asBase64(self, maxlinelength=76): - return _encodeBase64(self.data, maxlinelength) - - def __eq__(self, other): - if isinstance(other, self.__class__): - return self.data == other.data - elif isinstance(other, str): - return self.data == other - else: - return id(self) == id(other) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, repr(self.data)) +class InvalidFileException(ValueError): + def __str__(self): + return "Invalid file" + def __unicode__(self): + return "Invalid file" class PlistParser: - - def __init__(self): + def __init__(self, bytes_for_data): self.stack = [] self.currentKey = None self.root = None + self.bytes_for_data = bytes_for_data def parse(self, fileobj): from xml.parsers.expat import ParserCreate @@ -449,6 +160,621 @@ def end_string(self): self.addObject(self.getData()) def end_data(self): - self.addObject(Data.fromBase64(self.getData().encode("utf-8"))) + if self.bytes_for_data: + self.addObject(binascii.a2b_base64(data)(self.getData().encode("utf-8"))) + else: + self.addObject(Data.fromBase64(self.getData().encode("utf-8"))) def end_date(self): self.addObject(_dateFromString(self.getData())) + + +class BinaryPlistParser(object): + """ + Read or write a binary plist file, 
following the description of the binary format: http://opensource.apple.com/source/CF/CF-550/CFBinaryPList.c + Raise InvalidFileException in case of error, otherwise return the root object, as usual + """ + _atoms = {0: None, 8: False, 9: True, 15: ''} + + def __init__(self, bytes_for_data): + self.bytes_for_data = bytes_for_data + + def parse(self, inFile): + return self.read(inFile) + + def read(self, inFile): + self._inFile = inFile + self._inFile.seek(-32, os.SEEK_END) # go to the trailer + trailer = self._inFile.read(32) + if len(trailer) != 32: + return InvalidFileException() + offsetSize, self._refSize, numObjects, topObject, offsetTableOffset = struct.unpack('>6xBB4xL4xL4xL', trailer) + self._inFile.seek(offsetTableOffset) # go to the table with offsets of all objects + offsetFormat = '>' + {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[offsetSize] * numObjects + self._refFormat = {1: 'B', 2: 'H', 4: 'L', 8: 'Q', }[self._refSize] + self._objectOffsets = struct.unpack(offsetFormat, self._inFile.read(offsetSize * numObjects)) + return self._readNextObject(self._objectOffsets[topObject]) + + def _getSize(self, tokenL): + """ return the size of the next object.""" + if tokenL == 0xF: + m = int.from_bytes(self._inFile.read(1), byteorder='big') & 0x3 + int_format = {0: (1, '>B'), 1: (2, '>H'), 2: (4, '>L'), 3: (8, '>Q'), }# {log2(bit_number): (bit_number, pattern to use with struct.unpack)} + s, f = int_format[m] + return struct.unpack(f, self._inFile.read(s))[0] + return tokenL + + def _readRefs(self, n): + return struct.unpack('>' + self._refFormat * n, self._inFile.read(n * self._refSize)) + + def _readNextObject(self, offset): + """ read the object at offset. 
May recursively read sub-objects (content of an array/dict/set) """ + self._inFile.seek(offset) + token = self._inFile.read(1)[0] + tokenH, tokenL = token & 0xF0, token & 0x0F #high and low parts + if token == 0x00: + return None + elif token == 0x08: + return False + elif token == 0x09: + return True + elif token == 0x0f: + return '' + #if tokenH == 0x00 and token in self._atoms: + #return self._atoms[token] + elif tokenH == 0x10: #int + return int.from_bytes(self._inFile.read(1 << tokenL), 'big') + elif tokenH == 0x20 and tokenL == 0x02: #real + return struct.unpack('>f', self._inFile.read(4))[0] + elif tokenH == 0x20 and tokenL == 0x03: #real + return struct.unpack('>d', self._inFile.read(8))[0] + elif tokenH == 0x30 and tokenL == 0x03: #date + f = struct.unpack('>d', self._inFile.read(8))[0] + return datetime.datetime.utcfromtimestamp(f + (31 * 365 + 8) * 86400) # timestamp 0 of binary plists corresponds to 1/1/2001 (year of Mac OS X 10.0), instead of 1/1/1970. + elif tokenH == 0x40: #data + s = self._getSize(tokenL) + if self.bytes_for_data: + return self._inFile.read(s) + else: + return Data(self._inFile.read(s)) + elif tokenH == 0x50: #ascii string + s = self._getSize(tokenL) + return self._inFile.read(s).decode('ascii') + elif tokenH == 0x60: #unicode string + s = self._getSize(tokenL) + return self._inFile.read(s * 2).decode('utf-16be') + elif tokenH == 0x80: #uid + return self._inFile.read(tokenL + 1) + elif tokenH == 0xA0: #array + s = self._getSize(tokenL) + obj_refs = self._readRefs(s) + return [self._readNextObject(self._objectOffsets[x]) for x in obj_refs] + elif tokenH == 0xC0: #set + s = self._getSize(tokenL) + obj_refs = self._readRefs(s) + return set(self._readNextObject(self._objectOffsets[x]) for x in obj_refs) + elif tokenH == 0xD0: #dict + s = self._getSize(tokenL) + key_refs = self._readRefs(s) + obj_refs = self._readRefs(s) + return {self._readNextObject(self._objectOffsets[k]): self._readNextObject(self._objectOffsets[o]) for k, o in 
class BinaryPlistWriter(object):
    """Serialise an object tree to a file in the binary plist format.

    *file* must be a writable binary file object supporting tell().
    When *sort_keys* is true dictionary keys are written in sorted order,
    otherwise in iteration order.  When *skipkeys* is true dictionary
    entries whose key is not a string are silently dropped; otherwise such
    a key raises TypeError.
    """

    _atoms = {0: None, 8: False, 9: True, 15: ''}

    # struct format character for every legal offset/reference width.
    _intFormats = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

    def __init__(self, file, sort_keys, skipkeys):
        self._outFile = file
        self._sort_keys = sort_keys
        self._skipkeys = skipkeys

    @staticmethod
    def _byteWidth(value):
        """Return the smallest width (1, 2, 4 or 8 bytes) that holds *value*."""
        for width in (1, 2, 4):
            if value < 1 << (8 * width):
                return width
        return 8

    def _usableKeys(self, d):
        """Return the string keys of *d*, in iteration order.

        Non-string keys are dropped when skipkeys is set and raise TypeError
        otherwise.
        """
        keys = []
        for key in d:
            if isinstance(key, str):
                keys.append(key)
            elif not self._skipkeys:
                raise TypeError("keys must be strings")
        return keys

    def countObjects(self, rootObject):
        """Return the number of objects that writing *rootObject* produces.

        Containers count as one object plus their contents; every dictionary
        entry contributes its key and its value.  Raises TypeError for a
        non-string dictionary key unless skipkeys is set.
        """
        total = 1
        if isinstance(rootObject, dict):
            for key in self._usableKeys(rootObject):
                total += 1 + self.countObjects(rootObject[key])
        elif isinstance(rootObject, (list, tuple, set, frozenset)):
            for item in rootObject:
                total += self.countObjects(item)
        return total

    def write(self, rootObject):
        """Write *rootObject* as a complete binary plist to the output file.

        Raises TypeError for unsupported objects or non-string dictionary
        keys, and OverflowError for integers outside the representable range.
        """
        # Counting first validates the dictionary keys before any output is
        # produced and fixes the width used for object references.
        self._refSize = self._byteWidth(self.countObjects(rootObject))
        self._refFormat = self._intFormats[self._refSize]
        self._objectOffsets = []
        self._currentRef = 0

        self._outFile.write(b'bplist00')  # header
        topObject = self._writeNextObject(rootObject)

        offsetTableOffset = self._outFile.tell()
        offsetSize = self._byteWidth(offsetTableOffset)
        # Use the offsets actually recorded so the table can never disagree
        # with the objects that were written.
        numObjects = len(self._objectOffsets)
        offsetFormat = '>' + self._intFormats[offsetSize] * numObjects
        self._outFile.write(struct.pack(offsetFormat, *self._objectOffsets))
        # numObjects, topObject and offsetTableOffset are 64-bit fields.
        self._outFile.write(struct.pack(
            '>6xBBQQQ', offsetSize, self._refSize, numObjects, topObject,
            offsetTableOffset))

    def _putSize(self, token, size):
        """Write the marker byte *token* combined with the count *size*.

        Counts below 15 fit in the marker's low nibble; larger ones set the
        nibble to 0xF and follow as an integer object.
        """
        if size < 15:
            self._outFile.write(struct.pack('>B', token | size))
        elif size < 1 << 8:
            self._outFile.write(struct.pack('>BBB', token | 0xF, 0x10, size))
        elif size < 1 << 16:
            self._outFile.write(struct.pack('>BBH', token | 0xF, 0x11, size))
        elif size < 1 << 32:
            self._outFile.write(struct.pack('>BBL', token | 0xF, 0x12, size))
        else:
            self._outFile.write(struct.pack('>BBQ', token | 0xF, 0x13, size))

    def _addRef(self):
        """Record the current file position as the next object's offset.

        Returns the reference number assigned to that object.
        """
        self._objectOffsets.append(self._outFile.tell())
        self._currentRef += 1
        return self._currentRef - 1

    @staticmethod
    def _packInt(value):
        """Return the marker and payload bytes for the integer *value*."""
        if value < 0:
            # Negative numbers only exist as signed 8-byte integers.
            try:
                return struct.pack('>Bq', 0x13, value)
            except struct.error:
                raise OverflowError(value) from None
        if value < 1 << 8:
            return struct.pack('>BB', 0x10, value)
        if value < 1 << 16:
            return struct.pack('>BH', 0x11, value)
        if value < 1 << 32:
            return struct.pack('>BL', 0x12, value)
        if value < 1 << 63:
            return struct.pack('>BQ', 0x13, value)
        if value < 1 << 64:
            # 8-byte integers are read back as signed, so the top of the
            # unsigned 64-bit range needs the 16-byte form.
            return b'\x14' + value.to_bytes(16, 'big', signed=True)
        raise OverflowError(value)

    def _writeRefs(self, token, refs):
        """Write a container marker followed by its packed references."""
        currentRef = self._addRef()
        self._putSize(token, len(refs))
        self._outFile.write(
            struct.pack('>' + self._refFormat * len(refs), *refs))
        return currentRef

    def _writeNextObject(self, rootObject):
        """Write *rootObject*, children first, and return its reference."""
        out = self._outFile
        if rootObject is None:
            currentRef = self._addRef()
            out.write(b'\x00')
        elif rootObject is False:
            currentRef = self._addRef()
            out.write(b'\x08')
        elif rootObject is True:
            currentRef = self._addRef()
            out.write(b'\x09')
        elif isinstance(rootObject, int):
            packed = self._packInt(rootObject)
            currentRef = self._addRef()
            out.write(packed)
        elif isinstance(rootObject, float):
            currentRef = self._addRef()
            out.write(struct.pack('>Bd', 0x23, rootObject))
        elif isinstance(rootObject, datetime.datetime):
            # Binary plists count seconds from 2001-01-01.
            seconds = (rootObject -
                       datetime.datetime(2001, 1, 1)).total_seconds()
            currentRef = self._addRef()
            out.write(struct.pack('>Bd', 0x33, seconds))
        elif isinstance(rootObject, bytes):
            currentRef = self._addRef()
            self._putSize(0x40, len(rootObject))
            out.write(rootObject)
        elif isinstance(rootObject, str):
            try:
                encoded = rootObject.encode('ascii')
                token, count = 0x50, len(encoded)
            except UnicodeEncodeError:
                encoded = rootObject.encode('utf-16be')
                # The count is in 16-bit units, so characters outside the
                # BMP (surrogate pairs) count twice.
                token, count = 0x60, len(encoded) // 2
            currentRef = self._addRef()
            self._putSize(token, count)
            out.write(encoded)
        elif isinstance(rootObject, (list, tuple)):
            refs = [self._writeNextObject(item) for item in rootObject]
            currentRef = self._writeRefs(0xA0, refs)
        elif isinstance(rootObject, (set, frozenset)):
            refs = [self._writeNextObject(item) for item in rootObject]
            currentRef = self._writeRefs(0xC0, refs)
        elif isinstance(rootObject, dict):
            keys = self._usableKeys(rootObject)
            if self._sort_keys:
                keys.sort()
            keyRefs, objRefs = [], []
            for key in keys:
                keyRefs.append(self._writeNextObject(key))
                objRefs.append(self._writeNextObject(rootObject[key]))
            # The count reflects the entries written, not len(rootObject),
            # because skipkeys may have dropped some.
            currentRef = self._addRef()
            self._putSize(0xD0, len(keyRefs))
            refFormat = '>' + self._refFormat * len(keyRefs)
            out.write(struct.pack(refFormat, *keyRefs))
            out.write(struct.pack(refFormat, *objRefs))
        elif isinstance(rootObject, Data):
            currentRef = self._addRef()
            self._putSize(0x40, len(rootObject.data))
            out.write(rootObject.data)
        else:
            raise TypeError("unsupported type: %s" % type(rootObject))
        return currentRef
else: + self.writeln("<%s/>" % element) + + def writeln(self, line): + if line: + # plist has fixed encoding of utf-8 + if isinstance(line, str): + line = line.encode('utf-8') + self.file.write(self.indentLevel * self.indent) + self.file.write(line) + self.file.write(b'\n') + +class PlistWriter(DumbXMLWriter): + + def __init__(self, file, indentLevel=0, indent=b"\t", writeHeader=1, sort_keys=True, skipkeys=False): + if writeHeader: + file.write(PLISTHEADER) + DumbXMLWriter.__init__(self, file, indentLevel, indent) + self._sort_keys = sort_keys + self._skipkeys = skipkeys + + def write(self, rootObject): + self.writeln("") + self.writeValue(rootObject) + self.writeln("") + + def writeValue(self, value): + if isinstance(value, str): + self.simpleElement("string", value) + elif isinstance(value, bool): + # must switch for bool before int, as bool is a + # subclass of int... + if value: + self.simpleElement("true") + else: + self.simpleElement("false") + elif isinstance(value, int): + self.simpleElement("integer", "%d" % value) + elif isinstance(value, float): + self.simpleElement("real", repr(value)) + elif isinstance(value, dict): + self.writeDict(value) + elif isinstance(value, Data): + self.writeData(value) + elif isinstance(value, bytes): + self.writeBytes(value) + elif isinstance(value, datetime.datetime): + self.simpleElement("date", _dateToString(value)) + elif isinstance(value, (tuple, list)): + self.writeArray(value) + else: + raise TypeError("unsupported type: %s" % type(value)) + + def writeData(self, data): + self.writeBytes(data.data) + + def writeBytes(self, data): + self.beginElement("data") + self.indentLevel -= 1 + maxlinelength = max(16, 76 - len(self.indent.replace(b"\t", b" " * 8) * + self.indentLevel)) + for line in _encodeBase64(data, maxlinelength).split(b"\n"): + if line: + self.writeln(line) + self.indentLevel += 1 + self.endElement("data") + + def writeDict(self, d): + if d: + self.beginElement("dict") + if self._sort_keys: + items = 
sorted(d.items()) + else: + items = d.items() + + for key, value in items: + if not isinstance(key, str): + if self._skipkeys: + continue + raise TypeError("keys must be strings") + self.simpleElement("key", key) + self.writeValue(value) + self.endElement("dict") + else: + self.simpleElement("dict") + + def writeArray(self, array): + if array: + self.beginElement("array") + for value in array: + self.writeValue(value) + self.endElement("array") + else: + self.simpleElement("array") + +def _is_fmt_xml(header): + header = header[:7] + for bom in b'', codecs.BOM_UTF8, codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE, codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE: + for start in b'', b'\d\d\d\d)(?:-(?P\d\d)(?:-(?P\d\d)(?:T(?P\d\d)(?::(?P\d\d)(?::(?P\d\d))?)?)?)?)?Z", re.ASCII) + +def _dateFromString(s): + order = ('year', 'month', 'day', 'hour', 'minute', 'second') + gd = _dateParser.match(s).groupdict() + lst = [] + for key in order: + val = gd[key] + if val is None: + break + lst.append(int(val)) + return datetime.datetime(*lst) + +def _dateToString(d): + return '%04d-%02d-%02dT%02d:%02d:%02dZ' % ( + d.year, d.month, d.day, + d.hour, d.minute, d.second + ) + + +# Regex to find any control chars, except for \t \n and \r +_controlCharPat = re.compile( + r"[\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f" + r"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f]") + +def _escape(text): + m = _controlCharPat.search(text) + if m is not None: + raise ValueError("strings can't contains control characters; " + "use plistlib.Data instead") + text = text.replace("\r\n", "\n") # convert DOS line endings + text = text.replace("\r", "\n") # convert Mac line endings + text = text.replace("&", "&") # escape '&' + text = text.replace("<", "<") # escape '<' + text = text.replace(">", ">") # escape '>' + return text + + +PLISTHEADER = b"""\ + + +""" + + + +class _InternalDict(dict): + + # This class is needed while Dict is scheduled for deprecation: + # we only need to warn when a 
*user* instantiates Dict or when + # the "attribute notation for dict keys" is used. + + def __getattr__(self, attr): + try: + value = self[attr] + except KeyError: + raise AttributeError(attr) + from warnings import warn + warn("Attribute access from plist dicts is deprecated, use d[key] " + "notation instead", DeprecationWarning, 2) + return value + + def __setattr__(self, attr, value): + from warnings import warn + warn("Attribute access from plist dicts is deprecated, use d[key] " + "notation instead", DeprecationWarning, 2) + self[attr] = value + + def __delattr__(self, attr): + try: + del self[attr] + except KeyError: + raise AttributeError(attr) + from warnings import warn + warn("Attribute access from plist dicts is deprecated, use d[key] " + "notation instead", DeprecationWarning, 2) + +class Dict(_InternalDict): + + def __init__(self, **kwargs): + from warnings import warn + warn("The plistlib.Dict class is deprecated, use builtin dict instead", + DeprecationWarning, 2) + super().__init__(**kwargs) + + +class Plist(_InternalDict): + + """This class has been deprecated. Use readPlist() and writePlist() + functions instead, together with regular dict objects. + """ + + def __init__(self, **kwargs): + from warnings import warn + warn("The Plist class is deprecated, use the readPlist() and " + "writePlist() functions instead", DeprecationWarning, 2) + super().__init__(**kwargs) + + def fromFile(cls, pathOrFile): + """Deprecated. Use the readPlist() function instead.""" + rootObject = readPlist(pathOrFile) + plist = cls() + plist.update(rootObject) + return plist + fromFile = classmethod(fromFile) + + def write(self, pathOrFile): + """Deprecated. 
Use the writePlist() function instead.""" + writePlist(self, pathOrFile) + + +def _encodeBase64(s, maxlinelength=76): + # copied from base64.encodebytes(), with added maxlinelength argument + maxbinsize = (maxlinelength//4)*3 + pieces = [] + for i in range(0, len(s), maxbinsize): + chunk = s[i : i + maxbinsize] + pieces.append(binascii.b2a_base64(chunk)) + return b''.join(pieces) + +class Data: + + """Wrapper for binary data.""" + + def __init__(self, data): + if not isinstance(data, bytes): + raise TypeError("data must be as bytes") + self.data = data + + @classmethod + def fromBase64(cls, data): + # base64.decodebytes just calls binascii.a2b_base64; + # it seems overkill to use both base64 and binascii. + return cls(binascii.a2b_base64(data)) + + def asBase64(self, maxlinelength=76): + return _encodeBase64(self.data, maxlinelength) + + def __eq__(self, other): + if isinstance(other, self.__class__): + return self.data == other.data + elif isinstance(other, str): + return self.data == other + else: + return id(self) == id(other) + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, repr(self.data)) + + diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -7,8 +7,10 @@ from test import support +ALL_FORMATS=(plistlib.FMT_XML, plistlib.FMT_BINARY) + # This test data was generated through Cocoa's NSDictionary class -TESTDATA = b""" +TESTDATA_XML1 = b""" @@ -89,7 +91,12 @@ """.replace(b" " * 8, b"\t") # Apple as well as plistlib.py output hard tabs +TESTDATA_BINARY1 = b'bplist00\xda\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x13\x1c\x1dVaFloatg\x00\xc5\x00b\x00e\x00n\x00r\x00a\x00aXsomeDataUanIntWaStringUaDateZnestedDataUaList\\someMoreDataUaDict#?\xe0\x00\x00\x00\x00\x00\x00_\x10\x17That was a unicode 
key.M\x11\x02\xd8VDoodah3A\x9c\xb9}\xf4\x00\x00\x00\xa1\x12O\x10\xfa\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\xa5\x14\x15\x16\x17\x18QAQB\x10\x0c#@@@\x00\x00\x00\x00\x00\xa3\x19\x1a\x1b\x10\x01\x10\x02\x10\x03O\x10\xfa\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\x00\x01\x02\x03\xd5\x1e\x1f !"#+,-.ZdeeperDict]anotherString[aFalseValueZaTrueValue]aUnicodeValue\xd3$%&\'()QaQbQc\x10\x11#@@@\x00\x00\x00\x00\x00\xa3\x19\x1a*Ttext_\x10\x15\x08\tk\x00M\x00\xe4\x00s\x00s\x00i\x00g\x00,\x00 \x00M\x00a\x00\xdf\x00\x08\x00\x1d\x00$\x003\x00<\x00B\x00J\x00P\x00[\x00a\x00n\x00t\x00}\x00\x97\x00\xa5\x00\xa8\x00\xaf\x00\xb8\x00\xba\x01\xb7\x01\xbd\x01\xbf\x01\xc1\x01\xc3\x01\xcc\x01\xd0\x01\xd2\x01\xd4\x01\xd6\x02\xd3\x02\xde\x02\xe9\x02\xf7\x03\x03\x03\x0e\x03\x1c\x03#\x03%\x03\'\x03)\x03+\x034\x038\x03=\x03U\x03V\x03W\x00\x00\x00\x00\x00\x00\x02\x01\x00\x00\x00\x00\x00\x00\x00/\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03n' +TESTDATA={ + plistlib.FMT_XML: TESTDATA_XML1, + plistlib.FMT_BINARY: TESTDATA_BINARY1, +} class TestPlistlib(unittest.TestCase): @@ -99,7 +106,7 @@ except: pass - def _create(self): + def _create(self, fmt=None): pl = dict( aString="Doodah", aList=["A", "B", 12, 32.5, [1, 2, 3]], @@ -154,24 +161,28 @@ self.assertEqual(plistlib.readPlistFromBytes(plistlib.writePlistToBytes(data)), data) def test_appleformatting(self): - pl = plistlib.readPlistFromBytes(TESTDATA) - data = plistlib.writePlistToBytes(pl) - self.assertEqual(data, TESTDATA, + for fmt in ALL_FORMATS: + pl = plistlib.readPlistFromBytes(TESTDATA[fmt]) + data = plistlib.writePlistToBytes(pl) + self.assertEqual(data, TESTDATA[fmt], "generated data was not identical to Apple's output") def test_appleformattingfromliteral(self): - pl = self._create() - pl2 = 
plistlib.readPlistFromBytes(TESTDATA) - self.assertEqual(dict(pl), dict(pl2), + self.maxDiff = None + for fmt in ALL_FORMATS: + pl = self._create(fmt=fmt) + pl2 = plistlib.readPlistFromBytes(TESTDATA[fmt]) + self.assertEqual(dict(pl), dict(pl2), "generated data was not identical to Apple's output") def test_bytesio(self): from io import BytesIO - b = BytesIO() - pl = self._create() - plistlib.writePlist(pl, b) - pl2 = plistlib.readPlist(BytesIO(b.getvalue())) - self.assertEqual(dict(pl), dict(pl2)) + for fmt in ALL_FORMATS: + b = BytesIO() + pl = self._create(fmt=fmt) + plistlib.writePlist(pl, b, fmt=fmt) + pl2 = plistlib.readPlist(BytesIO(b.getvalue())) + self.assertEqual(dict(pl), dict(pl2)) def test_controlcharacters(self): for i in range(128): @@ -179,19 +190,20 @@ testString = "string containing %s" % c if i >= 32 or c in "\r\n\t": # \r, \n and \t are the only legal control chars in XML - plistlib.writePlistToBytes(testString) + plistlib.writePlistToBytes(testString, fmt='xml1') else: self.assertRaises(ValueError, plistlib.writePlistToBytes, testString) def test_nondictroot(self): - test1 = "abc" - test2 = [1, 2, 3, "abc"] - result1 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test1)) - result2 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test2)) - self.assertEqual(test1, result1) - self.assertEqual(test2, result2) + for fmt in ALL_FORMATS: + test1 = "abc" + test2 = [1, 2, 3, "abc"] + result1 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test1, fmt=fmt)) + result2 = plistlib.readPlistFromBytes(plistlib.writePlistToBytes(test2, fmt=fmt)) + self.assertEqual(test1, result1) + self.assertEqual(test2, result2) def test_invalidarray(self): for i in ["key inside an array",