# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""Simple XML parsing and rendering framework.
This uses something almost, but not quite, entirely unlike a DTD.
"""
# Python imports
import os
import sys
import cgi
import copy
import datetime
from StringIO import StringIO
from xml.parsers import expat
class ParseError(Exception):
    """Error raised when the rules XML cannot be parsed.

    The constructor accepts a message plus optional positional values;
    when values are supplied the message is treated as a %-format
    template and interpolated with them.
    """

    def __init__(self, msg, *args):
        # Only interpolate when values were given, so a bare message that
        # happens to contain '%' is passed through untouched.
        if not args:
            text = msg
        else:
            text = msg % args
        Exception.__init__(self, text)
class GenericParser(object):
    """Generic parser for XML, driven by expat.

    The parser is given a list of candidate root classes.  The first
    start tag is matched against each class's ``__element__``; the
    matching class is instantiated with the tag's attributes and becomes
    ``self.root``.  Nested tags and character data are forwarded to the
    innermost open element through its ``__add_child__`` and
    ``__add_characters__`` methods.

    Typical usage:

    Initialization:

    >>> p = GenericParser([Root1, Root2, ...])

    Then one of the following:

    >>> obj = p.parseText('...')
    >>> obj = p.parseFile('demo1.xml')
    >>> obj = p.parseStream(open('demo1.xml'), 'demo1.xml')

    Or, for a lower-level API (feed the document in pieces, passing
    True with the last piece):

    >>> p.reset('demo1.xml')
    >>> p.feedText('...')
    >>> p.feedText('...')
    >>> p.feedText('...', True)
    >>> obj = p.root
    """

    def __init__(self, rootClasses, __filler=0, strict=True):
        """Remember the candidate root classes and the strictness flag.

        NOTE(review): __filler is never read; it appears to exist only to
        soak up a stray second positional argument so that `strict` has
        to be passed by keyword -- confirm before removing.
        """
        self.rootClasses = rootClasses
        self.strict = strict

    def parseText(self, text, filename=None):
        """Parse a complete document held in `text`; return the root object.

        `filename` is only used to prefix error messages.
        """
        self.reset(filename)
        self.feedText(text, True)
        return self.root

    def parseFile(self, file):
        """Parse from a file-like object, a path, or stdin; return the root.

        An object with a ``read`` attribute is parsed as a stream; "-" or
        any false value selects sys.stdin; anything else is opened as a
        path and closed again when parsing finishes or fails.
        """
        if hasattr(file, "read"):
            return self.parseStream(file)
        if file == "-" or not file:
            return self.parseStream(sys.stdin, "")
        # Open in binary mode: expat decodes the document itself and
        # expects read() to hand back raw bytes.  parseStream() resets the
        # parser, so no separate reset() call is needed here.
        with open(file, "rb") as f:
            return self.parseStream(f, file)

    def parseStream(self, f, filename=None):
        """Parse everything readable from `f`; return the root object.

        When `filename` is None, the stream's ``name`` attribute is used
        for error messages if it has one.
        """
        if filename is None:
            filename = getattr(f, "name", None)
        self.reset(filename)
        self.feedStream(f)
        return self.root

    def reset(self, filename=None):
        """Create a fresh expat parser and clear all parsing state."""
        self.filename = filename
        self.parser = parser = expat.ParserCreate(None, None)
        parser.StartElementHandler = self.start
        parser.EndElementHandler = self.end
        parser.CharacterDataHandler = self.characters
        self.stack = []
        self.rules = []
        self.root = None

    def feedText(self, text, isFinal=False):
        """Feed one chunk of text to the parser.

        Pass a true `isFinal` with the last chunk.  Both expat errors and
        ParseErrors raised by the element handlers are re-raised as a
        ParseError carrying the file name and line number, matching what
        feedStream() does.
        """
        try:
            self.parser.Parse(text, isFinal)
        except (expat.ExpatError, ParseError) as err:
            self.raiseError(err)

    def feedStream(self, stream):
        """Feed the whole of `stream` to the parser.

        Errors are re-raised as a ParseError carrying the file name and
        line number.
        """
        try:
            self.parser.ParseFile(stream)
        except (expat.ExpatError, ParseError) as err:
            self.raiseError(err)

    def raiseError(self, err):
        """Raise ParseError for `err`, prefixed with file name and line."""
        msg = "line %d: %s" % (self.parser.ErrorLineNumber, err)
        if self.filename:
            msg = "%s, %s" % (self.filename, msg)
        raise ParseError(msg)

    @staticmethod
    def _narrow(value):
        """Return str(value), or `value` itself if it cannot be encoded.

        On Python 2 expat may hand over unicode objects; they are turned
        into 8-bit strings whenever that is possible.
        """
        try:
            return str(value)
        except UnicodeError:
            return value

    def start(self, tag, attrs):
        """expat start-tag handler: create the object for a new element.

        The first tag seen must match one of the root classes, otherwise
        ParseError is raised.  Every later tag is handed to the enclosing
        element's __add_child__; whatever that returns (possibly None in
        non-strict mode) is pushed on the stack.
        """
        tag = self._narrow(tag)
        # Iterate over a snapshot because the dict is updated in place.
        for key, value in list(attrs.items()):
            attrs[key] = self._narrow(value)

        if not self.stack:
            # Pick a root element
            for cls in self.rootClasses:
                if tag == cls.__element__:
                    break
            else:
                raise ParseError("<%s> is not a root element", tag)
            if self.strict:
                self.root = cls(attrs)
            else:
                self.root = cls(attrs, False)
            self.stack.append(self.root)
            return

        if self.strict:
            obj = self.stack[-1].__add_child__(tag, attrs)
        else:
            # Ignored elements are represented by None on the stack; attach
            # to the nearest enclosing element that was actually created.
            parent = self.getparent()
            if parent is None:
                obj = None
            else:
                obj = parent.__add_child__(tag, attrs, strict=False)
        self.stack.append(obj)

    def characters(self, data):
        """expat character-data handler: pass text to the open element."""
        data = self._narrow(data)
        if not self.stack:
            return
        if self.strict:
            self.stack[-1].__add_characters__(data)
        else:
            parent = self.getparent()
            if parent is not None:
                parent.__add_characters__(data, strict=False)

    def end(self, tag):
        """expat end-tag handler: pop the element that is being closed."""
        obj = self.stack.pop()
        # Internal consistency check: the popped object must belong to the
        # tag being closed (or be None for an ignored element).
        if self.strict:
            assert tag == obj.__element__
        else:
            assert obj is None or tag == obj.__element__

    def getparent(self):
        """Return the innermost stack entry that is not None, or None."""
        for entry in reversed(self.stack):
            if entry is not None:
                return entry
        return None
# Classes defining the "DTD"
class ElementClass(object):
    """Base class for objects that mirror one kind of XML element.

    Subclasses describe the element by overriding the four special class
    attributes below.  Instances can be built from parsed XML (through
    the __parse*__ class methods or a GenericParser) or directly from
    keyword arguments, compared with == / !=, deep-copied, and rendered
    back to XML text with str() or __render__().
    """

    # Override these special attributes in subclasses
    __element__ = ""        # string giving the tag name
    __attributes__ = {}     # dict of attrname -> function (e.g. String, Boolean)
    __children__ = {}       # dict of ElementClass -> attrname
                            # if attrname ends in [], it is a list
    __characters__ = ""     # string giving name where to store character data

    # NOTE(review): the unused __filler parameters below appear to exist
    # only to force `strict` to be passed by keyword -- confirm.

    @classmethod
    def __parser__(cls, __filler=0, strict=True):
        """Return a GenericParser whose only root class is this class."""
        return GenericParser([cls], strict=strict)

    @classmethod
    def __parseStream__(cls, stream, filename=None, __filler=0, strict=True):
        """Parse `stream` with this class as root; return the instance."""
        return GenericParser([cls], strict=strict).parseStream(stream,
                                                               filename)

    @classmethod
    def __parseText__(cls, text, filename=None, __filler=0, strict=True):
        """Parse the string `text` with this class as root."""
        return GenericParser([cls], strict=strict).parseText(text, filename)

    @classmethod
    def __parseFile__(cls, file, __filler=0, strict=True):
        """Parse `file` (as accepted by GenericParser.parseFile)."""
        return GenericParser([cls], strict=strict).parseFile(file)

    def __init__(self, __attrs=None, __strict=True, **kwds):
        """Initialize from a dict of XML attribute strings or from keywords.

        The first positional argument, if given, is a dict mapping XML
        attribute names to strings; each value is converted by the
        function registered in __attributes__.  A false second positional
        argument makes unknown attribute names be ignored instead of
        raising ParseError.  Alternatively, keyword arguments set
        instance attributes directly to ready-made Python values.
        """
        # You can't pass both __attrs and kwds simultaneously
        assert __attrs is None or kwds == {}
        # We don't support __characters__ and __children__ simultaneously
        assert not (self.__children__ and self.__characters__), \
               self.__class__.__name__
        # First set all undefined attributes to None
        for key in self.__attributes__:
            name = self.__fixname__(key)
            if not hasattr(self, name):
                setattr(self, name, None)
        # And set all child attributes to None or []
        for childname in self.__children__.values():
            if childname.endswith("[]"):
                setattr(self, childname[:-2], [])
            else:
                setattr(self, childname, None)
        # Set __characters__ attribute to ""
        if self.__characters__:
            self.__init_characters__()
        # Then set all actual attributes, either from __attrs or from kwds
        if __attrs is None:
            # Use kwds; these should be appropriate Python values.
            # You can use this to set *arbitrary* instance attributes.
            for key, value in kwds.items():
                # key is already the *output* of __fixname__!
                setattr(self, key, value)
        else:
            # Use __attrs; these should be strings
            if __strict:
                self.__set_attributes__(__attrs)
            else:
                self.__set_attributes__(__attrs, strict=False)

    def __init_characters__(self):
        """Reset the character-data attribute to the empty string."""
        setattr(self, self.__characters__, "")

    def __fixname__(self, name):
        """Map an XML attribute name to an instance attribute name."""
        return name.replace("-", "_")

    def __str__(self):
        """Return the rendered XML without trailing newlines."""
        f = StringIO()
        self.__render__(f)
        return f.getvalue().rstrip("\n")

    def __eq__(self, other):
        """Equal when same class and all declared data compares equal."""
        if other is None:
            return False
        cls = self.__class__
        if other.__class__ is not cls:
            return False
        for name in cls.__attributes__:
            name = self.__fixname__(name)
            if getattr(self, name) != getattr(other, name):
                return False
        for name in cls.__children__.values():
            if name.endswith("[]"):
                name = name[:-2]
            if getattr(self, name) != getattr(other, name):
                return False
        name = cls.__characters__
        if name:
            if getattr(self, name) != getattr(other, name):
                return False
        return True

    def __ne__(self, other):
        """Inverse of __eq__."""
        if other is None:
            return True
        return not self.__eq__(other)

    def __render__(self, f=None, level="", indent=" "):
        """Write this element, and everything below it, as XML to `f`.

        `f` defaults to sys.stdout.  `level` is the leading whitespace for
        this element and `indent` is added to it for each nesting level.
        Attributes are written in sorted order, skipping those whose
        value is None; children are written grouped by attribute name, in
        sorted order of those names.
        """
        if f is None:
            f = sys.stdout
        f.write("%s<%s" % (level, self.__element__))
        # With more than one declared attribute, put each on its own line.
        if len(self.__attributes__) > 1:
            sep = "\n%s " % (level+indent)
        else:
            sep = " "
        for key in sorted(self.__attributes__):
            value = getattr(self, self.__fixname__(key), None)
            if value is not None:
                function = self.__attributes__[key]
                value = function.__render__(value)
                f.write('%s%s="%s"' % (sep, key, value))
        if self.__characters__:
            assert not self.__children__ # Again!
            f.write(">")
            self.__render_characters__(f, level, indent)
            # The closing tag needs its "</" to produce well-formed XML.
            f.write("</%s>\n" % self.__element__)
        elif not self.__children__:
            f.write(" />\n")
        else:
            f.write(">\n")
            # Several child classes may share one attribute name; render
            # each name once so shared children are not emitted twice.
            for childname in sorted(set(self.__children__.values())):
                if childname.endswith("[]"):
                    children = getattr(self, childname[:-2], [])
                    for child in children:
                        child.__render__(f, level+indent, indent)
                else:
                    child = getattr(self, childname, None)
                    if child is not None:
                        child.__render__(f, level+indent, indent)
            f.write("%s</%s>\n" % (level, self.__element__))

    def __render_characters__(self, f, level, indent):
        """Write the character data to `f`, escaping &, < and >."""
        # Same output as cgi.escape(text), without relying on that
        # deprecated helper.
        from xml.sax.saxutils import escape
        text = getattr(self, self.__characters__, None)
        if text is not None:
            # Reproduce the text exactly (except for quoting)
            f.write(escape(text))

    def __set_attributes__(self, attrs, __filler=0, strict=True):
        """Convert and store the XML attributes given in the dict `attrs`.

        An attribute name missing from __attributes__ raises ParseError
        when `strict` is true and is skipped otherwise.
        """
        attrdefs = self.__attributes__
        for key, value in attrs.items():
            if key not in attrdefs:
                if not strict:
                    continue
                raise ParseError("<%s> tag has no %r attribute",
                                 self.__element__, key)
            setattr(self, self.__fixname__(key), attrdefs[key](value))

    def __add_child__(self, tag, attrs, __filler=0, strict=True):
        """Create, attach and return the child object for `tag`.

        The child class is the entry of __children__ whose __element__
        equals `tag`.  List-valued children are appended; a single-valued
        child is stored only if the slot is still empty (a second one
        raises ParseError when `strict`).  If no class matches, ParseError
        is raised when `strict`, otherwise None is returned.
        """
        for cls in self.__children__:
            if tag != cls.__element__:
                continue
            if strict:
                obj = cls(attrs)
            else:
                obj = cls(attrs, False)
            childname = self.__children__[cls]
            if childname.endswith("[]"):
                childname = childname[:-2]
                children = getattr(self, childname, None)
                if children is None:
                    children = []
                    setattr(self, childname, children)
                children.append(obj)
            else:
                child = getattr(self, childname, None)
                if child is None:
                    setattr(self, childname, obj)
                elif strict:
                    raise ParseError("duplicate <%s> in <%s>",
                                     tag, self.__element__)
            return obj
        if not strict:
            return None
        raise ParseError("<%s> not allowed inside <%s>", tag, self.__element__)

    def __add_characters__(self, data, __filler=0, strict=True):
        """Append `data` to the character-data attribute.

        For classes without __characters__, non-whitespace data raises
        ParseError when `strict`; otherwise the data is dropped.
        """
        if self.__characters__:
            text = getattr(self, self.__characters__, None)
            if text is None:
                text = data
            else:
                text += data
            setattr(self, self.__characters__, text)
        else:
            if strict and not data.isspace():
                raise ParseError("unexpected characters inside <%s> element",
                                 self.__element__)

    def __deepcopy__(self, memo=None):
        """Return a deep copy; hook used by copy.deepcopy()."""
        # Assume the class can be invoked without arguments
        new = self.__class__()
        # Assume attributes are immutable values
        for key in self.__attributes__:
            name = self.__fixname__(key)
            setattr(new, name, getattr(self, name))
        # Use proper deep copying for sub-elements
        for name in self.__children__.values():
            if name.endswith("[]"):
                name = name[:-2]
            setattr(new, name, copy.deepcopy(getattr(self, name), memo))
        name = self.__characters__
        if name:
            setattr(new, name, copy.deepcopy(getattr(self, name), memo))
        return new

    def __clone__(self):
        """Public API (!) to create a clone."""
        return copy.deepcopy(self)
# Functions defining attribute types
def String(arg):
    """Attribute type: convert the raw attribute value with str()."""
    # XXX Unicode?
    return str(arg)

def String_render(s):
    """Return `s` escaped for use inside a double-quoted XML attribute.

    The characters &, <, > and the double quote are replaced by
    entities.  This produces the same text as cgi.escape(s, True) but
    uses xml.sax.saxutils, which does not depend on the deprecated (and
    in newer Python versions removed) cgi.escape helper.
    """
    from xml.sax.saxutils import escape
    return escape(s, {'"': "&quot;"})

String.__render__ = String_render
def Integer(arg):
    """Attribute type: convert the raw attribute value with int().

    Raises ParseError if int() rejects the value with ValueError.
    """
    try:
        return int(arg)
    except ValueError:
        # The original exception is not needed; the message names the value.
        raise ParseError("attribute value %r is not an Integer", arg)

# Integers are rendered back to XML with plain str().
Integer.__render__ = str
_trues = dict.fromkeys(["yes", "on", "true", "1"])
_falses = dict.fromkeys(["no", "off", "false", "0"])
def Boolean(arg):
s = arg.strip().lower()
if s in _trues:
return True
if s in _falses:
return False
raise ParseError("attribute value %r is not a Boolean", arg)
Boolean.__render__ = lambda b: b and "true" or "false"
def _test():
    """Small demonstration: parse a sample document and print it back."""
    class Inner(ElementClass):
        __element__ = "inner"
        __attributes__ = {"special": Boolean}
        __characters__ = "text"
    class Outer(ElementClass):
        __element__ = "outer"
        __attributes__ = {"id": Integer, "name": String}
        __children__ = {Inner: "inner[]"}
    # NOTE(review): the sample used to be the bare text "blah, blah",
    # which is not well-formed XML and made the parse fail immediately.
    # This document was reconstructed to fit the classes declared above;
    # confirm it against the intended sample.
    sample = '''<outer id="1" name="foo">
    <inner special="true">blah, blah</inner>
    </outer>'''
    outie = Outer.__parseText__(sample)
    # The doubled parentheses print a tuple on both Python 2 and 3.
    print((outie.id, outie.name))
    for innie in outie.inner:
        print((innie.special, innie.text, str(innie)))
    print(outie)
# Run the demonstration in _test() when this file is executed as a script.
if __name__ == "__main__":
    _test()