Index: Doc/lib/xmldom.tex
===================================================================
--- Doc/lib/xmldom.tex (revision 55014)
+++ Doc/lib/xmldom.tex (working copy)
@@ -195,6 +195,8 @@
{Representation of comments in the source document.}
\lineiii{Text}{\ref{dom-text-objects}}
{Nodes containing textual content from the document.}
+ \lineiii{EntityReference}{\ref{dom-entityreference-objects}}
+ {Entity reference representation.}
\lineiii{ProcessingInstruction}{\ref{dom-pi-objects}}
{Processing instruction representation.}
\end{tableiii}
@@ -314,7 +316,7 @@
\begin{memberdesc}[Node]{nodeName}
This has a different meaning for each node type; see the DOM
-specification for details. You can always get the information you
+specification for details. You can usually get the information you
would get here from another property such as the \member{tagName}
property for elements or the \member{name} property for attributes.
For all node types, the value of this attribute will be either a
@@ -716,6 +718,20 @@
\end{notice}
+\subsubsection{EntityReference Objects \label{dom-entityreference-objects}}
+
+Represents an entity reference in the XML document. This inherits from
+the \class{Node} interface. \class{EntityReference} nodes and their
+descendants are read-only.
+
+The name of the referenced entity is in the \member{nodeName}
+attribute.
+
+Parsers may expand entity references on input, so the presence of an
+entity reference in an XML document does not mean that an
+\class{EntityReference} node will necessarily appear in the DOM tree.
+
+
\subsubsection{ProcessingInstruction Objects \label{dom-pi-objects}}
Represents a processing instruction in the XML document; this inherits
Index: Doc/lib/libpyexpat.tex
===================================================================
--- Doc/lib/libpyexpat.tex (revision 55014)
+++ Doc/lib/libpyexpat.tex (working copy)
@@ -137,6 +137,16 @@
\method{SetBase()} hasn't been called.
\end{methoddesc}
+\begin{methoddesc}[xmlparser]{GetSpecifiedAttributeCount}{}
+Returns the number of attributes and values passed in the most recent
+call to the \function{StartElementHandler} function that were
+specified in the start-tag rather than defaulted. Each
+attribute/value pair counts as 2. Thus if \member{ordered_attributes}
+is set, this corresponds to an index into the list passed to the
+\function{StartElementHandler}.
+\versionadded{2.6}
+\end{methoddesc}
+
\begin{methoddesc}[xmlparser]{GetInputContext}{}
Returns the input data that generated the current event as a string.
The data is in the encoding of the entity which contains the text.
Index: Doc/lib/xmldomminidom.tex
===================================================================
--- Doc/lib/xmldomminidom.tex (revision 55014)
+++ Doc/lib/xmldomminidom.tex (working copy)
@@ -258,27 +258,25 @@
\end{itemize}
-The following interfaces have no implementation in
-\refmodule{xml.dom.minidom}:
+The following DOM Level 1 interfaces were added to
+\refmodule{xml.dom.minidom} after Python 2.0:
\begin{itemize}
-\item \class{DOMTimeStamp}
+\item \class{DocumentFragment} (added in Python 2.1)
\item \class{DocumentType} (added in Python 2.1)
\item \class{DOMImplementation} (added in Python 2.1)
-\item \class{CharacterData}
+\item \class{CharacterData} (added in Python 2.3)
-\item \class{CDATASection}
+\item \class{CDATASection} (added in Python 2.3)
-\item \class{Notation}
+\item \class{Notation} (added in Python 2.3)
-\item \class{Entity}
+\item \class{Entity} (added in Python 2.3)
-\item \class{EntityReference}
-
-\item \class{DocumentFragment}
+\item \class{EntityReference} (added in Python 2.6)
\end{itemize}
Most of these reflect information in the XML document that is not of
Index: Lib/test/test_minidom.py
===================================================================
--- Lib/test/test_minidom.py (revision 55014)
+++ Lib/test/test_minidom.py (working copy)
@@ -53,29 +53,17 @@
return doc
class MinidomTest(unittest.TestCase):
- def tearDown(self):
- try:
- Node.allnodes
- except AttributeError:
- # We don't actually have the minidom from the standard library,
- # but are picking up the PyXML version from site-packages.
- pass
- else:
- self.confirm(len(Node.allnodes) == 0,
- "assertion: len(Node.allnodes) == 0")
- if len(Node.allnodes):
- print "Garbage left over:"
- if verbose:
- print Node.allnodes.items()[0:10]
- else:
- # Don't print specific nodes if repeatable results
- # are needed
- print len(Node.allnodes)
- Node.allnodes = {}
-
def confirm(self, test, testname = "Test"):
self.assertTrue(test, testname)
+ def assertSameNode(self, a, b, msg=None):
+ if msg is None:
+ msg = "Expected nodes to be the same, got %r and %r" % (a, b)
+ self.assert_(a is not None, msg)
+ self.assert_(b is not None, msg)
+ self.assert_(a.isSameNode(b), msg)
+ self.assert_(b.isSameNode(a), msg)
+
def checkWholeText(self, node, s):
t = node.wholeText
self.confirm(t == s, "looking for %s, found %s" % (repr(s), repr(t)))
@@ -91,6 +79,21 @@
dom.documentElement.getElementsByTagName("LI"))
dom.unlink()
+ def testXmlVersion(self):
+ # xmlVersion also affects name-checking;
+ # see testInvalidCharacterErr()
+ doc = parseString("")
+ self.assertEquals(doc.xmlVersion, "1.0")
+ self.assert_(doc.implementation.hasFeature("XMLVersion", "1.0"))
+ self.assert_(doc.implementation.hasFeature("XMLVersion", "1.1"))
+ self.failIf(doc.implementation.hasFeature("XMLVersion", "2.0"))
+ doc.xmlVersion = "1.1"
+ self.assertRaises(xml.dom.NotSupportedErr,
+ setattr, doc, 'xmlVersion', "2.0")
+
+ doc2 = parseString(" ")
+ self.assertEquals(doc2.xmlVersion, "1.1")
+
def testInsertBefore(self):
dom = parseString("")
root = dom.documentElement
@@ -210,6 +213,125 @@
elem.appendChild(text)
dom.unlink()
+ def testAncestorLoops(self):
+ doc = parseString("")
+ a = doc.documentElement
+ b = a.firstChild
+ c = b.firstChild
+ self.assertRaises(xml.dom.HierarchyRequestErr,
+ a.appendChild, a)
+ self.assertRaises(xml.dom.HierarchyRequestErr,
+ b.appendChild, a)
+ self.assertRaises(xml.dom.HierarchyRequestErr,
+ a.insertBefore, a, b)
+ self.assertRaises(xml.dom.HierarchyRequestErr,
+ b.insertBefore, a, c)
+ self.assertRaises(xml.dom.HierarchyRequestErr,
+ a.replaceChild, a, b)
+ self.assertRaises(xml.dom.HierarchyRequestErr,
+ b.replaceChild, a, c)
+
+
+ def testWrongDocumentErr(self):
+ doc1 = parseString("")
+ doc2 = parseString("")
+ el1 = doc1.createElement("test")
+ doc2el = doc2.documentElement
+ self.assertRaises(xml.dom.WrongDocumentErr,
+ doc2el.insertBefore, el1, doc2el.firstChild)
+ self.assertRaises(xml.dom.WrongDocumentErr,
+ doc2el.replaceChild, el1, doc2el.firstChild)
+ self.assertRaises(xml.dom.WrongDocumentErr,
+ doc2el.appendChild, el1)
+ attr1 = doc1.createAttribute("spam")
+ attr1.value = "true"
+ self.assertRaises(xml.dom.WrongDocumentErr,
+ doc2el.setAttributeNode, attr1)
+ self.assertRaises(xml.dom.WrongDocumentErr,
+ doc2el.attributes.setNamedItem, attr1)
+
+ doc2.removeChild(doc2el)
+ self.assertRaises(xml.dom.WrongDocumentErr,
+ doc2.appendChild, el1)
+
+    def testInvalidCharacterErr(self):
+        # Every name-taking DOM call must reject names that are illegal
+        # under the document's XML version (1.0 until changed below).
+        badNames = ['', '12', 'with space', "can't",
+                    u"D\u0133kstra", u'\U00012345']
+        uri = 'http://www.python.org/ns/foo'
+        doc = parseString("")
+        self.assertEqual(doc.xmlVersion, "1.0")
+        for name in badNames:
+            self.assertRaises(xml.dom.InvalidCharacterErr,
+                              doc.createElement, name)
+            self.assertRaises(xml.dom.InvalidCharacterErr,
+                              doc.createAttribute, name)
+            self.assertRaises(xml.dom.InvalidCharacterErr,
+                              doc.createAttributeNS, uri, name)
+            self.assertRaises(xml.dom.InvalidCharacterErr,
+                              doc.createElementNS, uri, name)
+            self.assertRaises(xml.dom.InvalidCharacterErr,
+                              doc.createEntityReference, name)
+            self.assertRaises(xml.dom.InvalidCharacterErr,
+                              doc.createProcessingInstruction, name, '')
+            self.assertRaises(xml.dom.InvalidCharacterErr,
+                              doc.implementation.createDocument, uri, name, None)
+            self.assertRaises(xml.dom.InvalidCharacterErr,
+                              doc.renameNode, doc.documentElement, None, name)
+            el = doc.documentElement
+            self.assertRaises(xml.dom.InvalidCharacterErr,
+                              el.setAttribute, name, 'value')
+            ## self.assertRaises(xml.dom.InvalidCharacterErr,
+            ##                   setattr, el, 'prefix', name)
+            attrNode = el.getAttributeNode('attr')
+            self.assertRaises(xml.dom.InvalidCharacterErr,
+                              setattr, attrNode, 'prefix', name)
+
+        # expat doesn't parse XML 1.1 correctly, as of this writing,
+        # but we can test this by modifying an existing document:
+        doc.xmlVersion = "1.1"
+        root = doc.documentElement
+        xml11_names = [u'D\u0133kstra', u'\U00012345']
+        for name in xml11_names:
+            el = doc.createElement(name)
+            self.assertEquals(el.tagName, name)
+            root.attributes[name] = "ok"
+            self.assertEquals(root.getAttribute(name), 'ok')
+
+ def testInuseAttributeErr(self):
+ doc = parseString(
+ "\n"
+ "\n")
+ parent = doc.documentElement
+ attr = parent.getAttributeNode('inuse')
+ child = parent.firstChild
+ parent.attributes.setNamedItem(attr) # should be a no-op
+ self.assertEquals(parent.attributes.length, 1)
+ self.assertRaises(xml.dom.InuseAttributeErr,
+ child.attributes.setNamedItem, attr)
+ parent.setAttributeNode(attr) # no-op
+ self.assertEquals(parent.attributes.length, 1)
+ self.assertRaises(xml.dom.InuseAttributeErr,
+ child.setAttributeNode, attr)
+
+ doc = parseString(
+ "\n"
+ "\n")
+ nsuri = 'http://www.python.org/ns/tests/1.0'
+ parent = doc.documentElement
+ attr = parent.getAttributeNodeNS(nsuri, "inuse")
+ child = parent.firstChild
+ parent.attributes.setNamedItemNS(attr) # no-op
+ self.assertEquals(parent.attributes.length, 2)
+ self.assertRaises(xml.dom.InuseAttributeErr,
+ child.attributes.setNamedItemNS, attr)
+ parent.setAttributeNodeNS(attr) # no-op
+ self.assertEquals(parent.attributes.length, 2)
+ self.assertRaises(xml.dom.InuseAttributeErr,
+ child.setAttributeNodeNS, attr)
+
def testNamedNodeMapSetItem(self):
dom = Document()
elem = dom.createElement('element')
@@ -244,6 +366,30 @@
self.confirm(dom.documentElement)
dom.unlink()
+ def testNodeValueDefinedNull(self):
+ # According to the spec, for node types where nodeValue is
+ # defined to be null, setting it has no effect.
+ doc = parseString(
+ "\n"
+ "\n"
+ "\n"
+ "]>\n"
+ "\n")
+ nodes = [
+ doc,
+ doc.createDocumentFragment(),
+ doc.doctype,
+ doc.documentElement,
+ doc.doctype.entities['version'],
+ doc.createEntityReference('version'),
+ doc.doctype.notations['notation']]
+ for node in nodes:
+ node.nodeValue = "Tuesday"
+ self.assert_(
+ node.nodeValue is None,
+ "setting %r.nodeValue should have no effect" % node)
+
def testAAA(self):
dom = parseString("")
el = dom.documentElement
@@ -256,6 +402,32 @@
"setAttribute() sets ownerElement")
dom.unlink()
+ def testAttributesDefinedNull(self):
+ doc = parseString(
+ "\n"
+ "\n"
+ "\n"
+ "]>\n"
+ "\n")
+ nodes = [
+ doc.documentElement.getAttributeNode("test"),
+ doc.createTextNode("test"),
+ doc.createCDATASection("1<2"),
+ doc.createEntityReference("version"),
+ doc.doctype.entities['version'],
+ doc.createProcessingInstruction("pragma", "ignore"),
+ doc.createComment("test"),
+ doc,
+ doc.doctype,
+ doc.createDocumentFragment(),
+ doc.doctype.notations['notation']]
+ for node in nodes:
+ self.assert_(hasattr(node, 'attributes'),
+ ("Node %r has no 'attributes' property "
+ "(should be present but null)" % node))
+ self.assertEqual(node.attributes, None)
+
def testAAB(self):
dom = parseString("")
el = dom.documentElement
@@ -324,9 +496,18 @@
child.setAttribute("spam", "jam")
self.confirm(len(child.attributes) == 1)
node = child.getAttributeNode("spam")
- child.removeAttributeNode(node)
+ removed = child.removeAttributeNode(node)
+ self.assertSameNode(removed, node)
+ self.assertEqual(removed.value, "jam")
self.confirm(len(child.attributes) == 0
and child.getAttributeNode("spam") is None)
+
+ child.setAttribute("special", "spam")
+ other = dom.createElement("foo")
+ other.setAttribute("special", "spam")
+ self.assertRaises(xml.dom.NotFoundErr,
+ child.removeAttributeNode,
+ other.getAttributeNode("special"))
dom.unlink()
def testChangeAttr(self):
@@ -367,6 +548,48 @@
and el.getAttribute("spam2") == "bam2")
dom.unlink()
+ def testSetAttributeNodeNS(self):
+ dom = parseString(
+ '\n')
+ el = dom.documentElement
+ ns = 'http://www.python.org/ns'
+ origAttr = el.getAttributeNodeNS(ns, 'x')
+ self.assertEqual(origAttr.nodeName, 'a:x')
+ self.assertEqual(origAttr.value, "spam")
+ newAttr = dom.createAttributeNS(ns, 'b:x')
+ newAttr.value = 'ham'
+ oldAttr = el.setAttributeNodeNS(newAttr)
+ self.assertSameNode(oldAttr, origAttr)
+ self.assertEqual(oldAttr.parentNode, None)
+ dom.unlink()
+
+ def testAttrSpecified(self):
+ doc = parseString("")
+ el = doc.documentElement
+ self.assert_(el.attributes["brief"].specified)
+ el.setAttribute("modified", "true")
+ self.assert_(el.attributes["modified"].specified)
+ doc.unlink()
+
+ doc = parseString(
+ "\n"
+ "\n"
+ "\n"
+ "]>\n"
+ "\n")
+ test = doc.documentElement
+ self.assertEquals(test.getAttribute("required"), "true")
+ self.assert_(not test.getAttributeNode("required").specified)
+ self.assert_(test.getAttributeNode("date").specified)
+ self.assert_(test.getAttributeNode("id").specified)
+ doc.unlink()
+
def testGetAttrList(self):
pass
@@ -466,6 +689,18 @@
dom.unlink()
self.confirm(domstr == str.replace("\n", "\r\n"))
+ def testEntityReference(self):
+ doc = create_doc_with_doctype()
+ er = doc.createEntityReference("my-entity")
+ doc.documentElement.appendChild(er)
+ self.assertEquals(
+ doc.documentElement.toxml(), "&my-entity;")
+ node = doc.createTextNode("hello")
+ self.assertRaises(xml.dom.NoModificationAllowedErr,
+ er.appendChild, node)
+ self.assertRaises(xml.dom.NoModificationAllowedErr,
+ er.insertBefore, node, None)
+
def testProcessingInstruction(self):
dom = parseString('')
pi = dom.documentElement.firstChild
@@ -483,6 +718,78 @@
def testProcessingInstructionRepr(self): pass
+ def _checkCharacterDataNode(self, node):
+ self.assertEqual(node.length, len(node.data))
+ n = node.length
+ d = node.data
+
+ # substringData
+ self.assertEqual(node.substringData(0, n + 30), node.data)
+ self.assertEqual(node.substringData(0, 1), node.data[0:1])
+ self.assertEqual(node.substringData(n, 0), '')
+ self.assertEqual(node.substringData(n, 30), '')
+ self.assertRaises(xml.dom.IndexSizeErr,
+ node.substringData, -1, 0)
+ self.assertRaises(xml.dom.IndexSizeErr,
+ node.substringData, n + 1, 0)
+
+ # appendData
+ node.appendData("extra")
+ self.assertEqual(node.length, n + 5)
+ self.assertEqual(node.data, d + "extra")
+
+ # insertData
+ n = node.length
+ d = node.data
+ self.assertRaises(xml.dom.IndexSizeErr,
+ node.insertData, -1, "bogus")
+ self.assertRaises(xml.dom.IndexSizeErr,
+ node.insertData, n + 1, "bogus")
+ node.insertData(n, "more")
+ self.assertEqual(node.length, n + 4)
+ self.assertEqual(node.data, d + "more")
+ node.insertData(n, "a bit ")
+ self.assertEqual(node.length, n + 4 + 6)
+ self.assertEqual(node.data, d + "a bit more")
+ self.assertEqual(node.substringData(n + 2, 6), "bit mo")
+
+ # deleteData
+ node.deleteData(n, 1000000)
+ self.assertEqual(node.length, n)
+ self.assertEqual(node.data, d)
+ node.deleteData(n, 20) # shouldn't throw
+ self.assertRaises(xml.dom.IndexSizeErr,
+ node.deleteData, n + 1, 0)
+ self.assertRaises(xml.dom.IndexSizeErr,
+ node.deleteData, -1, 0)
+
+ # replaceData
+ node.replaceData(0, node.length, "test data")
+ self.assertEqual(node.length, 9)
+ self.assertEqual(node.data, "test data")
+ n = node.length
+ self.assertRaises(xml.dom.IndexSizeErr,
+ node.replaceData, -1, 0, "")
+ self.assertRaises(xml.dom.IndexSizeErr,
+ node.replaceData, n + 1, 0, "")
+ self.assertRaises(xml.dom.IndexSizeErr,
+ node.replaceData, 0, -1, "")
+ node.replaceData(1, 3, "ricky")
+ self.assertEqual(node.length, 11)
+ self.assertEqual(node.data, "tricky data")
+
+ def testTextMethods(self):
+ doc = parseString("This is a text node.")
+ self._checkCharacterDataNode(doc.documentElement.firstChild)
+ self._checkCharacterDataNode(doc.createTextNode("hello world"))
+ self._checkCharacterDataNode(doc.createComment("hello world"))
+ self._checkCharacterDataNode(doc.createCDATASection("hello world"))
+ doc.unlink()
+
+ doc = parseString(" node.]]>")
+ self._checkCharacterDataNode(doc.documentElement.firstChild)
+ doc.unlink()
+
def testTextRepr(self): pass
def testWriteText(self): pass
@@ -930,17 +1237,17 @@
# Simple renaming
attr = doc.renameNode(attr, xml.dom.EMPTY_NAMESPACE, "b")
- self.confirm(attr.name == "b"
- and attr.nodeName == "b"
- and attr.localName is None
- and attr.namespaceURI == xml.dom.EMPTY_NAMESPACE
- and attr.prefix is None
- and attr.value == "v"
- and elem.getAttributeNode("a") is None
- and elem.getAttributeNode("b").isSameNode(attr)
- and attrmap["b"].isSameNode(attr)
- and attr.ownerDocument.isSameNode(doc)
- and attr.ownerElement.isSameNode(elem))
+ self.assertEqual(attr.name, "b")
+ self.assertEqual(attr.nodeName, "b")
+ self.assertEqual(attr.localName, None)
+ self.assertEqual(attr.namespaceURI, xml.dom.EMPTY_NAMESPACE)
+ self.assertEqual(attr.prefix, None)
+ self.assertEqual(attr.value, "v")
+ self.assertEqual(elem.getAttributeNode("a"), None)
+ self.assertSameNode(elem.getAttributeNode("b"), attr)
+ self.assertSameNode(attrmap["b"], attr)
+ self.assertSameNode(attr.ownerDocument, doc)
+ self.assertSameNode(attr.ownerElement, elem)
# Rename to have a namespace, no prefix
attr = doc.renameNode(attr, "http://xml.python.org/ns", "c")
@@ -960,39 +1267,38 @@
# Rename to have a namespace, with prefix
attr = doc.renameNode(attr, "http://xml.python.org/ns2", "p:d")
- self.confirm(attr.name == "p:d"
- and attr.nodeName == "p:d"
- and attr.localName == "d"
- and attr.namespaceURI == "http://xml.python.org/ns2"
- and attr.prefix == "p"
- and attr.value == "v"
- and elem.getAttributeNode("a") is None
- and elem.getAttributeNode("b") is None
- and elem.getAttributeNode("c") is None
- and elem.getAttributeNodeNS(
- "http://xml.python.org/ns", "c") is None
- and elem.getAttributeNode("p:d").isSameNode(attr)
- and elem.getAttributeNodeNS(
- "http://xml.python.org/ns2", "d").isSameNode(attr)
- and attrmap["p:d"].isSameNode(attr)
- and attrmap[("http://xml.python.org/ns2", "d")].isSameNode(attr))
+ self.assertEqual(attr.name, "p:d")
+ self.assertEqual(attr.nodeName, "p:d")
+ self.assertEqual(attr.localName, "d")
+ self.assertEqual(attr.namespaceURI, "http://xml.python.org/ns2")
+ self.assertEqual(attr.prefix, "p")
+ self.assertEqual(attr.value, "v")
+ self.assertEqual(elem.getAttributeNode("a"), None)
+ self.assertEqual(elem.getAttributeNode("b"), None)
+ self.assertEqual(elem.getAttributeNode("c"), None)
+ self.assertEqual(elem.getAttributeNodeNS(
+ "http://xml.python.org/ns", "c"), None)
+ self.assertSameNode(elem.getAttributeNode("p:d"), attr)
+ self.assertSameNode(elem.getAttributeNodeNS(
+ "http://xml.python.org/ns2", "d"), attr)
+ self.assertSameNode(attrmap["p:d"], attr)
+ self.assertSameNode(attrmap[("http://xml.python.org/ns2", "d")], attr)
# Rename back to a simple non-NS node
attr = doc.renameNode(attr, xml.dom.EMPTY_NAMESPACE, "e")
- self.confirm(attr.name == "e"
- and attr.nodeName == "e"
- and attr.localName is None
- and attr.namespaceURI == xml.dom.EMPTY_NAMESPACE
- and attr.prefix is None
- and attr.value == "v"
- and elem.getAttributeNode("a") is None
- and elem.getAttributeNode("b") is None
- and elem.getAttributeNode("c") is None
- and elem.getAttributeNode("p:d") is None
- and elem.getAttributeNodeNS(
- "http://xml.python.org/ns", "c") is None
- and elem.getAttributeNode("e").isSameNode(attr)
- and attrmap["e"].isSameNode(attr))
+ self.assertEqual(attr.name, "e")
+ self.assertEqual(attr.nodeName, "e")
+ self.assertEqual(attr.localName, None)
+ self.assertEqual(attr.namespaceURI, xml.dom.EMPTY_NAMESPACE)
+ self.assertEqual(attr.prefix, None)
+ self.assertEqual(attr.value, "v")
+ self.assertEqual(elem.getAttributeNode("a"), None)
+ self.assertEqual(elem.getAttributeNode("b"), None)
+ self.assertEqual(elem.getAttributeNode("c"), None)
+ self.assertEqual(elem.getAttributeNode("p:d"), None)
+ self.assertEqual(elem.getAttributeNodeNS("http://xml.python.org/ns", "c"), None)
+ self.assertSameNode(elem.getAttributeNode("e"), attr)
+ self.assertSameNode(attrmap["e"], attr)
self.assertRaises(xml.dom.NamespaceErr, doc.renameNode, attr,
"http://xml.python.org/ns", "xmlns")
Index: Lib/xml/dom/expatbuilder.py
===================================================================
--- Lib/xml/dom/expatbuilder.py (revision 55014)
+++ Lib/xml/dom/expatbuilder.py (working copy)
@@ -159,7 +159,6 @@
self._intern_setdefault = self._parser.intern.setdefault
self._parser.buffer_text = True
self._parser.ordered_attributes = True
- self._parser.specified_attributes = True
self.install(self._parser)
return self._parser
@@ -350,6 +349,8 @@
def first_element_handler(self, name, attributes):
if self._filter is None and not self._elem_info:
self._finish_end_element = id
+ if self.document.xmlVersion is None:
+ self.document.xmlVersion = "1.0"
self.getParser().StartElementHandler = self.start_element_handler
self.start_element_handler(name, attributes)
@@ -359,6 +360,7 @@
self.curNode = node
if attributes:
+ ispecified = self.getParser().GetSpecifiedAttributeCount()
for i in range(0, len(attributes), 2):
a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
None, EMPTY_PREFIX)
@@ -368,6 +370,7 @@
d = a.__dict__
d['value'] = d['nodeValue'] = value
d['ownerDocument'] = self.document
+ d['specified'] = i < ispecified
_set_attribute_node(node, a)
if node is not self.document.documentElement:
@@ -446,7 +449,7 @@
[None, name, None, None, default, 0, type, required])
def xml_decl_handler(self, version, encoding, standalone):
- self.document.version = version
+ self.document.xmlVersion = version
self.document.encoding = encoding
# This is still a little ugly, thanks to the pyexpat API. ;-(
if standalone >= 0:
@@ -772,6 +775,7 @@
if attributes:
_attrs = node._attrs
_attrsNS = node._attrsNS
+ ispecified = self.getParser().GetSpecifiedAttributeCount()
for i in range(0, len(attributes), 2):
aname = attributes[i]
value = attributes[i+1]
@@ -791,6 +795,7 @@
d['ownerDocument'] = self.document
d['value'] = d['nodeValue'] = value
d['ownerElement'] = node
+ d['specified'] = i < ispecified
if __debug__:
# This only adds some asserts to the original
Index: Lib/xml/dom/minidom.py
===================================================================
--- Lib/xml/dom/minidom.py (revision 55014)
+++ Lib/xml/dom/minidom.py (working copy)
@@ -15,6 +15,8 @@
"""
import xml.dom
+from xml.names import is_name as _is_name
+import sys, re
from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
from xml.dom.minicompat import *
@@ -35,9 +37,18 @@
ownerDocument = None
nextSibling = None
previousSibling = None
+ attributes = None
prefix = EMPTY_PREFIX # non-null only for NS elements and attributes
+ def __setattr__(self, name, value):
+ d = self.__dict__
+ # Per the DOM spec, if self.nodeValue is defined to be null,
+ # setting it has no effect. Node types with a meaningful
+ # nodeValue override this.
+ if name != 'nodeValue':
+ d[name] = value
+
def __nonzero__(self):
return True
@@ -82,9 +93,20 @@
self.insertBefore(c, refChild)
### The DOM does not clearly specify what to return in this case
return newChild
+ doc = self.ownerDocument or self
+ newChildDoc = newChild.ownerDocument
+ if newChildDoc is not doc and newChildDoc is not None:
+ raise xml.dom.WrongDocumentErr()
if newChild.nodeType not in self._child_node_types:
raise xml.dom.HierarchyRequestErr(
"%s cannot be child of %s" % (repr(newChild), repr(self)))
+ if newChild is self:
+ raise xml.dom.HierarchyRequestErr(
+ "cannot insert %r into itself" % self)
+ if _is_ancestor(newChild, self):
+ raise xml.dom.HierarchyRequestErr(
+ "new child %r is an ancestor of target node %r"
+ % (newChild, self))
if newChild.parentNode is not None:
newChild.parentNode.removeChild(newChild)
if refChild is None:
@@ -114,10 +136,20 @@
self.appendChild(c)
### The DOM does not clearly specify what to return in this case
return node
+ doc = self.ownerDocument or self
+ nodeDoc = node.ownerDocument
+ if nodeDoc is not doc and nodeDoc is not None:
+ raise xml.dom.WrongDocumentErr()
if node.nodeType not in self._child_node_types:
raise xml.dom.HierarchyRequestErr(
"%s cannot be child of %s" % (repr(node), repr(self)))
- elif node.nodeType in _nodeTypes_with_children:
+ if node is self:
+ raise xml.dom.HierarchyRequestErr(
+ "cannot append a node to itself (%r)" % self)
+ if _is_ancestor(node, self):
+ raise xml.dom.HierarchyRequestErr(
+ "new child %r is an ancestor of target node %r" % (node, self))
+ if node.nodeType in _nodeTypes_with_children:
_clear_id_cache(self)
if node.parentNode is not None:
node.parentNode.removeChild(node)
@@ -130,11 +162,22 @@
refChild = oldChild.nextSibling
self.removeChild(oldChild)
return self.insertBefore(newChild, refChild)
+ doc = self.ownerDocument or self
+ newChildDoc = newChild.ownerDocument
+ if newChildDoc is not doc and newChildDoc is not None:
+ raise xml.dom.WrongDocumentErr()
if newChild.nodeType not in self._child_node_types:
raise xml.dom.HierarchyRequestErr(
"%s cannot be child of %s" % (repr(newChild), repr(self)))
if newChild is oldChild:
return
+ if newChild is self:
+ raise xml.dom.HierarchyRequestErr(
+ "cannot insert %r into itself" % self)
+ if _is_ancestor(newChild, self):
+ raise xml.dom.HierarchyRequestErr(
+ "new child %r is an ancestor of target node %r"
+ % (newChild, self))
if newChild.parentNode is not None:
newChild.parentNode.removeChild(newChild)
try:
@@ -294,6 +337,14 @@
node = node.parentNode
return False
+def _is_ancestor(ancestor, child):
+    """Return True if `ancestor` is a proper ancestor of `child`."""
+    # A node is never its own ancestor: the walk starts at the parent.
+    node = child.parentNode
+    while node is not None and node is not ancestor:
+        node = node.parentNode
+    return node is not None
+
def _write_data(writer, data):
"Writes datachars to writer."
data = data.replace("&", "&").replace("<", "<")
@@ -321,7 +372,6 @@
nodeType = Node.DOCUMENT_FRAGMENT_NODE
nodeName = "#document-fragment"
nodeValue = None
- attributes = None
parentNode = None
_child_node_types = (Node.ELEMENT_NODE,
Node.TEXT_NODE,
@@ -337,9 +387,8 @@
class Attr(Node):
nodeType = Node.ATTRIBUTE_NODE
- attributes = None
ownerElement = None
- specified = False
+ specified = True
_is_id = False
_child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
@@ -379,10 +428,14 @@
d["name"] = d["nodeName"] = value
if self.ownerElement is not None:
_clear_id_cache(self.ownerElement)
+ elif name == 'prefix':
+ self._set_prefix(value)
else:
d[name] = value
def _set_prefix(self, prefix):
+ if self.ownerDocument is not None:
+ _check_name(prefix, self.ownerDocument.xmlVersion)
nsuri = self.namespaceURI
if prefix == "xmlns":
if nsuri and nsuri != XMLNS_NAMESPACE:
@@ -579,9 +632,17 @@
raise xml.dom.NotFoundErr()
def setNamedItem(self, node):
+ if node.ownerDocument is not self._ownerElement.ownerDocument:
+ raise xml.dom.WrongDocumentErr()
if not isinstance(node, Attr):
raise xml.dom.HierarchyRequestErr(
"%s cannot be child of %s" % (repr(node), repr(self)))
+ if node.ownerElement is self._ownerElement:
+ return
+ if node.ownerElement is not None:
+ raise xml.dom.InuseAttributeErr(
+ "Cannot add %r to %r while it is still used in %r"
+ % (node, self, node.ownerElement))
old = self._attrs.get(node.name)
if old:
old.unlink()
@@ -687,6 +748,7 @@
return ""
def setAttribute(self, attname, value):
+ _check_name(attname, self.ownerDocument.xmlVersion)
attr = self.getAttributeNode(attname)
if attr is None:
attr = Attr(attname)
@@ -702,6 +764,7 @@
_clear_id_cache(self)
def setAttributeNS(self, namespaceURI, qualifiedName, value):
+ _check_name(qualifiedName, self.ownerDocument.xmlVersion)
prefix, localname = _nssplit(qualifiedName)
attr = self.getAttributeNodeNS(namespaceURI, localname)
if attr is None:
@@ -730,6 +793,8 @@
return self._attrsNS.get((namespaceURI, localName))
def setAttributeNode(self, attr):
+ if attr.ownerDocument is not self.ownerDocument:
+ raise xml.dom.WrongDocumentErr()
if attr.ownerElement not in (None, self):
raise xml.dom.InuseAttributeErr("attribute node already owned")
old1 = self._attrs.get(attr.name, None)
@@ -740,7 +805,11 @@
self.removeAttributeNode(old2)
_set_attribute_node(self, attr)
- if old1 is not attr:
+ ### The DOM does not clearly specify what to return when attr
+ ### was already in this element. (The W3C DOM Test Suite
+ ### expects this to return attr in that case, but the
+ ### recommendation itself doesn't say.)
+ if old1 is not None and old1 is not attr:
# It might have already been part of this node, in which case
# it doesn't represent a change, and should not be returned.
return old1
@@ -767,14 +836,17 @@
if node is None:
raise xml.dom.NotFoundErr()
try:
- self._attrs[node.name]
+ target = self._attrs[node.name]
except KeyError:
raise xml.dom.NotFoundErr()
+ if node is not target:
+ raise xml.dom.NotFoundErr()
_clear_id_cache(self)
node.unlink()
# Restore this since the node is still useful and otherwise
# unlinked
node.ownerDocument = self.ownerDocument
+ return node
removeAttributeNodeNS = removeAttributeNode
@@ -868,7 +940,6 @@
the complexity of the Node methods that deal with children.
"""
- attributes = None
childNodes = EmptyNodeList()
firstChild = None
lastChild = None
@@ -912,6 +983,9 @@
d = self.__dict__
d['data'] = d['nodeValue'] = value
+ _get_nodeValue = _get_data
+ _set_nodeValue = _set_data
+
def _get_target(self):
return self.target
def _set_target(self, value):
@@ -962,7 +1036,7 @@
def substringData(self, offset, count):
if offset < 0:
raise xml.dom.IndexSizeErr("offset cannot be negative")
- if offset >= len(self.data):
+ if offset > len(self.data):
raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
if count < 0:
raise xml.dom.IndexSizeErr("count cannot be negative")
@@ -974,7 +1048,7 @@
def insertData(self, offset, arg):
if offset < 0:
raise xml.dom.IndexSizeErr("offset cannot be negative")
- if offset >= len(self.data):
+ if offset > len(self.data):
raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
if arg:
self.data = "%s%s%s" % (
@@ -983,7 +1057,7 @@
def deleteData(self, offset, count):
if offset < 0:
raise xml.dom.IndexSizeErr("offset cannot be negative")
- if offset >= len(self.data):
+ if offset > len(self.data):
raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
if count < 0:
raise xml.dom.IndexSizeErr("count cannot be negative")
@@ -1012,7 +1086,6 @@
nodeType = Node.TEXT_NODE
nodeName = "#text"
- attributes = None
def splitText(self, offset):
if offset < 0 or offset > len(self.data):
@@ -1287,7 +1360,6 @@
writer.write(">"+newl)
class Entity(Identified, Node):
- attributes = None
nodeType = Node.ENTITY_NODE
nodeValue = None
@@ -1326,6 +1398,40 @@
raise xml.dom.HierarchyRequestErr(
"cannot replace children of an entity node")
+class EntityReference(Node):
+ nodeType = Node.ENTITY_REFERENCE_NODE
+ nodeValue = None
+
+ _child_node_types = (Node.ELEMENT_NODE,
+ Node.TEXT_NODE,
+ Node.CDATA_SECTION_NODE,
+ Node.ENTITY_REFERENCE_NODE,
+ Node.PROCESSING_INSTRUCTION_NODE,
+ Node.COMMENT_NODE)
+
+ def __init__(self, name):
+ self.nodeName = name
+ self.childNodes = NodeList()
+
+ def appendChild(self, newChild):
+ raise xml.dom.NoModificationAllowedErr(
+ "EntityReference nodes are readonly")
+
+ def insertBefore(self, newChild, refChild):
+ raise xml.dom.NoModificationAllowedErr(
+ "EntityReference nodes are readonly")
+
+ def removeChild(self, oldChild):
+ raise xml.dom.NoModificationAllowedErr(
+ "EntityReference nodes are readonly")
+
+ def replaceChild(self, newChild, oldChild):
+ raise xml.dom.NoModificationAllowedErr(
+ "EntityReference nodes are readonly")
+
+ def writexml(self, writer, indent="", addindent="", newl=""):
+ writer.write("&" + self.nodeName + ";")
+
class Notation(Identified, Childless, Node):
nodeType = Node.NOTATION_NODE
nodeValue = None
@@ -1344,6 +1450,8 @@
("xml", "2.0"),
("xml", "3.0"),
("xml", None),
+ ("xmlversion", "1.0"),
+ ("xmlversion", "1.1"),
("ls-load", "3.0"),
("ls-load", None),
]
@@ -1357,6 +1465,9 @@
if doctype and doctype.parentNode is not None:
raise xml.dom.WrongDocumentErr(
"doctype object owned by another DOM tree")
+ if qualifiedName is not None:
+ # Note: DOM Level 3 specifies XML 1.0 here.
+ _check_name(qualifiedName, "1.0")
doc = self._create_document()
add_root_element = not (namespaceURI is None
@@ -1368,10 +1479,8 @@
# would be the other obvious candidate. Since Xerces raises
# InvalidCharacterErr, and since SyntaxErr is not listed
# for createDocument, that seems to be the better choice.
- # XXX: need to check for illegal characters here and in
- # createElement.
- # DOM Level III clears this up when talking about the return value
+ # DOM Level 3 clears this up when talking about the return value
# of this function. If namespaceURI, qName and DocType are
# Null the document is returned without a document element
# Otherwise if doctype or namespaceURI are not None
@@ -1399,6 +1508,9 @@
return doc
def createDocumentType(self, qualifiedName, publicId, systemId):
+ if qualifiedName is not None:
+ # DOM Level 3 specifies XML 1.0 here
+ _check_name(qualifiedName, "1.0")
doctype = DocumentType(qualifiedName)
doctype.publicId = publicId
doctype.systemId = systemId
@@ -1473,7 +1585,6 @@
nodeType = Node.DOCUMENT_NODE
nodeName = "#document"
nodeValue = None
- attributes = None
doctype = None
parentNode = None
previousSibling = nextSibling = None
@@ -1493,6 +1604,7 @@
_magic_id_count = 0
def __init__(self):
+ self.xmlVersion = "1.0"
self.childNodes = NodeList()
# mapping of (namespaceURI, localName) -> ElementInfo
# and tagName -> ElementInfo
@@ -1500,6 +1612,12 @@
self._id_cache = {}
self._id_search_stack = None
+    def __setattr__(self, name, value):  # validates xmlVersion, then stores the attribute
+        if name == 'xmlVersion':
+            if not self.implementation.hasFeature("XMLVersion", value):
+                raise xml.dom.NotSupportedErr("XMLVersion %s" % (value,))  # %-format: value may be None/non-string
+        return Node.__setattr__(self, name, value)
+
def _get_elem_info(self, element):
if element.namespaceURI:
key = element.namespaceURI, element.localName
@@ -1597,6 +1715,7 @@
return d
def createElement(self, tagName):
+ _check_name(tagName, self.xmlVersion)
e = Element(tagName)
e.ownerDocument = self
return e
@@ -1623,23 +1742,27 @@
return c
def createProcessingInstruction(self, target, data):
+ _check_name(target, self.xmlVersion)
p = ProcessingInstruction(target, data)
p.ownerDocument = self
return p
def createAttribute(self, qName):
+ _check_name(qName, self.xmlVersion)
a = Attr(qName)
a.ownerDocument = self
a.value = ""
return a
def createElementNS(self, namespaceURI, qualifiedName):
+ _check_name(qualifiedName, self.xmlVersion)
prefix, localName = _nssplit(qualifiedName)
e = Element(qualifiedName, namespaceURI, prefix)
e.ownerDocument = self
return e
def createAttributeNS(self, namespaceURI, qualifiedName):
+ _check_name(qualifiedName, self.xmlVersion)
prefix, localName = _nssplit(qualifiedName)
a = Attr(qualifiedName, namespaceURI, localName, prefix)
a.ownerDocument = self
@@ -1659,6 +1782,12 @@
n.ownerDocument = self
return n
+ def createEntityReference(self, name):
+ _check_name(name, self.xmlVersion)
+ ref = EntityReference(name)
+ ref.ownerDocument = self
+ return ref
+
def getElementById(self, id):
if self._id_cache.has_key(id):
return self._id_cache[id]
@@ -1753,6 +1882,7 @@
if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
raise xml.dom.NotSupportedErr(
"renameNode() only applies to element and attribute nodes")
+ _check_name(name, self.xmlVersion)
if namespaceURI != EMPTY_NAMESPACE:
if ':' in name:
prefix, localName = name.split(':', 1)
@@ -1893,6 +2023,9 @@
else:
return (None, fields[0])
+def _check_name(name, xmlVersion):
+ if not _is_name(name, xmlVersion):
+ raise xml.dom.InvalidCharacterErr(name)
def _get_StringIO():
# we can't use cStringIO since it doesn't support Unicode strings
Index: Lib/xml/names.py
===================================================================
--- Lib/xml/names.py (revision 0)
+++ Lib/xml/names.py (revision 0)
@@ -0,0 +1,153 @@
+"""Function for checking XML names.
+
+This module exposes a single function, is_name().
+"""
+
+import re
+import sys
+
+__all__ = ['is_name']
+
+
+# --- XML 1.0 Names
+
+_xml10_letter = (
+ ur'A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF'
+ ur'\u0100-\u0131\u0134-\u013E\u0141-\u0148\u014A-\u017E\u0180-\u01C3'
+ ur'\u01CD-\u01F0\u01F4-\u01F5\u01FA-\u0217\u0250-\u02A8\u02BB-\u02C1'
+ ur'\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D6'
+ ur'\u03DA\u03DC\u03DE\u03E0\u03E2-\u03F3\u0401-\u040C\u040E-\u044F'
+ ur'\u0451-\u045C\u045E-\u0481\u0490-\u04C4\u04C7-\u04C8\u04CB-\u04CC'
+ ur'\u04D0-\u04EB\u04EE-\u04F5\u04F8-\u04F9\u0531-\u0556\u0559'
+ ur'\u0561-\u0586\u05D0-\u05EA\u05F0-\u05F2\u0621-\u063A\u0641-\u064A'
+ ur'\u0671-\u06B7\u06BA-\u06BE\u06C0-\u06CE\u06D0-\u06D3\u06D5'
+ ur'\u06E5-\u06E6\u0905-\u0939\u093D\u0958-\u0961\u0985-\u098C'
+ ur'\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9'
+ ur'\u09DC-\u09DD\u09DF-\u09E1\u09F0-\u09F1\u0A05-\u0A0A\u0A0F-\u0A10'
+ ur'\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39'
+ ur'\u0A59-\u0A5C\u0A5E\u0A72-\u0A74\u0A85-\u0A8B\u0A8D\u0A8F-\u0A91'
+ ur'\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABD\u0AE0'
+ ur'\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33'
+ ur'\u0B36-\u0B39\u0B3D\u0B5C-\u0B5D\u0B5F-\u0B61\u0B85-\u0B8A'
+ ur'\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F'
+ ur'\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0C05-\u0C0C'
+ ur'\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C60-\u0C61'
+ ur'\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9'
+ ur'\u0CDE\u0CE0-\u0CE1'
+ ur'\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D60-\u0D61'
+ ur'\u0E01-\u0E2E\u0E30\u0E32-\u0E33\u0E40-\u0E45\u0E81-\u0E82\u0E84'
+ ur'\u0E87-\u0E88\u0E8A\u0E8D\u0E94-\u0E97\u0E99-\u0E9F\u0EA1-\u0EA3'
+ ur'\u0EA5\u0EA7\u0EAA-\u0EAB\u0EAD-\u0EAE\u0EB0\u0EB2-\u0EB3\u0EBD'
+ ur'\u0EC0-\u0EC4'
+ ur'\u0F40-\u0F47\u0F49-\u0F69'
+ ur'\u10A0-\u10C5\u10D0-\u10F6'
+ ur'\u1100\u1102-\u1103\u1105-\u1107\u1109\u110B-\u110C\u110E-\u1112'
+ ur'\u113C\u113E\u1140\u114C\u114E\u1150\u1154-\u1155\u1159'
+ ur'\u115F-\u1161\u1163\u1165\u1167\u1169\u116D-\u116E\u1172-\u1173'
+ ur'\u1175\u119E\u11A8\u11AB\u11AE-\u11AF\u11B7-\u11B8\u11BA'
+ ur'\u11BC-\u11C2\u11EB\u11F0\u11F9'
+ ur'\u1E00-\u1E9B\u1EA0-\u1EF9'
+ ur'\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57'
+ ur'\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE'
+ ur'\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC'
+ ur'\u1FF2-\u1FF4\u1FF6-\u1FFC'
+ ur'\u2126\u212A-\u212B\u212E\u2180-\u2182'
+ ur'\u3007'
+ ur'\u3021-\u3029'
+ ur'\u3041-\u3094\u30A1-\u30FA\u3105-\u312C'
+ ur'\u4E00-\u9FA5'
+ ur'\uAC00-\uD7A3')
+
+# [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
+# | CombiningChar | Extender
+_xml10_name_char = (
+ _xml10_letter + ur'\.\-_:' +
+ # Digit
+ ur'\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u0966-\u096F\u09E6-\u09EF'
+ ur'\u0A66-\u0A6F\u0AE6-\u0AEF\u0B66-\u0B6F\u0BE7-\u0BEF\u0C66-\u0C6F'
+ ur'\u0CE6-\u0CEF\u0D66-\u0D6F\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29'
+ # CombiningChar
+ ur'\u0300-\u0345\u0360-\u0361\u0483-\u0486'
+ ur'\u0591-\u05A1\u05A3-\u05B9\u05BB-\u05BD\u05BF\u05C1-\u05C2\u05C4'
+ ur'\u064B-\u0652\u0670\u06D6-\u06DC\u06DD-\u06DF\u06E0-\u06E4'
+ ur'\u06E7-\u06E8\u06EA-\u06ED'
+ ur'\u0901-\u0903\u093C\u093E-\u094C\u094D\u0951-\u0954\u0962-\u0963'
+ ur'\u0981-\u0983\u09BC\u09BE\u09BF\u09C0-\u09C4\u09C7-\u09C8'
+ ur'\u09CB-\u09CD\u09D7\u09E2-\u09E3'
+ ur'\u0A02\u0A3C\u0A3E\u0A3F\u0A40-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D'
+ ur'\u0A70-\u0A71\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9'
+ ur'\u0ACB-\u0ACD'
+ ur'\u0B01-\u0B03\u0B3C\u0B3E-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D'
+ ur'\u0B56-\u0B57\u0B82-\u0B83\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD'
+ ur'\u0BD7'
+ ur'\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56'
+ ur'\u0C82-\u0C83\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6'
+ ur'\u0D02-\u0D03\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57'
+ ur'\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB-\u0EBC'
+ ur'\u0EC8-\u0ECD'
+ ur'\u0F18-\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84'
+ ur'\u0F86-\u0F8B\u0F90-\u0F95\u0F97\u0F99-\u0FAD\u0FB1-\u0FB7\u0FB9'
+ ur'\u20D0-\u20DC\u20E1\u302A-\u302F\u3099\u309A'
+ # Extender
+ ur'\u00B7\u02D0\u02D1\u0387\u0640\u0E46\u0EC6\u3005\u3031-\u3035'
+ ur'\u309D-\u309E\u30FC-\u30FE'
+ )
+
+# [5] Name ::= (Letter | '_' | ':') (NameChar)*
+_xml10_name_re = re.compile(
+    ur'^[' + _xml10_letter + ur'_:]'
+    ur'[' + _xml10_name_char + ur']*\Z')  # \Z, not $: $ also matches before a trailing "\n"
+
+
+# --- XML 1.1 Names
+
+# Matches the NameStartChar production except for characters outside
+# the BMP, which are handled by _xml11_wrapper (below).
+#
+# [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
+# | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D]
+# | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F]
+# | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF]
+# | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+_xml11_name_start_char = (
+ ur':A-Z_a-z\xC0-\xD6\xD8-\xF6'
+ ur'\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F'
+ ur'\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD')
+
+# Matches the XML 1.1 NameChar production except for characters outside
+# the BMP.
+#
+# [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7
+# | [#x0300-#x036F] | [#x203F-#x2040]
+_xml11_name_char = (
+ _xml11_name_start_char + ur'\-\.0-9\xB7\u0300-\u036F\u203F\u2040')
+
+# Wrap a character class to add the XML 1.1 name characters outside the BMP.
+# The wide-build range holds real characters: re does not parse \U escapes.
+if sys.maxunicode == 65535:
+    _xml11_wrapper = ur'(?:[%s]|[\uD800-\uDB7F][\uDC00-\uDFFF])'
+else:
+    _xml11_wrapper = u'[%s\U00010000-\U000EFFFF]'
+
+# [5] Name ::= NameStartChar (NameChar)*
+_xml11_name_re = re.compile(
+    ur'^'
+    + (_xml11_wrapper % _xml11_name_start_char)
+    + (_xml11_wrapper % _xml11_name_char) + ur'*\Z')  # \Z, not $: reject a trailing "\n"
+
+
+def is_name(name, xmlVersion='1.0'):
+    """Determine whether name may be used as an XML Name.
+
+    Return True if name matches the Name grammar production in the
+    specified version of XML.  Raises ValueError if xmlVersion is not
+    a known version of XML (currently "1.0" or "1.1"); this includes
+    non-string values such as None.
+    """
+    if xmlVersion == '1.0':
+        name_re = _xml10_name_re
+    elif xmlVersion == '1.1':
+        name_re = _xml11_name_re
+    else:
+        raise ValueError("Unsupported XML version: %s" % (xmlVersion,))
+    return name_re.match(name) is not None
Index: Modules/pyexpat.c
===================================================================
--- Modules/pyexpat.c (revision 55014)
+++ Modules/pyexpat.c (working copy)
@@ -1079,6 +1079,17 @@
return Py_BuildValue("z", XML_GetBase(self->itself));
}
+PyDoc_STRVAR(xmlparse_GetSpecifiedAttributeCount__doc__,
+"GetSpecifiedAttributeCount() -> int\n\
+Return the number of attributes specified (not defaulted)\n\
+in the most recent StartElementHandler call.");
+
+static PyObject *
+xmlparse_GetSpecifiedAttributeCount(xmlparseobject *self, PyObject *unused)
+{
+ return PyInt_FromLong(XML_GetSpecifiedAttributeCount(self->itself));
+}
+
PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
"GetInputContext() -> string\n\
Return the untranslated text of the input that caused the current event.\n\
@@ -1251,6 +1262,8 @@
METH_VARARGS, xmlparse_SetBase__doc__},
{"GetBase", (PyCFunction)xmlparse_GetBase,
METH_NOARGS, xmlparse_GetBase__doc__},
+ {"GetSpecifiedAttributeCount", (PyCFunction)xmlparse_GetSpecifiedAttributeCount,
+ METH_NOARGS, xmlparse_GetSpecifiedAttributeCount__doc__},
{"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
{"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,