diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -109,3 +109,4 @@
88a0792e8ba3e4916b24c7e7a522c277d326d66e v3.3.0rc2
c191d21cefafb3832c45570e84854e309aa62eaa v3.3.0rc3
bd8afb90ebf28ba4edc901d4a235f75e7bbc79fd v3.3.0
+afc380863975b201e162f94a101f15e37ca233a6 xmlbomb-2.7-merge1
diff --git a/Include/pyexpat.h b/Include/pyexpat.h
--- a/Include/pyexpat.h
+++ b/Include/pyexpat.h
@@ -3,7 +3,11 @@
/* note: you must import expat.h before importing this module! */
+#ifdef XML_BOMB_PROTECTION
+#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.1"
+#else
#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.0"
+#endif
#define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI"
struct PyExpat_CAPI
@@ -45,6 +49,15 @@
void (*SetUserData)(XML_Parser parser, void *userData);
void (*SetStartDoctypeDeclHandler)(XML_Parser parser,
XML_StartDoctypeDeclHandler start);
+#ifdef XML_BOMB_PROTECTION
+ /* CAPI 1.1 bomb protection additions */
+ int (*GetFeature)(XML_Parser parser, enum XML_FeatureEnum feature,
+ long *value);
+ int (*SetFeature)(XML_Parser parser, enum XML_FeatureEnum feature,
+ long value);
+ int (*GetFeatureDefault)(enum XML_FeatureEnum feature, long *value);
+ int (*SetFeatureDefault)(enum XML_FeatureEnum feature, long value);
+#endif
/* always add new stuff to the end! */
};
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -7,7 +7,9 @@
from xml.parsers import expat
from xml.parsers.expat import errors
-from test.support import sortdict, run_unittest
+from test.support import sortdict, run_unittest, findfile
+
+XMLBOMB_XMLFILE = findfile("xmlbomb.xml", subdir="xmltestdata")
class SetAttributeTest(unittest.TestCase):
@@ -95,11 +97,11 @@
def NotationDeclHandler(self, *args):
name, base, sysid, pubid = args
- self.out.append('Notation declared: %s' %(args,))
+ self.out.append('Notation declared: %s' % (args,))
def UnparsedEntityDeclHandler(self, *args):
entityName, base, systemId, publicId, notationName = args
- self.out.append('Unparsed entity decl: %s' %(args,))
+ self.out.append('Unparsed entity decl: %s' % (args,))
def NotStandaloneHandler(self):
self.out.append('Not standalone')
@@ -107,7 +109,7 @@
def ExternalEntityRefHandler(self, *args):
context, base, sysId, pubId = args
- self.out.append('External entity ref: %s' %(args[1:],))
+ self.out.append('External entity ref: %s' % (args[1:],))
return 1
def StartDoctypeDeclHandler(self, *args):
@@ -436,7 +438,7 @@
'too many parser events')
expected = self.expected_list[self.upto]
self.assertEqual(pos, expected,
- 'Expected position %s, got position %s' %(pos, expected))
+ 'Expected position %s, got position %s' % (pos, expected))
self.upto += 1
def test(self):
@@ -491,8 +493,8 @@
self.assertRaises(ValueError, f, 0)
def test_unchanged_size(self):
- xml1 = b"" + b'a' * 512
- xml2 = b'a'*512 + b''
+ xml1 = ("%s" % ('a' * 512))
+ xml2 = 'a' * 512 + ''
parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler
parser.buffer_size = 512
@@ -582,7 +584,7 @@
parser.buffer_size = 2048
self.assertEqual(parser.buffer_size, 2048)
- self.n=0
+ self.n = 0
parser.Parse(xml1, 0)
parser.buffer_size = parser.buffer_size // 2
self.assertEqual(parser.buffer_size, 1024)
@@ -677,6 +679,97 @@
b"")
self.assertEqual(handler_call_args, [("bar", "baz")])
+quadratic_bomb = b"""\
+
+
+
+]>
+&a;
+"""
+
+class XmlBombTest(unittest.TestCase):
+
+ @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb_exponential(self):
+ # test that the maximum indirection limitation prevents exponential
+ # entity expansion attacks (billion laughs). Every expansion increases
+ # the indirection level. The result of an expansion is never cached.
+ p = expat.ParserCreate()
+ self.assertEqual(p.max_entity_indirections, 40)
+ p.max_entity_indirections = 71
+ with self.assertRaises(expat.ExpatError) as e:
+ with open(XMLBOMB_XMLFILE, "rb") as f:
+ p.ParseFile(f)
+ self.assertEqual(str(e.exception), "entity indirection limit exceeded: line 7, column 6")
+
+ p = expat.ParserCreate()
+ p.max_entity_indirections = 0
+ with open(XMLBOMB_XMLFILE, "rb") as f:
+ p.ParseFile(f)
+
+ p = expat.ParserCreate()
+ p.max_entity_indirections = 72 # 8 * 8 + 8
+ with open(XMLBOMB_XMLFILE, "rb") as f:
+ p.ParseFile(f)
+
+ @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb_quadratic(self):
+ # test that the total number of characters produced by entity expansion
+ # is limited, to prevent quadratic blowup attacks.
+ p = expat.ParserCreate()
+ self.assertEqual(p.max_entity_expansions, 8 * 1024 ** 2)
+
+ # lower limit to 1024, must fail with one entity of 1025 chars
+ p.max_entity_expansions = 1024
+ xml = quadratic_bomb.replace(b"MARK", b"a" * 1025)
+ with self.assertRaises(expat.ExpatError) as e:
+ p.Parse(xml)
+ self.assertEqual(str(e.exception), "document's entity expansion limit exceeded: line 6, column 6")
+
+ # but passes with an entity of 1024 chars
+ xml = quadratic_bomb.replace(b"MARK", b"a" * 1024)
+ p = expat.ParserCreate()
+ p.max_entity_expansions = 1024
+ p.Parse(xml)
+
+ # one level of indirection, b = "&a;&a;" adds 6 chars
+ xml = quadratic_bomb.replace(b"MARK", b"a" * 512)
+ xml = xml.replace(b"&a;", b"&b;")
+ p = expat.ParserCreate()
+ p.max_entity_expansions = 1024
+ with self.assertRaises(expat.ExpatError) as e:
+ p.Parse(xml)
+ self.assertEqual(str(e.exception), "document's entity expansion limit exceeded: line 6, column 6")
+
+ p = expat.ParserCreate()
+ p.max_entity_expansions = 1030 # 2 * x512 + 6
+ p.Parse(xml)
+
+ # test default limit of 8 MB
+ xml = quadratic_bomb.replace(b"MARK", b"a" * 2 * 1024 ** 2)
+ xml = xml.replace(b"&a;", b"&c;")
+ p = expat.ParserCreate()
+ with self.assertRaises(expat.ExpatError) as e:
+ p.Parse(xml)
+ self.assertEqual(str(e.exception), "document's entity expansion limit exceeded: line 6, column 6")
+
+ # disabled limit
+ p = expat.ParserCreate()
+ p.max_entity_expansions = 0
+ p.Parse(xml)
+
+ @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb_resetdtd(self):
+ # with reset_dtd enabled, all DTD information is ignored
+ p = expat.ParserCreate()
+ self.assertEqual(p.reset_dtd, False)
+ p.reset_dtd = True
+ with self.assertRaises(expat.ExpatError) as e:
+ with open(XMLBOMB_XMLFILE, "rb") as f:
+ p.ParseFile(f)
+ self.assertEqual(str(e.exception), "undefined entity: line 7, column 6")
+
def test_main():
run_unittest(SetAttributeTest,
@@ -690,7 +783,8 @@
ChardataBufferTest,
MalformedInputTest,
ErrorMessageTest,
- ForeignDTDTests)
+ ForeignDTDTests,
+ XmlBombTest)
if __name__ == "__main__":
test_main()
diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py
--- a/Lib/test/test_sax.py
+++ b/Lib/test/test_sax.py
@@ -11,8 +11,11 @@
from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
XMLFilterBase
from xml.sax.expatreader import create_parser
-from xml.sax.handler import feature_namespaces
+from xml.sax.handler import (feature_namespaces,
+ feature_max_entity_indirections, feature_max_entity_expansions,
+ feature_ignore_dtd, feature_external_ges)
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
+from xml.parsers import expat
from io import BytesIO, StringIO
import os.path
import shutil
@@ -22,6 +25,7 @@
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
+XMLBOMB_XMLFILE = findfile("xmlbomb.xml", subdir="xmltestdata")
try:
TEST_XMLFILE.encode("utf-8")
TEST_XMLFILE_OUT.encode("utf-8")
@@ -588,7 +592,7 @@
def __init__(self):
self._notations = []
- self._entities = []
+ self._entities = []
def notationDecl(self, name, publicId, systemId):
self._notations.append((name, publicId, systemId))
@@ -623,6 +627,7 @@
def test_expat_entityresolver(self):
parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
parser.setEntityResolver(self.TestEntityResolver())
result = BytesIO()
parser.setContentHandler(XMLGenerator(result))
@@ -925,6 +930,91 @@
self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")
+# ===========================================================================
+#
+# XML bomb DoS tests
+#
+# ===========================================================================
+
+class XmlBombTest(unittest.TestCase):
+
+ def test_protection_features(self):
+ parser = create_parser()
+ self.assertEqual(parser.getFeature(feature_max_entity_indirections),
+ expat.XML_DEFAULT_MAX_ENTITY_INDIRECTIONS)
+ self.assertEqual(parser.getFeature(feature_max_entity_expansions),
+ expat.XML_DEFAULT_MAX_ENTITY_EXPANSIONS)
+ self.assertFalse(parser.getFeature(feature_ignore_dtd))
+ parser.setFeature(feature_ignore_dtd, True)
+ self.assertTrue(parser.getFeature(feature_ignore_dtd))
+ if expat.XML_BOMB_PROTECTION:
+ parser.setFeature(feature_max_entity_indirections, 100)
+ self.assertEqual(parser.getFeature(feature_max_entity_indirections),
+ 100)
+ parser.setFeature(feature_max_entity_expansions, 1024)
+ self.assertEqual(parser.getFeature(feature_max_entity_expansions),
+ 1024)
+
+ @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb_exponential(self):
+ parser = create_parser()
+ result = StringIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ inpsrc = InputSource()
+ with self.assertRaises(SAXParseException) as e:
+ with open(XMLBOMB_XMLFILE, "rb") as f:
+ inpsrc.setByteStream(f)
+ parser.parse(inpsrc)
+ self.assertEqual(str(e.exception),
+ ":7:6: entity indirection limit exceeded")
+
+ parser = create_parser()
+ parser.setFeature(feature_max_entity_indirections, 72)
+ result = StringIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ inpsrc = InputSource()
+ with open(XMLBOMB_XMLFILE, "rb") as f:
+ inpsrc.setByteStream(f)
+ parser.parse(inpsrc)
+
+ @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb_quadratic(self):
+ parser = create_parser()
+ result = StringIO()
+ parser.setContentHandler(XMLGenerator(result))
+ parser.setFeature(feature_max_entity_expansions, 1024)
+
+ parser.feed('\n' % ('a' * 1025))
+ parser.feed(']>\n')
+ with self.assertRaises(SAXParseException) as e:
+ parser.feed('&a;')
+ self.assertEqual(str(e.exception),
+ ":4:5: document's entity expansion limit "
+ "exceeded")
+
+ @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb_resetdtd(self):
+ parser = create_parser()
+ result = StringIO()
+ xmlgen = XMLGenerator(result)
+
+ parser.setContentHandler(xmlgen)
+ inpsrc = InputSource()
+ # with ignore_dtd enabled the entity reference must be reported as undefined
+ parser.setFeature(feature_ignore_dtd, True)
+ with self.assertRaises(SAXParseException) as e:
+ with open(XMLBOMB_XMLFILE, "rb") as f:  # binary mode: used as a byte stream
+ inpsrc.setByteStream(f)
+ parser.parse(inpsrc)
+ self.assertEqual(str(e.exception),
+ ":7:6: undefined entity")
+
+
def test_main():
run_unittest(MakeParserTest,
SaxutilsTest,
@@ -933,7 +1023,8 @@
WriterXmlgenTest,
ExpatReaderTest,
ErrorReportingTest,
- XmlReaderTest)
+ XmlReaderTest,
+ XmlBombTest)
if __name__ == "__main__":
test_main()
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -26,6 +26,12 @@
from test import support
from test.support import TESTFN, findfile, unlink, import_fresh_module, gc_collect
+try:
+ import pyexpat
+ XML_BOMB_PROTECTION = pyexpat.XML_BOMB_PROTECTION
+except (ImportError, AttributeError):
+ XML_BOMB_PROTECTION = False
+
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
@@ -39,6 +45,7 @@
except UnicodeEncodeError:
raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
+XMLBOMB_XMLFILE = findfile("xmlbomb.xml", subdir="xmltestdata")
SAMPLE_XML = """\
@@ -87,6 +94,14 @@
"""
+QUADRATIC_BOMB_TPL = """\
+
+
+]>
+&a;
+"""
+
def sanity():
"""
Import sanity.
@@ -1747,7 +1762,7 @@
expected = '<%s>' % elem
serialized = serialize(ET.XML('<%s />' % elem), method='html')
self.assertEqual(serialized, expected)
- serialized = serialize(ET.XML('<%s>%s>' % (elem,elem)),
+ serialized = serialize(ET.XML('<%s>%s>' % (elem, elem)),
method='html')
self.assertEqual(serialized, expected)
@@ -2490,6 +2505,57 @@
# --------------------------------------------------------------------
+class XmlBombTest(unittest.TestCase):
+
+ @unittest.skipUnless(XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb(self):
+ limit = 8 * 1024 ** 2 + 1
+
+ # test entity indirections
+ with self.assertRaisesRegex(ET.ParseError,
+ 'entity indirection limit exceeded: '
+ 'line 7, column 6'):
+ ET.parse(XMLBOMB_XMLFILE)
+
+ parser = ET.XMLParser(max_entity_indirections=71)
+ with self.assertRaisesRegex(ET.ParseError,
+ 'entity indirection limit exceeded: '
+ 'line 7, column 6'):
+ ET.parse(XMLBOMB_XMLFILE, parser=parser)
+
+ parser = ET.XMLParser(max_entity_indirections=72)
+ ET.parse(XMLBOMB_XMLFILE, parser=parser)
+
+ parser = ET.XMLParser(max_entity_indirections=0)
+ ET.parse(XMLBOMB_XMLFILE, parser=parser)
+
+ # test ignore_dtd
+ parser = ET.XMLParser(ignore_dtd=True)
+ with self.assertRaisesRegex(ET.ParseError,
+ 'undefined entity: line 7, column 6'):
+ ET.parse(XMLBOMB_XMLFILE, parser=parser)
+
+ xml = QUADRATIC_BOMB_TPL.replace("MARK", "a" * limit)
+ with self.assertRaisesRegex(ET.ParseError,
+ "document's entity expansion limit "
+ "exceeded: line 5, column 6"):
+ ET.fromstring(xml)
+
+ parser = ET.XMLParser(max_entity_expansions=0)
+ e = ET.fromstring(xml, parser=parser)
+ self.assertEqual(e.text, "a" * limit)
+
+ parser = ET.XMLParser(max_entity_expansions=limit + 1)
+ e = ET.fromstring(xml, parser=parser)
+ self.assertEqual(e.text, "a" * limit)
+
+ parser = ET.XMLParser(ignore_dtd=True)
+ with self.assertRaisesRegex(ET.ParseError,
+ 'undefined entity: line 5, column 6'):
+ ET.fromstring(xml, parser=parser)
+
+# --------------------------------------------------------------------
+
class CleanContext(object):
"""Provide default namespace mapping and path cache."""
@@ -2553,6 +2619,7 @@
ElementFindTest,
ElementIterTest,
TreeBuilderTest,
+ XmlBombTest,
]
# These tests will only run for the pure-Python version that doesn't import
diff --git a/Lib/test/xmltestdata/xmlbomb.xml b/Lib/test/xmltestdata/xmlbomb.xml
new file mode 100644
--- /dev/null
+++ b/Lib/test/xmltestdata/xmlbomb.xml
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+&c;
diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py
--- a/Lib/xml/dom/expatbuilder.py
+++ b/Lib/xml/dom/expatbuilder.py
@@ -172,13 +172,18 @@
def install(self, parser):
"""Install the callbacks needed to build the DOM into the parser."""
# This creates circular references!
- parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
+ if not self._options.ignore_dtd:
+ parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
+ parser.NotationDeclHandler = self.notation_decl_handler
parser.StartElementHandler = self.first_element_handler
parser.EndElementHandler = self.end_element_handler
parser.ProcessingInstructionHandler = self.pi_handler
if self._options.entities:
+ # or ignore_dtd
parser.EntityDeclHandler = self.entity_decl_handler
- parser.NotationDeclHandler = self.notation_decl_handler
+ if self._options.external_general_entities:
+ # or ignore_dtd
+ parser.ExternalEntityRefHandler = self.external_entity_ref_handler
if self._options.comments:
parser.CommentHandler = self.comment_handler
if self._options.cdata_sections:
@@ -187,10 +192,14 @@
parser.CharacterDataHandler = self.character_data_handler_cdata
else:
parser.CharacterDataHandler = self.character_data_handler
- parser.ExternalEntityRefHandler = self.external_entity_ref_handler
parser.XmlDeclHandler = self.xml_decl_handler
parser.ElementDeclHandler = self.element_decl_handler
parser.AttlistDeclHandler = self.attlist_decl_handler
+ if expat.XML_BOMB_PROTECTION:
+ options = self._options
+ parser.reset_dtd = options.ignore_dtd
+ parser.max_entity_indirections = options.max_entity_indirections
+ parser.max_entity_expansions = options.max_entity_expansions
def parseFile(self, file):
"""Parse a document from a file object, returning the document
@@ -199,7 +208,7 @@
first_buffer = True
try:
while 1:
- buffer = file.read(16*1024)
+ buffer = file.read(16 * 1024)
if not buffer:
break
parser.Parse(buffer, 0)
@@ -272,7 +281,7 @@
def character_data_handler_cdata(self, data):
childNodes = self.curNode.childNodes
if self._cdata:
- if ( self._cdata_continue
+ if (self._cdata_continue
and childNodes[-1].nodeType == CDATA_SECTION_NODE):
childNodes[-1].appendData(data)
return
@@ -356,7 +365,7 @@
for i in range(0, len(attributes), 2):
a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
None, EMPTY_PREFIX)
- value = attributes[i+1]
+ value = attributes[i + 1]
a.value = value
a.ownerDocument = self.document
_set_attribute_node(node, a)
@@ -763,7 +772,7 @@
_attrsNS = node._attrsNS
for i in range(0, len(attributes), 2):
aname = attributes[i]
- value = attributes[i+1]
+ value = attributes[i + 1]
if ' ' in aname:
uri, localname, prefix, qname = _parse_ns_name(self, aname)
a = minidom.Attr(qname, uri, localname, prefix)
diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py
--- a/Lib/xml/dom/xmlbuilder.py
+++ b/Lib/xml/dom/xmlbuilder.py
@@ -2,6 +2,7 @@
import copy
import xml.dom
+from xml.parsers import expat
from xml.dom.NodeFilter import NodeFilter
@@ -22,9 +23,9 @@
namespaces = 1
namespace_declarations = True
validation = False
- external_parameter_entities = True
- external_general_entities = True
- external_dtd_subset = True
+ external_parameter_entities = False # changed
+ external_general_entities = False # changed
+ external_dtd_subset = False # changed
validate_if_schema = False
validate = False
datatype_normalization = False
@@ -40,6 +41,10 @@
errorHandler = None
filter = None
+ max_entity_indirections = expat.XML_DEFAULT_MAX_ENTITY_INDIRECTIONS
+ max_entity_expansions = expat.XML_DEFAULT_MAX_ENTITY_EXPANSIONS
+ ignore_dtd = False
+
class DOMBuilder:
entityResolver = None
@@ -157,6 +162,18 @@
("namespaces", 0)],
("namespaces", 1): [
("namespaces", 1)],
+ ("max_entity_indirections", 0): [
+ ("max_entity_indirections", 0)],
+ ("max_entity_expansions", 0): [
+ ("max_entity_expansions", 0)],
+ ("ignore_dtd", 0): [
+ ("ignore_dtd", 0),
+ ("external_general_entities", 0),
+ ("external_parameter_entities", 0),
+ ("create_entity_ref_nodes", 0),
+ ("entities", 0)],
+ ("ignore_dtd", 1): [
+ ("ignore_dtd", 1)],
}
def getFeature(self, name):
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1498,7 +1498,7 @@
def end(self, tag):
self._flush()
self._last = self._elem.pop()
- assert self._last.tag == tag,\
+ assert self._last.tag == tag, \
"end tag mismatch (expected %s, got %s)" % (
self._last.tag, tag)
self._tail = 1
@@ -1519,7 +1519,9 @@
class XMLParser:
- def __init__(self, html=0, target=None, encoding=None):
+ def __init__(self, html=0, target=None, encoding=None,
+ max_entity_indirections=None, max_entity_expansions=None,
+ ignore_dtd=False):
try:
from xml.parsers import expat
except ImportError:
@@ -1530,6 +1532,14 @@
"No module named expat; use SimpleXMLTreeBuilder instead"
)
parser = expat.ParserCreate(encoding, "}")
+ if expat.XML_BOMB_PROTECTION:
+ if max_entity_indirections is not None:
+ parser.max_entity_indirections = max_entity_indirections
+ if max_entity_expansions is not None:
+ parser.max_entity_expansions = max_entity_expansions
+ if ignore_dtd:
+ parser.reset_dtd = True
+
if target is None:
target = TreeBuilder()
# underscored names are provided for compatibility only
@@ -1601,7 +1611,7 @@
attrib = {}
if attrib_in:
for i in range(0, len(attrib_in), 2):
- attrib[fixname(attrib_in[i])] = attrib_in[i+1]
+ attrib[fixname(attrib_in[i])] = attrib_in[i + 1]
return self.target.start(tag, attrib)
def _end(self, tag):
@@ -1651,7 +1661,9 @@
pubid = None
else:
return
- if hasattr(self.target, "doctype"):
+ if self.ignore_dtd:
+ pass
+ elif hasattr(self.target, "doctype"):
self.target.doctype(name, pubid, system[1:-1])
elif self.doctype != self._XMLParser__doctype:
# warn about deprecated call
@@ -1659,6 +1671,18 @@
self.doctype(name, pubid, system[1:-1])
self._doctype = None
+ @property
+ def max_entity_indirections(self):
+ return getattr(self.parser, "max_entity_indirections", None)
+
+ @property
+ def max_entity_expansions(self):
+ return getattr(self.parser, "max_entity_expansions", None)
+
+ @property
+ def ignore_dtd(self):
+ return getattr(self.parser, "reset_dtd", None)
+
##
# (Deprecated) Handles a doctype declaration.
#
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
--- a/Lib/xml/sax/expatreader.py
+++ b/Lib/xml/sax/expatreader.py
@@ -10,6 +10,9 @@
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
+from xml.sax.handler import feature_max_entity_indirections
+from xml.sax.handler import feature_max_entity_expansions
+from xml.sax.handler import feature_ignore_dtd
from xml.sax.handler import property_xml_string, property_interning_dict
# xml.parsers.expat does not raise ImportError in Jython
@@ -84,7 +87,17 @@
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
"""SAX driver for the pyexpat C module."""
- def __init__(self, namespaceHandling=0, bufsize=2**16-20):
+ # default settings for security critical issues
+ # whether external general entities are processed
+ EXTERNAL_GES = False
+ # maximum number of indirections allowed when resolving nested entities
+ MAX_ENTITY_INDIRECTIONS = expat.XML_DEFAULT_MAX_ENTITY_INDIRECTIONS
+ # limit on the total number of characters produced by entity expansion
+ MAX_ENTITY_EXPANSIONS = expat.XML_DEFAULT_MAX_ENTITY_EXPANSIONS
+ # ignore DTD information by resetting DTD information
+ IGNORE_DTD = False
+
+ def __init__(self, namespaceHandling=0, bufsize=2 ** 16 - 20):
xmlreader.IncrementalParser.__init__(self, bufsize)
self._source = xmlreader.InputSource()
self._parser = None
@@ -92,8 +105,11 @@
self._lex_handler_prop = None
self._parsing = 0
self._entity_stack = []
- self._external_ges = 1
+ self._external_ges = self.EXTERNAL_GES
self._interning = None
+ self._max_entity_indirections = self.MAX_ENTITY_INDIRECTIONS
+ self._max_entity_expansions = self.MAX_ENTITY_EXPANSIONS
+ self._ignore_dtd = self.IGNORE_DTD
# XMLReader methods
@@ -127,6 +143,12 @@
return 0
elif name == feature_external_ges:
return self._external_ges
+ elif name == feature_max_entity_indirections:
+ return self._max_entity_indirections
+ elif name == feature_max_entity_expansions:
+ return self._max_entity_expansions
+ elif name == feature_ignore_dtd:
+ return self._ignore_dtd
raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
def setFeature(self, name, state):
@@ -155,6 +177,21 @@
if state:
raise SAXNotSupportedException(
"expat does not report namespace prefixes")
+ elif name == feature_max_entity_indirections:
+ if not expat.XML_BOMB_PROTECTION:
+ raise SAXNotSupportedException(
+ "expat does not support max entity indirections")
+ self._max_entity_indirections = state
+ elif name == feature_max_entity_expansions:
+ if not expat.XML_BOMB_PROTECTION:
+ raise SAXNotSupportedException(
+ "expat does not support max entity expansions")
+ self._max_entity_expansions = state
+ elif name == feature_ignore_dtd:
+ if not expat.XML_BOMB_PROTECTION and state:
+ raise SAXNotSupportedException(
+ "expat does not support ignore dtd")
+ self._ignore_dtd = bool(state)
else:
raise SAXNotRecognizedException(
"Feature '%s' not recognized" % name)
@@ -193,7 +230,7 @@
# IncrementalParser methods
- def feed(self, data, isFinal = 0):
+ def feed(self, data, isFinal=0):
if not self._parsing:
self.reset()
self._parsing = 1
@@ -214,7 +251,7 @@
if self._entity_stack:
# If we are completing an external entity, do nothing here
return
- self.feed("", isFinal = 1)
+ self.feed("", isFinal=1)
self._cont_handler.endDocument()
self._parsing = 0
# break cycle created by expat handlers pointing to our methods
@@ -244,6 +281,27 @@
parser.StartDoctypeDeclHandler = self.start_doctype_decl
parser.EndDoctypeDeclHandler = lex.endDTD
+ def _set_ignore_dtd(self):
+ """Apply the ignore-DTD setting to the current expat parser."""
+ parser = self._parser
+ if self._ignore_dtd:
+ parser.reset_dtd = True
+ parser.UnparsedEntityDeclHandler = None
+ parser.EntityDeclHandler = None
+ parser.NotationDeclHandler = None
+ parser.StartDoctypeDeclHandler = None
+ parser.EndDoctypeDeclHandler = None
+ parser.ExternalEntityRefHandler = None
+ try:
+ parser.SkippedEntityHandler = None
+ except AttributeError:
+ # This pyexpat does not support SkippedEntity
+ pass
+
+ else:
+ parser.reset_dtd = False
+ # handlers are already reset
+
def reset(self):
if self._namespaces:
self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
@@ -253,7 +311,7 @@
self._parser.EndElementHandler = self.end_element_ns
else:
self._parser = expat.ParserCreate(self._source.getEncoding(),
- intern = self._interning)
+ intern=self._interning)
self._parser.StartElementHandler = self.start_element
self._parser.EndElementHandler = self.end_element
@@ -278,6 +336,14 @@
self._parser.SetParamEntityParsing(
expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
+ if expat.XML_BOMB_PROTECTION:
+ parser = self._parser
+ if self._max_entity_indirections is not None:
+ parser.max_entity_indirections = self._max_entity_indirections
+ if self._max_entity_expansions is not None:
+ parser.max_entity_expansions = self._max_entity_expansions
+ self._set_ignore_dtd()
+
self._parsing = 0
self._entity_stack = []
@@ -399,7 +465,7 @@
def skipped_entity_handler(self, name, is_pe):
if is_pe:
# The SAX spec requires to report skipped PEs with a '%'
- name = '%'+name
+ name = '%' + name
self._cont_handler.skippedEntity(name)
# ---
diff --git a/Lib/xml/sax/handler.py b/Lib/xml/sax/handler.py
--- a/Lib/xml/sax/handler.py
+++ b/Lib/xml/sax/handler.py
@@ -277,12 +277,28 @@
# DTD subset.
# access: (parsing) read-only; (not parsing) read/write
+feature_max_entity_indirections = \
+ "http://www.python.org/sax/features/max-entity-indirections"
+
+feature_max_entity_expansions = \
+ "http://www.python.org/sax/features/max-entity-expansions"
+
+feature_ignore_dtd = "http://www.python.org/sax/features/ignore-dtd"
+# true: Ignore all doctype information and reset all doctype information
+# after the DTD block has been parsed. No DTD relevant handler is
+# called.
+# false: Obey DTD
+# access: (parsing) read-only; (not parsing) read/write
+
all_features = [feature_namespaces,
feature_namespace_prefixes,
feature_string_interning,
feature_validation,
feature_external_ges,
- feature_external_pes]
+ feature_external_pes,
+ feature_max_entity_indirections,
+ feature_max_entity_expansions,
+ feature_ignore_dtd]
#============================================================================
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -3197,13 +3197,86 @@
XMLParserObject *self_xp = (XMLParserObject *)self;
PyObject *target = NULL, *html = NULL;
char *encoding = NULL;
- static char *kwlist[] = {"html", "target", "encoding", 0};
+ long ignore_dtd_flag = 0;
+
+#ifdef XML_BOMB_PROTECTION
+ PyObject *ignore_dtd = NULL, *indirections = NULL, *expansions = NULL;
+ long max_indirections;
+ long max_expansions;
+
+ static char *kwlist[] = {"html", "target", "encoding",
+ "max_entity_indirections",
+ "max_entity_expansions", "ignore_dtd", 0};
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOzOOO:XMLParser", kwlist,
+ &html, &target, &encoding,
+ &indirections, &expansions,
+ &ignore_dtd)) {
+ return -1;
+ }
+#else
+ static char *kwlist[] = {"html", "target", "encoding", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
&html, &target, &encoding)) {
return -1;
}
+#endif
+
+#ifdef XML_BOMB_PROTECTION
+ if (indirections == NULL || indirections == Py_None) { /* not given: use GetFeatureDefault() */
+ if (!EXPAT(GetFeatureDefault)(XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ &max_indirections)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+ }
+ else {
+ max_indirections = PyLong_AsLong(indirections);
+ if ((max_indirections == -1) && PyErr_Occurred()) {
+ return -1;
+ }
+ if ((max_indirections > UINT_MAX) || (max_indirections < 0)) {
+ PyErr_Format(PyExc_ValueError,
+ "max_entity_indirections must be between 0 and %u",
+ UINT_MAX); /* unsigned value, hence %u */
+ return -1;
+ }
+ }
+
+ if (expansions == NULL || expansions == Py_None) { /* not given: use GetFeatureDefault() */
+ if (!EXPAT(GetFeatureDefault)(XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ &max_expansions)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+ }
+ else {
+ max_expansions = PyLong_AsLong(expansions);
+ if ((max_expansions == -1) && PyErr_Occurred()) {
+ return -1;
+ }
+ if ((max_expansions > UINT_MAX) || (max_expansions < 0)) {
+ PyErr_Format(PyExc_ValueError,
+ "max_entity_expansions must be between 0 and %u",
+ UINT_MAX); /* unsigned value, hence %u */
+ return -1;
+ }
+ }
+
+ if (ignore_dtd == NULL) {
+ if (!EXPAT(GetFeatureDefault)(XML_FEATURE_RESET_DTD,
+ &ignore_dtd_flag)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+ }
+ else if ((ignore_dtd_flag = PyObject_IsTrue(ignore_dtd)) == -1) {
+ return -1;
+ }
+#endif
+
self_xp->entity = PyDict_New();
if (!self_xp->entity)
return -1;
@@ -3222,6 +3295,27 @@
return -1;
}
+#ifdef XML_BOMB_PROTECTION
+ if (!EXPAT(SetFeature)(self_xp->parser,
+ XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ max_indirections)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+ if (!EXPAT(SetFeature)(self_xp->parser,
+ XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ max_expansions)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+ if (!EXPAT(SetFeature)(self_xp->parser,
+ XML_FEATURE_RESET_DTD,
+ ignore_dtd_flag)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+#endif
+
if (target) {
Py_INCREF(target);
} else {
@@ -3241,7 +3335,9 @@
self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
self_xp->handle_close = PyObject_GetAttrString(target, "close");
- self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
+ if (!ignore_dtd_flag) {
+ self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
+ }
PyErr_Clear();
@@ -3270,10 +3366,12 @@
self_xp->parser,
(XML_ProcessingInstructionHandler) expat_pi_handler
);
- EXPAT(SetStartDoctypeDeclHandler)(
- self_xp->parser,
- (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
- );
+ if (!ignore_dtd_flag) {
+ EXPAT(SetStartDoctypeDeclHandler)(
+ self_xp->parser,
+ (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
+ );
+ }
EXPAT(SetUnknownEncodingHandler)(
self_xp->parser,
(XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
@@ -3590,6 +3688,35 @@
"Expat %d.%d.%d", XML_MAJOR_VERSION,
XML_MINOR_VERSION, XML_MICRO_VERSION);
}
+#ifdef XML_BOMB_PROTECTION
+ else if (PyUnicode_CompareWithASCIIString(nameobj, "ignore_dtd") == 0) {
+ long value = -1;
+ if (!EXPAT(GetFeature)(self->parser, XML_FEATURE_RESET_DTD,
+ &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+
+ return PyBool_FromLong(value);
+ }
+ else if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_indirections") == 0) {
+ long value = -1;
+ if (!EXPAT(GetFeature)(self->parser,
+ XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+ return PyLong_FromLong(value);
+ }
+ else if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_expansions") == 0) {
+ long value = -1;
+ if (!EXPAT(GetFeature)(self->parser,
+ XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+ return PyLong_FromLong(value);
+ }
+#endif
else
goto generic;
diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h
--- a/Modules/expat/expat.h
+++ b/Modules/expat/expat.h
@@ -5,6 +5,20 @@
#ifndef Expat_INCLUDED
#define Expat_INCLUDED 1
+#define XML_BOMB_PROTECTION 1 /* Python only: hard coded */
+
+#ifdef COMPILED_FROM_DSP
+#include "winconfig.h"
+#elif defined(MACOS_CLASSIC)
+#include "macconfig.h"
+#elif defined(__amigaos__)
+#include "amigaconfig.h"
+#elif defined(__WATCOMC__)
+#include "watcomconfig.h"
+#elif defined(HAVE_EXPAT_CONFIG_H)
+#include <expat_config.h>
+#endif /* ndef COMPILED_FROM_DSP */
+
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
@@ -96,6 +110,12 @@
XML_ERROR_RESERVED_PREFIX_XML,
XML_ERROR_RESERVED_PREFIX_XMLNS,
XML_ERROR_RESERVED_NAMESPACE_URI
+#ifdef XML_BOMB_PROTECTION
+ /* Added in 2.2. */
+ ,
+ XML_ERROR_ENTITY_INDIRECTIONS,
+ XML_ERROR_ENTITY_EXPANSION
+#endif
};
enum XML_Content_Type {
@@ -1020,6 +1040,13 @@
XML_FEATURE_NS,
XML_FEATURE_LARGE_SIZE,
XML_FEATURE_ATTR_INFO
+#ifdef XML_BOMB_PROTECTION
+ /* Added in 2.2. */
+ ,
+ XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ XML_FEATURE_RESET_DTD
+#endif
/* Additional features must be added to the end of this enum. */
};
@@ -1032,6 +1059,92 @@
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
+/* Protection against XML bomb DoS attacks
+ Added in 2.2.
+ */
+#ifdef XML_BOMB_PROTECTION
+
+/* XML_FEATURE_MAX_ENTITY_INDIRECTIONS
+
+ Limit the amount of indirections that are allowed to occur during the
+ expansion of a nested entity. A counter starts when an entity reference
+ is encountered. It resets after the entity is fully expanded. The limit
+ protects the parser against exponential entity expansion attacks (aka
+ billion laughs attack). When the limit is exceeded the parser stops and
+ fails with `XML_ERROR_ENTITY_INDIRECTIONS`.
+ A value of 0 disables the protection.
+
+ Supported range: 0 .. UINT_MAX
+ Default: 40
+ */
+
+#ifndef XML_DEFAULT_MAX_ENTITY_INDIRECTIONS
+#define XML_DEFAULT_MAX_ENTITY_INDIRECTIONS 40
+#endif
+
+/* XML_FEATURE_MAX_ENTITY_EXPANSIONS
+
+ Limit the total length of all entity expansions throughout the entire
+ document. The lengths of all entities are accumulated in a parser variable.
+ The setting protects against quadratic blowup attacks (lots of expansions
+ of a large entity declaration). When the sum of all entities exceeds
+ the limit, the parser stops and fails with `XML_ERROR_ENTITY_EXPANSION`.
+ A value of 0 disables the protection.
+
+ Supported range: 0 .. UINT_MAX
+ Default: 8 MB
+ */
+#ifndef XML_DEFAULT_MAX_ENTITY_EXPANSIONS
+#define XML_DEFAULT_MAX_ENTITY_EXPANSIONS (1 << 23) /* 8 MiB; parenthesized so the macro expands safely inside larger expressions */
+#endif
+
+/* XML_FEATURE_RESET_DTD
+
+ Reset all DTD information after the DOCTYPE block has been parsed. When
+ the flag is set (default: false) all DTD information is discarded after
+ the endDoctypeDeclHandler has been called. The flag can be set inside the
+ endDoctypeDeclHandler. Without DTD information any entity reference in
+ the document body leads to an XML_ERROR_UNDEFINED_ENTITY.
+
+ Supported range: 0, 1
+ Default: 0
+ */
+#ifndef XML_DEFAULT_DTD_RESET
+#define XML_DEFAULT_DTD_RESET XML_FALSE
+#endif
+
+/* Feature modifiers
+
+ On success the functions shall return 1 and modify or retrieve the value.
+
+ Otherwise, 0 shall be returned and errno set to indicate an error. The
+ value shall not be modified if a function signals an error.
+
+ ENOENT feature is not supported
+ EINVAL value is invalid and outside the allowed range
+
+ As of now three features are supported:
+ - XML_FEATURE_MAX_ENTITY_INDIRECTIONS
+ - XML_FEATURE_MAX_ENTITY_EXPANSIONS
+ - XML_FEATURE_RESET_DTD
+
+ */
+
+/* Get / set a feature of one XML parser instance. Declared with XMLPARSEAPI
+   so linkage and calling convention match the XMLCALL definitions. */
+XMLPARSEAPI(int)
+XML_GetFeature(XML_Parser parser, enum XML_FeatureEnum feature, long *value);
+
+XMLPARSEAPI(int)
+XML_SetFeature(XML_Parser parser, enum XML_FeatureEnum feature, long value);
+
+/* Get / set the global default that parsers are initialised with.
+   All four functions return 1 on success, 0 with errno set on failure. */
+XMLPARSEAPI(int) XML_GetFeatureDefault(enum XML_FeatureEnum feature, long *value);
+XMLPARSEAPI(int) XML_SetFeatureDefault(enum XML_FeatureEnum feature, long value);
+
+#endif /* XML_BOMB_PROTECTION */
+
/* Expat follows the GNU/Linux convention of odd number minor version for
beta/development releases and even number minor version for stable
diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c
--- a/Modules/expat/xmlparse.c
+++ b/Modules/expat/xmlparse.c
@@ -7,21 +7,10 @@
#include <assert.h>
#include <limits.h> /* UINT_MAX */
#include <time.h> /* time() */
+#include <errno.h>
#define XML_BUILDING_EXPAT 1
-#ifdef COMPILED_FROM_DSP
-#include "winconfig.h"
-#elif defined(MACOS_CLASSIC)
-#include "macconfig.h"
-#elif defined(__amigaos__)
-#include "amigaconfig.h"
-#elif defined(__WATCOMC__)
-#include "watcomconfig.h"
-#elif defined(HAVE_EXPAT_CONFIG_H)
-#include <expat_config.h>
-#endif /* ndef COMPILED_FROM_DSP */
-
#include "ascii.h"
#include "expat.h"
@@ -141,6 +130,12 @@
#define EXPAND_SPARE 24
+#ifdef XML_BOMB_PROTECTION
+static unsigned int defaultMaxEntityIndirections = XML_DEFAULT_MAX_ENTITY_INDIRECTIONS;
+static unsigned int defaultMaxEntityExpansions = XML_DEFAULT_MAX_ENTITY_EXPANSIONS;
+static XML_Bool defaultResetDTDFlag = XML_DEFAULT_DTD_RESET;
+#endif
+
typedef struct binding {
struct prefix *prefix;
struct binding *nextTagBinding;
@@ -151,6 +146,11 @@
int uriAlloc;
} BINDING;
+/* Python only: workaround for PREFIX macro in PC/pyconfig.h */
+#ifdef PREFIX
+#undef PREFIX
+#endif
+
typedef struct prefix {
const XML_Char *name;
BINDING *binding;
@@ -557,6 +557,13 @@
enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
unsigned long m_hash_secret_salt;
+#ifdef XML_BOMB_PROTECTION
+ unsigned int m_entityIndirections;
+ unsigned int m_maxEntityIndirections;
+ unsigned int m_entityExpansions;
+ unsigned int m_maxEntityExpansions;
+ XML_Bool m_resetDTDFlag;
+#endif /* XML_BOMB_PROTECTION */
};
#define MALLOC(s) (parser->m_mem.malloc_fcn((s)))
@@ -666,6 +673,13 @@
#define paramEntityParsing (parser->m_paramEntityParsing)
#endif /* XML_DTD */
#define hash_secret_salt (parser->m_hash_secret_salt)
+#ifdef XML_BOMB_PROTECTION
+#define entityIndirections (parser->m_entityIndirections)
+#define maxEntityIndirections (parser->m_maxEntityIndirections)
+#define entityExpansions (parser->m_entityExpansions)
+#define maxEntityExpansions (parser->m_maxEntityExpansions)
+#define resetDTDFlag (parser->m_resetDTDFlag)
+#endif /* XML_BOMB_PROTECTION */
XML_Parser XMLCALL
XML_ParserCreate(const XML_Char *encodingName)
@@ -756,6 +770,13 @@
buffer = NULL;
bufferLim = NULL;
+#ifdef XML_BOMB_PROTECTION
+ entityIndirections = 0;
+ maxEntityIndirections = defaultMaxEntityIndirections;
+ entityExpansions = 0;
+ maxEntityExpansions = defaultMaxEntityExpansions;
+ resetDTDFlag = defaultResetDTDFlag;
+#endif
attsSize = INIT_ATTS_SIZE;
atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE));
@@ -1886,6 +1907,109 @@
return position.columnNumber;
}
+#ifdef XML_BOMB_PROTECTION
+/* Read one bomb-protection setting of `parser` into *value; returns 1 on success, 0 with errno = ENOENT otherwise. */
+int XMLCALL
+XML_GetFeature(XML_Parser parser, enum XML_FeatureEnum feature, long *value) {
+ switch (feature) {
+ case XML_FEATURE_MAX_ENTITY_INDIRECTIONS:
+ *value = (long)maxEntityIndirections; /* macro for parser->m_maxEntityIndirections */
+ return 1;
+ case XML_FEATURE_MAX_ENTITY_EXPANSIONS:
+ *value = (long)maxEntityExpansions; /* macro for parser->m_maxEntityExpansions */
+ return 1;
+ case XML_FEATURE_RESET_DTD:
+ *value = (long)resetDTDFlag; /* macro for parser->m_resetDTDFlag */
+ return 1;
+ default: /* any feature other than the three handled above */
+ errno = ENOENT;
+ return 0; /* *value is left untouched on failure */
+ }
+}
+/* Change one bomb-protection setting of `parser`; returns 1 on success, 0 with errno = EINVAL (bad value) or ENOENT (other feature). */
+int XMLCALL
+XML_SetFeature(XML_Parser parser, enum XML_FeatureEnum feature, long value) {
+ switch (feature) {
+ case XML_FEATURE_MAX_ENTITY_INDIRECTIONS:
+ if ((value < 0) || (value > UINT_MAX)) { /* the limit is stored as unsigned int */
+ errno = EINVAL;
+ return 0;
+ }
+ maxEntityIndirections = (unsigned int)value;
+ return 1;
+ case XML_FEATURE_MAX_ENTITY_EXPANSIONS:
+ if ((value < 0) || (value > UINT_MAX)) { /* the limit is stored as unsigned int */
+ errno = EINVAL;
+ return 0;
+ }
+ maxEntityExpansions = (unsigned int)value;
+ return 1;
+ case XML_FEATURE_RESET_DTD:
+ if ((value == 0) || (value == 1)) { /* strictly boolean: only 0 and 1 are accepted */
+ resetDTDFlag = (XML_Bool)value;
+ return 1;
+ } else {
+ errno = EINVAL;
+ return 0;
+ }
+ default: /* any feature other than the three handled above */
+ errno = ENOENT;
+ return 0;
+ }
+}
+/* Read the file-static default for one setting into *value; returns 1 on success, 0 with errno = ENOENT otherwise. */
+int XMLCALL
+XML_GetFeatureDefault(enum XML_FeatureEnum feature, long *value) {
+ switch (feature) {
+ case XML_FEATURE_MAX_ENTITY_INDIRECTIONS:
+ *value = (long)defaultMaxEntityIndirections;
+ return 1;
+ case XML_FEATURE_MAX_ENTITY_EXPANSIONS:
+ *value = (long)defaultMaxEntityExpansions;
+ return 1;
+ case XML_FEATURE_RESET_DTD:
+ *value = (long)defaultResetDTDFlag;
+ return 1;
+ default: /* any feature other than the three handled above */
+ errno = ENOENT;
+ return 0; /* *value is left untouched on failure */
+ }
+}
+/* Change the file-static default for one setting; returns 1 on success, 0 with errno = EINVAL (bad value) or ENOENT (other feature). */
+int XMLCALL
+XML_SetFeatureDefault(enum XML_FeatureEnum feature, long value) {
+ switch (feature) {
+ case XML_FEATURE_MAX_ENTITY_INDIRECTIONS:
+ if ((value < 0) || (value > UINT_MAX)) { /* the default is stored as unsigned int */
+ errno = EINVAL;
+ return 0;
+ }
+ defaultMaxEntityIndirections = (unsigned int)value;
+ return 1;
+ case XML_FEATURE_MAX_ENTITY_EXPANSIONS:
+ if ((value < 0) || (value > UINT_MAX)) { /* the default is stored as unsigned int */
+ errno = EINVAL;
+ return 0;
+ }
+ defaultMaxEntityExpansions = (unsigned int)value;
+ return 1;
+ case XML_FEATURE_RESET_DTD:
+ if ((value == 0) || (value == 1)) { /* strictly boolean: only 0 and 1 are accepted */
+ defaultResetDTDFlag = (XML_Bool)value;
+ return 1;
+ } else {
+ errno = EINVAL;
+ return 0;
+ }
+ default: /* any feature other than the three handled above */
+ errno = ENOENT;
+ return 0;
+ }
+}
+
+#endif
+
+
void XMLCALL
XML_FreeContentModel(XML_Parser parser, XML_Content *model)
{
@@ -1969,6 +2093,11 @@
XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"),
XML_L("reserved prefix (xmlns) must not be declared or undeclared"),
XML_L("prefix must not be bound to one of the reserved namespace names")
+#ifdef XML_BOMB_PROTECTION
+ ,
+ XML_L("entity indirection limit exceeded"),
+ XML_L("document's entity expansion limit exceeded")
+#endif
};
if (code > 0 && code < sizeof(message)/sizeof(message[0]))
return message[code];
@@ -2040,6 +2169,17 @@
#ifdef XML_ATTR_INFO
{XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
#endif
+#ifdef XML_BOMB_PROTECTION
+ {XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ XML_L("XML_FEATURE_MAX_ENTITY_INDIRECTIONS"),
+ XML_DEFAULT_MAX_ENTITY_INDIRECTIONS},
+ {XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ XML_L("XML_FEATURE_MAX_ENTITY_EXPANSIONS"),
+ XML_DEFAULT_MAX_ENTITY_EXPANSIONS},
+ {XML_FEATURE_RESET_DTD,
+ XML_L("XML_FEATURE_RESET_DTD"),
+ XML_DEFAULT_DTD_RESET},
+#endif
{XML_FEATURE_END, NULL, 0}
};
@@ -2238,9 +2378,15 @@
{
/* save one level of indirection */
DTD * const dtd = _dtd;
-
const char **eventPP;
const char **eventEndPP;
+
+#ifdef XML_BOMB_PROTECTION
+ if (haveMore) {
+ entityIndirections = 0;
+ }
+#endif
+
if (enc == encoding) {
eventPP = &eventPtr;
eventEndPP = &eventEndPtr;
@@ -3974,6 +4120,11 @@
endDoctypeDeclHandler(handlerArg);
handleDefault = XML_FALSE;
}
+#ifdef XML_BOMB_PROTECTION
+ if (resetDTDFlag) {
+ dtdReset(dtd, &parser->m_mem);
+ }
+#endif
break;
case XML_ROLE_INSTANCE_START:
#ifdef XML_DTD
@@ -4800,6 +4951,22 @@
enum XML_Error result;
OPEN_INTERNAL_ENTITY *openEntity;
+#ifdef XML_BOMB_PROTECTION
+ if (maxEntityIndirections && (entityIndirections > maxEntityIndirections)) {
+ return XML_ERROR_ENTITY_INDIRECTIONS;
+ }
+ if (maxEntityExpansions) {
+ if (entity->textLen > UINT_MAX - entityExpansions) {
+ /* overflow */
+ return XML_ERROR_ENTITY_EXPANSION;
+ }
+ entityExpansions = entityExpansions + entity->textLen;
+ if (entityExpansions > maxEntityExpansions) {
+ return XML_ERROR_ENTITY_EXPANSION;
+ }
+ }
+#endif
+
if (freeInternalEntities) {
openEntity = freeInternalEntities;
freeInternalEntities = openEntity->next;
@@ -4824,13 +4991,21 @@
#ifdef XML_DTD
if (entity->is_param) {
int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
+#ifdef XML_BOMB_PROTECTION
+ entityIndirections++;
+#endif
result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
next, &next, XML_FALSE);
}
else
#endif /* XML_DTD */
+ {
+#ifdef XML_BOMB_PROTECTION
+ entityIndirections++;
+#endif
result = doContent(parser, tagLevel, internalEncoding, textStart,
textEnd, &next, XML_FALSE);
+ }
if (result == XML_ERROR_NONE) {
if (textEnd != next && ps_parsing == XML_SUSPENDED) {
diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c
--- a/Modules/expat/xmlrole.c
+++ b/Modules/expat/xmlrole.c
@@ -4,20 +4,7 @@
#include <stddef.h>
-#ifdef COMPILED_FROM_DSP
-#include "winconfig.h"
-#elif defined(MACOS_CLASSIC)
-#include "macconfig.h"
-#elif defined(__amigaos__)
-#include "amigaconfig.h"
-#elif defined(__WATCOMC__)
-#include "watcomconfig.h"
-#else
-#ifdef HAVE_EXPAT_CONFIG_H
-#include <expat_config.h>
-#endif
-#endif /* ndef COMPILED_FROM_DSP */
-
+#include "expat.h"
#include "expat_external.h"
#include "internal.h"
#include "xmlrole.h"
diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c
--- a/Modules/expat/xmltok.c
+++ b/Modules/expat/xmltok.c
@@ -4,20 +4,7 @@
#include <stddef.h>
-#ifdef COMPILED_FROM_DSP
-#include "winconfig.h"
-#elif defined(MACOS_CLASSIC)
-#include "macconfig.h"
-#elif defined(__amigaos__)
-#include "amigaconfig.h"
-#elif defined(__WATCOMC__)
-#include "watcomconfig.h"
-#else
-#ifdef HAVE_EXPAT_CONFIG_H
-#include <expat_config.h>
-#endif
-#endif /* ndef COMPILED_FROM_DSP */
-
+#include "expat.h"
#include "expat_external.h"
#include "internal.h"
#include "xmltok.h"
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1322,6 +1322,28 @@
if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
return PyLong_FromLong((long) self->buffer_used);
}
+#ifdef XML_BOMB_PROTECTION
+ if (first_char == 'm') {
+ if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_indirections") == 0) {
+ long value = -1;
+ if (!XML_GetFeature(self->itself,
+ XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+ return PyLong_FromLong(value);
+ }
+ if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_expansions") == 0) {
+ long value = -1;
+ if (!XML_GetFeature(self->itself,
+ XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+ return PyLong_FromLong(value);
+ }
+ }
+#endif
if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
return get_pybool(self->ns_prefixes);
if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
@@ -1338,6 +1360,15 @@
return self->intern;
}
}
+#ifdef XML_BOMB_PROTECTION
+ if (PyUnicode_CompareWithASCIIString(nameobj, "reset_dtd") == 0) {
+ long value = -1;
+ if (!XML_GetFeature(self->itself, XML_FEATURE_RESET_DTD, &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+ return PyBool_FromLong(value);
+ }
+#endif
generic:
return PyObject_GenericGetAttr((PyObject*)self, nameobj);
}
@@ -1374,6 +1405,9 @@
APPEND(rc, "buffer_text");
APPEND(rc, "buffer_used");
APPEND(rc, "namespace_prefixes");
+ APPEND(rc, "max_entity_expansions");
+ APPEND(rc, "max_entity_indirections");
+ APPEND(rc, "reset_dtd");
APPEND(rc, "ordered_attributes");
APPEND(rc, "specified_attributes");
APPEND(rc, "intern");
@@ -1520,6 +1554,65 @@
self->buffer_size = new_buffer_size;
return 0;
}
+#ifdef XML_BOMB_PROTECTION
+    if (PyUnicode_CompareWithASCIIString(name, "max_entity_expansions") == 0) {
+        unsigned long value;
+        /* Per-parser cap on the accumulated entity expansion length. */
+        value = PyLong_AsUnsignedLong(v);
+        if ((value == (unsigned long)-1) && PyErr_Occurred()) {
+            return -1;
+        }
+        if (value > UINT_MAX) {  /* expat stores the limit as unsigned int */
+            PyErr_Format(PyExc_ValueError,
+                         "max_entity_expansions must not be greater than %u",
+                         UINT_MAX);  /* %u: UINT_MAX printed with %i shows as -1 */
+            return -1;
+        }
+        if (!XML_SetFeature(self->itself,
+                            XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+                            value)) {
+            PyErr_SetFromErrno(PyExc_ValueError);
+            return -1;
+        }
+        return 0;
+    }
+
+    if (PyUnicode_CompareWithASCIIString(name, "max_entity_indirections") == 0) {
+        unsigned long value;
+        /* Per-parser cap on entity indirections. */
+        value = PyLong_AsUnsignedLong(v);
+        if ((value == (unsigned long)-1) && PyErr_Occurred()) {
+            return -1;
+        }
+        if (value > UINT_MAX) {  /* expat stores the limit as unsigned int */
+            PyErr_Format(PyExc_ValueError,
+                         "max_entity_indirections must not be greater than %u",
+                         UINT_MAX);  /* %u: UINT_MAX printed with %i shows as -1 */
+            return -1;
+        }
+        if (!XML_SetFeature(self->itself,
+                            XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+                            value)) {
+            PyErr_SetFromErrno(PyExc_ValueError);
+            return -1;
+        }
+        return 0;
+    }
+
+    if (PyUnicode_CompareWithASCIIString(name, "reset_dtd") == 0) {
+        int value;
+        /* Any object is accepted; only its truth value is passed on. */
+        if ((value = PyObject_IsTrue(v)) == -1) {
+            return -1;
+        }
+        if (!XML_SetFeature(self->itself, XML_FEATURE_RESET_DTD,
+                            value ? XML_TRUE : XML_FALSE)) {
+            PyErr_SetFromErrno(PyExc_ValueError);
+            return -1;
+        }
+        return 0;
+    }
+#endif
if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
/* If we're changing the character data handler, flush all
@@ -1654,12 +1747,125 @@
/* List of methods defined in the module */
+#ifdef XML_BOMB_PROTECTION
+PyDoc_STRVAR(pyexpat_set_reset_dtd_doc,
+"set_reset_dtd(flag)\n\n"
+"Set the default reset_dtd flag that parsers are initialised with.");
+/* Module-level setter: the argument is reduced to its truth value. */
+static PyObject *
+pyexpat_set_reset_dtd(PyObject *self, PyObject *args)
+{
+    PyObject *pre;
+    long value;
+    if (!PyArg_ParseTuple(args, "O:set_reset_dtd", &pre))
+        return NULL;
+    if ((value = (long)PyObject_IsTrue(pre)) == -1 ) {
+        return NULL;
+    }
+    if (!XML_SetFeatureDefault(XML_FEATURE_RESET_DTD, value)) {
+        return PyErr_SetFromErrno(PyExc_ValueError);
+    }
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+PyDoc_STRVAR(pyexpat_get_reset_dtd_doc,
+"get_reset_dtd() -> bool\n\n"
+"Return the default reset_dtd flag that parsers are initialised with.");
+/* Module-level getter; registered METH_NOARGS, so `args` is unused. */
+static PyObject *
+pyexpat_get_reset_dtd(PyObject *self, PyObject *args)
+{
+    long value;
+    if (!XML_GetFeatureDefault(XML_FEATURE_RESET_DTD, &value)) {
+        return PyErr_SetFromErrno(PyExc_ValueError);
+    }
+    return PyBool_FromLong(value);
+}
+
+PyDoc_STRVAR(pyexpat_set_max_entity_expansions_doc,
+"set_max_entity_expansions(n)\n\n"
+"Set the default entity expansion limit for parsers; 0 disables it.");
+/* Range checking is done by expat; a rejected value surfaces as ValueError. */
+static PyObject *
+pyexpat_set_max_entity_expansions(PyObject *self, PyObject *args)
+{
+    long value;
+    if (!PyArg_ParseTuple(args, "l:set_max_entity_expansions", &value))
+        return NULL;
+    if (!XML_SetFeatureDefault(XML_FEATURE_MAX_ENTITY_EXPANSIONS, value)) {
+        return PyErr_SetFromErrno(PyExc_ValueError);
+    }
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+PyDoc_STRVAR(pyexpat_get_max_entity_expansions_doc,
+"get_max_entity_expansions() -> int\n\n"
+"Return the default entity expansion limit for parsers.");
+/* Module-level getter; registered METH_NOARGS, so `args` is unused. */
+static PyObject *
+pyexpat_get_max_entity_expansions(PyObject *self, PyObject *args)
+{
+    long value;
+    if (!XML_GetFeatureDefault(XML_FEATURE_MAX_ENTITY_EXPANSIONS, &value)) {
+        return PyErr_SetFromErrno(PyExc_ValueError);
+    }
+    return PyLong_FromLong(value);
+}
+
+PyDoc_STRVAR(pyexpat_set_max_entity_indirections_doc,
+"set_max_entity_indirections(n)\n\n"
+"Set the default entity indirection limit for parsers; 0 disables it.");
+/* Range checking is done by expat; a rejected value surfaces as ValueError. */
+static PyObject *
+pyexpat_set_max_entity_indirections(PyObject *self, PyObject *args)
+{
+    long value;
+    if (!PyArg_ParseTuple(args, "l:set_max_entity_indirections", &value))
+        return NULL;
+    if (!XML_SetFeatureDefault(XML_FEATURE_MAX_ENTITY_INDIRECTIONS, value)) {
+        return PyErr_SetFromErrno(PyExc_ValueError);
+    }
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+PyDoc_STRVAR(pyexpat_get_max_entity_indirections_doc,
+"get_max_entity_indirections() -> int\n\n"
+"Return the default entity indirection limit for parsers.");
+/* Module-level getter; registered METH_NOARGS, so `args` is unused. */
+static PyObject *
+pyexpat_get_max_entity_indirections(PyObject *self, PyObject *args)
+{
+    long value;
+    if (!XML_GetFeatureDefault(XML_FEATURE_MAX_ENTITY_INDIRECTIONS, &value)) {
+        return PyErr_SetFromErrno(PyExc_ValueError);
+    }
+    return PyLong_FromLong(value);
+}
+#endif
+
+
static struct PyMethodDef pyexpat_methods[] = {
{"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
{"ErrorString", (PyCFunction)pyexpat_ErrorString,
METH_VARARGS, pyexpat_ErrorString__doc__},
-
+#ifdef XML_BOMB_PROTECTION
+ {"set_reset_dtd", (PyCFunction)pyexpat_set_reset_dtd, METH_VARARGS,
+ pyexpat_set_reset_dtd_doc},
+ {"get_reset_dtd", (PyCFunction)pyexpat_get_reset_dtd, METH_NOARGS,
+ pyexpat_get_reset_dtd_doc},
+ {"set_max_entity_expansions", (PyCFunction)pyexpat_set_max_entity_expansions,
+ METH_VARARGS, pyexpat_set_max_entity_expansions_doc},
+ {"get_max_entity_expansions", (PyCFunction)pyexpat_get_max_entity_expansions,
+ METH_NOARGS, pyexpat_get_max_entity_expansions_doc},
+ {"set_max_entity_indirections", (PyCFunction)pyexpat_set_max_entity_indirections,
+ METH_VARARGS, pyexpat_set_max_entity_indirections_doc},
+ {"get_max_entity_indirections", (PyCFunction)pyexpat_get_max_entity_indirections,
+ METH_NOARGS, pyexpat_get_max_entity_indirections_doc},
+#endif
{NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
};
@@ -1896,6 +2102,17 @@
MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
+#ifdef XML_BOMB_PROTECTION
+ MYCONST(XML_DEFAULT_MAX_ENTITY_INDIRECTIONS);
+ MYCONST(XML_DEFAULT_MAX_ENTITY_EXPANSIONS);
+ PyModule_AddObject(m, "XML_BOMB_PROTECTION", Py_True);
+ Py_INCREF(Py_True);
+#else
+ PyModule_AddIntConstant(m, "XML_DEFAULT_MAX_ENTITY_INDIRECTIONS", 0);
+ PyModule_AddIntConstant(m, "XML_DEFAULT_MAX_ENTITY_EXPANSIONS", 0);
+ PyModule_AddObject(m, "XML_BOMB_PROTECTION", Py_False);
+ Py_INCREF(Py_False);
+#endif
#undef MYCONST
#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
@@ -1937,6 +2154,12 @@
capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
capi.SetUserData = XML_SetUserData;
capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
+#ifdef XML_BOMB_PROTECTION
+ capi.GetFeature = XML_GetFeature;
+ capi.SetFeature = XML_SetFeature;
+ capi.GetFeatureDefault = XML_GetFeatureDefault;
+ capi.SetFeatureDefault = XML_SetFeatureDefault;
+#endif
/* export using capsule */
capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);