# HG changeset patch
# Parent 4a254750ad20c2e63099268a00399ac7490ead25
diff -r 4a254750ad20 .hgtags
--- a/.hgtags Sun May 17 19:36:16 2015 -0400
+++ b/.hgtags Mon May 18 02:40:52 2015 +0000
@@ -114,6 +114,7 @@
88a0792e8ba3e4916b24c7e7a522c277d326d66e v3.3.0rc2
c191d21cefafb3832c45570e84854e309aa62eaa v3.3.0rc3
bd8afb90ebf28ba4edc901d4a235f75e7bbc79fd v3.3.0
+afc380863975b201e162f94a101f15e37ca233a6 xmlbomb-2.7-merge1
92c2cfb924055ce68c4f78f836dcfe688437ceb8 v3.3.1rc1
d9893d13c6289aa03d33559ec67f97dcbf5c9e3c v3.3.1
d047928ae3f6314a13b6137051315453d0ae89b6 v3.3.2
diff -r 4a254750ad20 Include/pyexpat.h
--- a/Include/pyexpat.h Sun May 17 19:36:16 2015 -0400
+++ b/Include/pyexpat.h Mon May 18 02:40:52 2015 +0000
@@ -3,7 +3,11 @@
/* note: you must import expat.h before importing this module! */
+#ifdef XML_BOMB_PROTECTION
+#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.1"
+#else
#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.0"
+#endif
#define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI"
struct PyExpat_CAPI
@@ -48,6 +52,15 @@
enum XML_Status (*SetEncoding)(XML_Parser parser, const XML_Char *encoding);
int (*DefaultUnknownEncodingHandler)(
void *encodingHandlerData, const XML_Char *name, XML_Encoding *info);
+#ifdef XML_BOMB_PROTECTION
+ /* CAPI 1.1 bomb protection additions */
+ int (*GetFeature)(XML_Parser parser, enum XML_FeatureEnum feature,
+ long *value);
+ int (*SetFeature)(XML_Parser parser, enum XML_FeatureEnum feature,
+ long value);
+ int (*GetFeatureDefault)(enum XML_FeatureEnum feature, long *value);
+ int (*SetFeatureDefault)(enum XML_FeatureEnum feature, long value);
+#endif
/* always add new stuff to the end! */
};
diff -r 4a254750ad20 Lib/test/test_pyexpat.py
--- a/Lib/test/test_pyexpat.py Sun May 17 19:36:16 2015 -0400
+++ b/Lib/test/test_pyexpat.py Mon May 18 02:40:52 2015 +0000
@@ -10,7 +10,9 @@
from xml.parsers import expat
from xml.parsers.expat import errors
-from test.support import sortdict
+from test.support import sortdict, findfile
+
+XMLBOMB_XMLFILE = findfile("xmlbomb.xml", subdir="xmltestdata")
class SetAttributeTest(unittest.TestCase):
@@ -98,11 +100,11 @@
def NotationDeclHandler(self, *args):
name, base, sysid, pubid = args
- self.out.append('Notation declared: %s' %(args,))
+ self.out.append('Notation declared: %s' % (args,))
def UnparsedEntityDeclHandler(self, *args):
entityName, base, systemId, publicId, notationName = args
- self.out.append('Unparsed entity decl: %s' %(args,))
+ self.out.append('Unparsed entity decl: %s' % (args,))
def NotStandaloneHandler(self):
self.out.append('Not standalone')
@@ -110,7 +112,7 @@
def ExternalEntityRefHandler(self, *args):
context, base, sysId, pubId = args
- self.out.append('External entity ref: %s' %(args[1:],))
+ self.out.append('External entity ref: %s' % (args[1:],))
return 1
def StartDoctypeDeclHandler(self, *args):
@@ -466,7 +468,7 @@
'too many parser events')
expected = self.expected_list[self.upto]
self.assertEqual(pos, expected,
- 'Expected position %s, got position %s' %(pos, expected))
+ 'Expected position %s, got position %s' % (pos, expected))
self.upto += 1
def test(self):
@@ -521,8 +523,8 @@
self.assertRaises(ValueError, f, 0)
def test_unchanged_size(self):
- xml1 = b"" + b'a' * 512
- xml2 = b'a'*512 + b''
+ xml1 = ("%s" % ('a' * 512))
+ xml2 = 'a' * 512 + ''
parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler
parser.buffer_size = 512
@@ -612,7 +614,7 @@
parser.buffer_size = 2048
self.assertEqual(parser.buffer_size, 2048)
- self.n=0
+ self.n = 0
parser.Parse(xml1, 0)
parser.buffer_size = parser.buffer_size // 2
self.assertEqual(parser.buffer_size, 1024)
@@ -707,6 +709,97 @@
b"")
self.assertEqual(handler_call_args, [("bar", "baz")])
+quadratic_bomb = b"""\
+
+
+
+]>
+&a;
+"""
+
+class XmlBombTest(unittest.TestCase):
+
+ @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb_exponential(self):
+ # test that the maximum indirection limitation prevents exponential
+ # entity expansion attacks (billion laughs). Every expansion increases
+ # the indirection level. The result of an expansion is never cached.
+ p = expat.ParserCreate()
+ self.assertEqual(p.max_entity_indirections, 40)
+ p.max_entity_indirections = 71
+ with self.assertRaises(expat.ExpatError) as e:
+ with open(XMLBOMB_XMLFILE, "rb") as f:
+ p.ParseFile(f)
+ self.assertEqual(str(e.exception), "entity indirection limit exceeded: line 7, column 6")
+
+ p = expat.ParserCreate()
+ p.max_entity_indirections = 0
+ with open(XMLBOMB_XMLFILE, "rb") as f:
+ p.ParseFile(f)
+
+ p = expat.ParserCreate()
+ p.max_entity_indirections = 72 # 8 * 8 + 8
+ with open(XMLBOMB_XMLFILE, "rb") as f:
+ p.ParseFile(f)
+
+ @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb_quadratic(self):
+ # test that the total amount of expanded entities chars is limited to
+ # prevent quadratic blowout attacks.
+ p = expat.ParserCreate()
+ self.assertEqual(p.max_entity_expansions, 8 * 1024 ** 2)
+
+ # lower limit to 1024, must fail with one entity of 1025 chars
+ p.max_entity_expansions = 1024
+ xml = quadratic_bomb.replace(b"MARK", b"a" * 1025)
+ with self.assertRaises(expat.ExpatError) as e:
+ p.Parse(xml)
+ self.assertEqual(str(e.exception), "document's entity expansion limit exceeded: line 6, column 6")
+
+ # but passes with an entity of 1024 chars
+ xml = quadratic_bomb.replace(b"MARK", b"a" * 1024)
+ p = expat.ParserCreate()
+ p.max_entity_expansions = 1024
+ p.Parse(xml)
+
+ # one level of indirection, b = "&a;&a;" adds 6 chars
+ xml = quadratic_bomb.replace(b"MARK", b"a" * 512)
+ xml = xml.replace(b"&a;", b"&b;")
+ p = expat.ParserCreate()
+ p.max_entity_expansions = 1024
+ with self.assertRaises(expat.ExpatError) as e:
+ p.Parse(xml)
+ self.assertEqual(str(e.exception), "document's entity expansion limit exceeded: line 6, column 6")
+
+ p = expat.ParserCreate()
+ p.max_entity_expansions = 1030 # 2 * x512 + 6
+ p.Parse(xml)
+
+ # test default limit of 8 MB
+ xml = quadratic_bomb.replace(b"MARK", b"a" * 2 * 1024 ** 2)
+ xml = xml.replace(b"&a;", b"&c;")
+ p = expat.ParserCreate()
+ with self.assertRaises(expat.ExpatError) as e:
+ p.Parse(xml)
+ self.assertEqual(str(e.exception), "document's entity expansion limit exceeded: line 6, column 6")
+
+ # disabled limit
+ p = expat.ParserCreate()
+ p.max_entity_expansions = 0
+ p.Parse(xml)
+
+ @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb_resetdtd(self):
+ # with reset_dtd enabled, all DTD information is ignored
+ p = expat.ParserCreate()
+ self.assertEqual(p.reset_dtd, False)
+ p.reset_dtd = True
+ with self.assertRaises(expat.ExpatError) as e:
+ with open(XMLBOMB_XMLFILE, "rb") as f:
+ p.ParseFile(f)
+ self.assertEqual(str(e.exception), "undefined entity: line 7, column 6")
+
if __name__ == "__main__":
unittest.main()
diff -r 4a254750ad20 Lib/test/test_sax.py
--- a/Lib/test/test_sax.py Sun May 17 19:36:16 2015 -0400
+++ b/Lib/test/test_sax.py Mon May 18 02:40:52 2015 +0000
@@ -12,8 +12,11 @@
from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
XMLFilterBase, prepare_input_source
from xml.sax.expatreader import create_parser
-from xml.sax.handler import feature_namespaces
+from xml.sax.handler import (feature_namespaces,
+ feature_max_entity_indirections, feature_max_entity_expansions,
+ feature_ignore_dtd, feature_external_ges)
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
+from xml.parsers import expat
from io import BytesIO, StringIO
import codecs
import gc
@@ -24,6 +27,7 @@
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
+XMLBOMB_XMLFILE = findfile("xmlbomb.xml", subdir="xmltestdata")
try:
TEST_XMLFILE.encode("utf-8")
TEST_XMLFILE_OUT.encode("utf-8")
@@ -891,7 +895,7 @@
def __init__(self):
self._notations = []
- self._entities = []
+ self._entities = []
def notationDecl(self, name, publicId, systemId):
self._notations.append((name, publicId, systemId))
@@ -926,6 +930,7 @@
def test_expat_entityresolver(self):
parser = create_parser()
+ parser.setFeature(feature_external_ges, True)
parser.setEntityResolver(self.TestEntityResolver())
result = BytesIO()
parser.setContentHandler(XMLGenerator(result))
@@ -1243,6 +1248,91 @@
self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr")
+# ===========================================================================
+#
+# XML bomb DoS tests
+#
+# ===========================================================================
+
+class XmlBombTest(unittest.TestCase):
+    # Exercises the entity-expansion (XML bomb) limits exposed as SAX
+    # features by the expat driver.
+
+    def test_protection_features(self):
+        # The defaults can always be queried.  Changing a limit, or turning
+        # ignore-dtd on, makes setFeature() raise SAXNotSupportedException
+        # when expat.XML_BOMB_PROTECTION is false, so the setters are only
+        # exercised when protection is available.
+        parser = create_parser()
+        get = parser.getFeature
+        self.assertEqual(get(feature_max_entity_indirections),
+                         expat.XML_DEFAULT_MAX_ENTITY_INDIRECTIONS)
+        self.assertEqual(get(feature_max_entity_expansions),
+                         expat.XML_DEFAULT_MAX_ENTITY_EXPANSIONS)
+        self.assertFalse(get(feature_ignore_dtd))
+        if expat.XML_BOMB_PROTECTION:
+            parser.setFeature(feature_ignore_dtd, True)
+            self.assertTrue(get(feature_ignore_dtd))
+            parser.setFeature(feature_max_entity_indirections, 100)
+            self.assertEqual(get(feature_max_entity_indirections), 100)
+            parser.setFeature(feature_max_entity_expansions, 1024)
+            self.assertEqual(get(feature_max_entity_expansions), 1024)
+
+    @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+    def test_xmlbomb_exponential(self):
+        # with the default indirection limit the nested entities are rejected
+        parser = create_parser()
+        result = StringIO()
+        xmlgen = XMLGenerator(result)
+
+        parser.setContentHandler(xmlgen)
+        inpsrc = InputSource()
+        with self.assertRaises(SAXParseException) as e:
+            with open(XMLBOMB_XMLFILE, "rb") as f:
+                inpsrc.setByteStream(f)
+                parser.parse(inpsrc)
+        # no system id is set on the input source, hence "<unknown>"
+        self.assertEqual(str(e.exception),
+                         "<unknown>:7:6: entity indirection limit exceeded")
+
+        # with the limit raised to 72 the same document must parse
+        parser = create_parser()
+        parser.setFeature(feature_max_entity_indirections, 72)
+        result = StringIO()
+        xmlgen = XMLGenerator(result)
+
+        parser.setContentHandler(xmlgen)
+        inpsrc = InputSource()
+        with open(XMLBOMB_XMLFILE, "rb") as f:
+            inpsrc.setByteStream(f)
+            parser.parse(inpsrc)
+
+    @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+    def test_xmlbomb_quadratic(self):
+        parser = create_parser()
+        result = StringIO()
+        parser.setContentHandler(XMLGenerator(result))
+        parser.setFeature(feature_max_entity_expansions, 1024)
+
+        # One entity of 1025 characters, one more than the limit set above.
+        # NOTE(review): the markup inside these literals was reconstructed so
+        # that the reference sits at line 4, column 5 as asserted below;
+        # confirm the exact text against the upstream patch.
+        parser.feed('<!DOCTYPE doc [\n<!ENTITY a "%s">\n' % ('a' * 1025))
+        parser.feed(']>\n')
+        with self.assertRaises(SAXParseException) as e:
+            parser.feed('<doc>&a;</doc>')
+        self.assertEqual(str(e.exception),
+                         "<unknown>:4:5: document's entity expansion limit "
+                         "exceeded")
+
+    @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a")
+    def test_xmlbomb_resetdtd(self):
+        # With ignore-dtd enabled the DTD is discarded, so the entity
+        # reference in the document body is reported as undefined.
+        parser = create_parser()
+        result = StringIO()
+        xmlgen = XMLGenerator(result)
+
+        parser.setContentHandler(xmlgen)
+        inpsrc = InputSource()
+
+        parser.setFeature(feature_ignore_dtd, True)
+        with self.assertRaises(SAXParseException) as e:
+            # binary mode: the file object is installed as a byte stream
+            with open(XMLBOMB_XMLFILE, "rb") as f:
+                inpsrc.setByteStream(f)
+                parser.parse(inpsrc)
+        self.assertEqual(str(e.exception),
+                         "<unknown>:7:6: undefined entity")
+
+
def test_main():
run_unittest(MakeParserTest,
ParseTest,
@@ -1255,7 +1345,8 @@
StreamReaderWriterXmlgenTest,
ExpatReaderTest,
ErrorReportingTest,
- XmlReaderTest)
+ XmlReaderTest,
+ XmlBombTest)
if __name__ == "__main__":
test_main()
diff -r 4a254750ad20 Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py Sun May 17 19:36:16 2015 -0400
+++ b/Lib/test/test_xml_etree.py Mon May 18 02:40:52 2015 +0000
@@ -18,6 +18,12 @@
from test import support
from test.support import TESTFN, findfile, import_fresh_module, gc_collect
+try:
+ import pyexpat
+ XML_BOMB_PROTECTION = pyexpat.XML_BOMB_PROTECTION
+except (ImportError, AttributeError):
+ XML_BOMB_PROTECTION = False
+
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
@@ -31,6 +37,7 @@
except UnicodeEncodeError:
raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
+XMLBOMB_XMLFILE = findfile("xmlbomb.xml", subdir="xmltestdata")
SAMPLE_XML = """\
@@ -79,6 +86,14 @@
"""
+QUADRATIC_BOMB_TPL = """\
+
+
+]>
+&a;
+"""
+
ENTITY_XML = """\
@@ -950,7 +965,7 @@
expected = '<%s>' % elem
serialized = serialize(ET.XML('<%s />' % elem), method='html')
self.assertEqual(serialized, expected)
- serialized = serialize(ET.XML('<%s>%s>' % (elem,elem)),
+ serialized = serialize(ET.XML('<%s>%s>' % (elem, elem)),
method='html')
self.assertEqual(serialized, expected)
@@ -2499,6 +2514,57 @@
# --------------------------------------------------------------------
+class XmlBombTest(unittest.TestCase):
+
+ @unittest.skipUnless(XML_BOMB_PROTECTION, "xml bomb protection n/a")
+ def test_xmlbomb(self):
+ limit = 8 * 1024 ** 2 + 1
+
+ # test entity indirections
+ with self.assertRaisesRegex(ET.ParseError,
+ 'entity indirection limit exceeded: '
+ 'line 7, column 6'):
+ ET.parse(XMLBOMB_XMLFILE)
+
+ parser = ET.XMLParser(max_entity_indirections=71)
+ with self.assertRaisesRegex(ET.ParseError,
+ 'entity indirection limit exceeded: '
+ 'line 7, column 6'):
+ ET.parse(XMLBOMB_XMLFILE, parser=parser)
+
+ parser = ET.XMLParser(max_entity_indirections=72)
+ ET.parse(XMLBOMB_XMLFILE, parser=parser)
+
+ parser = ET.XMLParser(max_entity_indirections=0)
+ ET.parse(XMLBOMB_XMLFILE, parser=parser)
+
+ # test ignore_dtd
+ parser = ET.XMLParser(ignore_dtd=True)
+ with self.assertRaisesRegex(ET.ParseError,
+ 'undefined entity: line 7, column 6'):
+ ET.parse(XMLBOMB_XMLFILE, parser=parser)
+
+ xml = QUADRATIC_BOMB_TPL.replace("MARK", "a" * limit)
+ with self.assertRaisesRegex(ET.ParseError,
+ "document's entity expansion limit "
+ "exceeded: line 5, column 6"):
+ ET.fromstring(xml)
+
+ parser = ET.XMLParser(max_entity_expansions=0)
+ e = ET.fromstring(xml, parser=parser)
+ self.assertEqual(e.text, "a" * limit)
+
+ parser = ET.XMLParser(max_entity_expansions=limit + 1)
+ e = ET.fromstring(xml, parser=parser)
+ self.assertEqual(e.text, "a" * limit)
+
+ parser = ET.XMLParser(ignore_dtd=True)
+ with self.assertRaisesRegex(ET.ParseError,
+ 'undefined entity: line 5, column 6'):
+ ET.fromstring(xml, parser=parser)
+
+# --------------------------------------------------------------------
+
class CleanContext(object):
"""Provide default namespace mapping and path cache."""
@@ -2567,6 +2633,7 @@
XMLParserTest,
XMLPullParserTest,
BugsTest,
+ XmlBombTest,
]
# These tests will only run for the pure-Python version that doesn't import
diff -r 4a254750ad20 Lib/test/xmltestdata/xmlbomb.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Lib/test/xmltestdata/xmlbomb.xml Mon May 18 02:40:52 2015 +0000
@@ -0,0 +1,7 @@
+
+
+
+
+]>
+&c;
diff -r 4a254750ad20 Lib/xml/dom/expatbuilder.py
--- a/Lib/xml/dom/expatbuilder.py Sun May 17 19:36:16 2015 -0400
+++ b/Lib/xml/dom/expatbuilder.py Mon May 18 02:40:52 2015 +0000
@@ -174,13 +174,18 @@
def install(self, parser):
"""Install the callbacks needed to build the DOM into the parser."""
# This creates circular references!
- parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
+ if not self._options.ignore_dtd:
+ parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
+ parser.NotationDeclHandler = self.notation_decl_handler
parser.StartElementHandler = self.first_element_handler
parser.EndElementHandler = self.end_element_handler
parser.ProcessingInstructionHandler = self.pi_handler
if self._options.entities:
+ # or ignore_dtd
parser.EntityDeclHandler = self.entity_decl_handler
- parser.NotationDeclHandler = self.notation_decl_handler
+ if self._options.external_general_entities:
+ # or ignore_dtd
+ parser.ExternalEntityRefHandler = self.external_entity_ref_handler
if self._options.comments:
parser.CommentHandler = self.comment_handler
if self._options.cdata_sections:
@@ -189,10 +194,14 @@
parser.CharacterDataHandler = self.character_data_handler_cdata
else:
parser.CharacterDataHandler = self.character_data_handler
- parser.ExternalEntityRefHandler = self.external_entity_ref_handler
parser.XmlDeclHandler = self.xml_decl_handler
parser.ElementDeclHandler = self.element_decl_handler
parser.AttlistDeclHandler = self.attlist_decl_handler
+ if expat.XML_BOMB_PROTECTION:
+ options = self._options
+ parser.reset_dtd = options.ignore_dtd
+ parser.max_entity_indirections = options.max_entity_indirections
+ parser.max_entity_expansions = options.max_entity_expansions
def parseFile(self, file):
"""Parse a document from a file object, returning the document
@@ -201,7 +210,7 @@
first_buffer = True
try:
while 1:
- buffer = file.read(16*1024)
+ buffer = file.read(16 * 1024)
if not buffer:
break
parser.Parse(buffer, 0)
@@ -274,7 +283,7 @@
def character_data_handler_cdata(self, data):
childNodes = self.curNode.childNodes
if self._cdata:
- if ( self._cdata_continue
+ if (self._cdata_continue
and childNodes[-1].nodeType == CDATA_SECTION_NODE):
childNodes[-1].appendData(data)
return
@@ -358,7 +367,7 @@
for i in range(0, len(attributes), 2):
a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
None, EMPTY_PREFIX)
- value = attributes[i+1]
+ value = attributes[i + 1]
a.value = value
a.ownerDocument = self.document
_set_attribute_node(node, a)
@@ -765,7 +774,7 @@
_attrsNS = node._attrsNS
for i in range(0, len(attributes), 2):
aname = attributes[i]
- value = attributes[i+1]
+ value = attributes[i + 1]
if ' ' in aname:
uri, localname, prefix, qname = _parse_ns_name(self, aname)
a = minidom.Attr(qname, uri, localname, prefix)
diff -r 4a254750ad20 Lib/xml/dom/xmlbuilder.py
--- a/Lib/xml/dom/xmlbuilder.py Sun May 17 19:36:16 2015 -0400
+++ b/Lib/xml/dom/xmlbuilder.py Mon May 18 02:40:52 2015 +0000
@@ -3,6 +3,7 @@
import copy
import warnings
import xml.dom
+from xml.parsers import expat
from xml.dom.NodeFilter import NodeFilter
@@ -23,9 +24,9 @@
namespaces = 1
namespace_declarations = True
validation = False
- external_parameter_entities = True
- external_general_entities = True
- external_dtd_subset = True
+ external_parameter_entities = False # changed
+ external_general_entities = False # changed
+ external_dtd_subset = False # changed
validate_if_schema = False
validate = False
datatype_normalization = False
@@ -41,6 +42,10 @@
errorHandler = None
filter = None
+ max_entity_indirections = expat.XML_DEFAULT_MAX_ENTITY_INDIRECTIONS
+ max_entity_expansions = expat.XML_DEFAULT_MAX_ENTITY_EXPANSIONS
+ ignore_dtd = False
+
class DOMBuilder:
entityResolver = None
@@ -158,6 +163,18 @@
("namespaces", 0)],
("namespaces", 1): [
("namespaces", 1)],
+ ("max_entity_indirections", 0): [
+ ("max_entity_indirections", 0)],
+ ("max_entity_expansions", 0): [
+ ("max_entity_expansions", 0)],
+ ("ignore_dtd", 0): [
+ ("ignore_dtd", 0),
+ ("external_general_entities", 0),
+ ("external_parameter_entities", 0),
+ ("create_entity_ref_nodes", 0),
+ ("entities", 0)],
+ ("ignore_dtd", 1): [
+ ("ignore_dtd", 1)],
}
def getFeature(self, name):
diff -r 4a254750ad20 Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py Sun May 17 19:36:16 2015 -0400
+++ b/Lib/xml/etree/ElementTree.py Mon May 18 02:40:52 2015 +0000
@@ -1431,7 +1431,7 @@
"""
self._flush()
self._last = self._elem.pop()
- assert self._last.tag == tag,\
+ assert self._last.tag == tag, \
"end tag mismatch (expected %s, got %s)" % (
self._last.tag, tag)
self._tail = 1
@@ -1450,7 +1450,9 @@
"""
- def __init__(self, html=0, target=None, encoding=None):
+ def __init__(self, html=0, target=None, encoding=None,
+ max_entity_indirections=None, max_entity_expansions=None,
+ ignore_dtd=False):
try:
from xml.parsers import expat
except ImportError:
@@ -1461,6 +1463,14 @@
"No module named expat; use SimpleXMLTreeBuilder instead"
)
parser = expat.ParserCreate(encoding, "}")
+ if expat.XML_BOMB_PROTECTION:
+ if max_entity_indirections is not None:
+ parser.max_entity_indirections = max_entity_indirections
+ if max_entity_expansions is not None:
+ parser.max_entity_expansions = max_entity_expansions
+ if ignore_dtd:
+ parser.reset_dtd = True
+
if target is None:
target = TreeBuilder()
# underscored names are provided for compatibility only
@@ -1551,7 +1561,7 @@
attrib = {}
if attr_list:
for i in range(0, len(attr_list), 2):
- attrib[fixname(attr_list[i])] = attr_list[i+1]
+ attrib[fixname(attr_list[i])] = attr_list[i + 1]
return self.target.start(tag, attrib)
def _end(self, tag):
@@ -1601,7 +1611,9 @@
pubid = None
else:
return
- if hasattr(self.target, "doctype"):
+ if self.ignore_dtd:
+ pass
+ elif hasattr(self.target, "doctype"):
self.target.doctype(name, pubid, system[1:-1])
elif self.doctype != self._XMLParser__doctype:
# warn about deprecated call
@@ -1609,6 +1621,18 @@
self.doctype(name, pubid, system[1:-1])
self._doctype = None
+ @property
+ def max_entity_indirections(self):
+ return getattr(self.parser, "max_entity_indirections", None)
+
+ @property
+ def max_entity_expansions(self):
+ return getattr(self.parser, "max_entity_expansions", None)
+
+ @property
+ def ignore_dtd(self):
+ return getattr(self.parser, "reset_dtd", None)
+
def doctype(self, name, pubid, system):
"""(Deprecated) Handle doctype declaration
diff -r 4a254750ad20 Lib/xml/sax/expatreader.py
--- a/Lib/xml/sax/expatreader.py Sun May 17 19:36:16 2015 -0400
+++ b/Lib/xml/sax/expatreader.py Mon May 18 02:40:52 2015 +0000
@@ -10,6 +10,9 @@
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
+from xml.sax.handler import feature_max_entity_indirections
+from xml.sax.handler import feature_max_entity_expansions
+from xml.sax.handler import feature_ignore_dtd
from xml.sax.handler import property_xml_string, property_interning_dict
# xml.parsers.expat does not raise ImportError in Jython
@@ -87,7 +90,17 @@
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
"""SAX driver for the pyexpat C module."""
- def __init__(self, namespaceHandling=0, bufsize=2**16-20):
+ # default settings for security critical issues
+ # enable external general entities
+ EXTERNAL_GES = False
+ # maximum number of indirections allowed when resolving nested entities
+ MAX_ENTITY_INDIRECTIONS = expat.XML_DEFAULT_MAX_ENTITY_INDIRECTIONS
+ # limit the total number of expanded entity characters
+ MAX_ENTITY_EXPANSIONS = expat.XML_DEFAULT_MAX_ENTITY_EXPANSIONS
+ # ignore DTD information by resetting DTD information
+ IGNORE_DTD = False
+
+ def __init__(self, namespaceHandling=0, bufsize=2 ** 16 - 20):
xmlreader.IncrementalParser.__init__(self, bufsize)
self._source = xmlreader.InputSource()
self._parser = None
@@ -95,8 +108,11 @@
self._lex_handler_prop = None
self._parsing = 0
self._entity_stack = []
- self._external_ges = 1
+ self._external_ges = self.EXTERNAL_GES
self._interning = None
+ self._max_entity_indirections = self.MAX_ENTITY_INDIRECTIONS
+ self._max_entity_expansions = self.MAX_ENTITY_EXPANSIONS
+ self._ignore_dtd = self.IGNORE_DTD
# XMLReader methods
@@ -130,6 +146,12 @@
return 0
elif name == feature_external_ges:
return self._external_ges
+ elif name == feature_max_entity_indirections:
+ return self._max_entity_indirections
+ elif name == feature_max_entity_expansions:
+ return self._max_entity_expansions
+ elif name == feature_ignore_dtd:
+ return self._ignore_dtd
raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
def setFeature(self, name, state):
@@ -158,6 +180,21 @@
if state:
raise SAXNotSupportedException(
"expat does not report namespace prefixes")
+ elif name == feature_max_entity_indirections:
+ if not expat.XML_BOMB_PROTECTION:
+ raise SAXNotSupportedException(
+ "expat does not support max entity indirections")
+ self._max_entity_indirections = state
+ elif name == feature_max_entity_expansions:
+ if not expat.XML_BOMB_PROTECTION:
+ raise SAXNotSupportedException(
+ "expat does not support max entity expansions")
+ self._max_entity_expansions = state
+ elif name == feature_ignore_dtd:
+ if not expat.XML_BOMB_PROTECTION and state:
+ raise SAXNotSupportedException(
+ "expat does not support ignore dtd")
+ self._ignore_dtd = bool(state)
else:
raise SAXNotRecognizedException(
"Feature '%s' not recognized" % name)
@@ -196,7 +233,7 @@
# IncrementalParser methods
- def feed(self, data, isFinal = 0):
+ def feed(self, data, isFinal=0):
if not self._parsing:
self.reset()
self._parsing = 1
@@ -219,7 +256,7 @@
# If we are completing an external entity, do nothing here
return
try:
- self.feed("", isFinal = 1)
+ self.feed("", isFinal=1)
self._cont_handler.endDocument()
self._parsing = 0
# break cycle created by expat handlers pointing to our methods
@@ -262,6 +299,27 @@
parser.StartDoctypeDeclHandler = self.start_doctype_decl
parser.EndDoctypeDeclHandler = lex.endDTD
+    def _set_ignore_dtd(self):
+        """Apply the ignore-DTD setting to the current expat parser.
+
+        When ``self._ignore_dtd`` is true, ``reset_dtd`` is switched on and
+        every DTD related callback is detached so that no DTD event is
+        reported; otherwise ``reset_dtd`` is switched off.
+        """
+        parser = self._parser
+        if self._ignore_dtd:
+            parser.reset_dtd = True
+            parser.UnparsedEntityDeclHandler = None
+            parser.EntityDeclHandler = None
+            parser.NotationDeclHandler = None
+            parser.StartDoctypeDeclHandler = None
+            parser.EndDoctypeDeclHandler = None
+            parser.ExternalEntityRefHandler = None
+            try:
+                parser.SkippedEntityHandler = None
+            except AttributeError:
+                # This pyexpat does not support SkippedEntity
+                pass
+
+        else:
+            parser.reset_dtd = False
+            # handlers are already reset
+
def reset(self):
if self._namespaces:
self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
@@ -271,7 +329,7 @@
self._parser.EndElementHandler = self.end_element_ns
else:
self._parser = expat.ParserCreate(self._source.getEncoding(),
- intern = self._interning)
+ intern=self._interning)
self._parser.StartElementHandler = self.start_element
self._parser.EndElementHandler = self.end_element
@@ -296,6 +354,14 @@
self._parser.SetParamEntityParsing(
expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
+ if expat.XML_BOMB_PROTECTION:
+ parser = self._parser
+ if self._max_entity_indirections is not None:
+ parser.max_entity_indirections = self._max_entity_indirections
+ if self._max_entity_expansions is not None:
+ parser.max_entity_expansions = self._max_entity_expansions
+ self._set_ignore_dtd()
+
self._parsing = 0
self._entity_stack = []
@@ -417,7 +483,7 @@
def skipped_entity_handler(self, name, is_pe):
if is_pe:
# The SAX spec requires to report skipped PEs with a '%'
- name = '%'+name
+ name = '%' + name
self._cont_handler.skippedEntity(name)
# ---
diff -r 4a254750ad20 Lib/xml/sax/handler.py
--- a/Lib/xml/sax/handler.py Sun May 17 19:36:16 2015 -0400
+++ b/Lib/xml/sax/handler.py Mon May 18 02:40:52 2015 +0000
@@ -277,12 +277,28 @@
# DTD subset.
# access: (parsing) read-only; (not parsing) read/write
+feature_max_entity_indirections = \
+ "http://www.python.org/sax/features/max-entity-indirections"
+
+feature_max_entity_expansions = \
+ "http://www.python.org/sax/features/max-entity-expansions"
+
+feature_ignore_dtd = "http://www.python.org/sax/features/ignore-dtd"
+# true: Ignore all doctype information and reset all doctype information
+# after the DTD block has been parsed. No DTD-related handler is
+# called.
+# false: Obey DTD
+# access: (parsing) read-only; (not parsing) read/write
+
all_features = [feature_namespaces,
feature_namespace_prefixes,
feature_string_interning,
feature_validation,
feature_external_ges,
- feature_external_pes]
+ feature_external_pes,
+ feature_max_entity_indirections,
+ feature_max_entity_expansions,
+ feature_ignore_dtd]
#============================================================================
diff -r 4a254750ad20 Modules/_elementtree.c
--- a/Modules/_elementtree.c Sun May 17 19:36:16 2015 -0400
+++ b/Modules/_elementtree.c Mon May 18 02:40:52 2015 +0000
@@ -3201,20 +3201,85 @@
return (PyObject *)self;
}
+/* TODO: The last three parameters (max_entity_indirections/expansions and
+ignore_dtd) are only meant to be there when XML_BOMB_PROTECTION is defined.
+*/
/*[clinic input]
_elementtree.XMLParser.__init__
html: object = NULL
target: object = NULL
encoding: str(accept={str, NoneType}) = NULL
+ max_entity_indirections as indirections: object = NULL
+ max_entity_expansions as expansions: object = NULL
+ ignore_dtd: object = NULL
[clinic start generated code]*/
static int
_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
- PyObject *target, const char *encoding)
-/*[clinic end generated code: output=d6a16c63dda54441 input=155bc5695baafffd]*/
+ PyObject *target, const char *encoding,
+ PyObject *indirections,
+ PyObject *expansions,
+ PyObject *ignore_dtd)
+/*[clinic end generated code: output=fdc9af25bcba1e88 input=4192203c4c9cff06]*/
{
+ long ignore_dtd_flag = 0;
+#ifdef XML_BOMB_PROTECTION
+ long max_indirections;
+ long max_expansions;
+ if (indirections == NULL) {
+ if (!EXPAT(GetFeatureDefault)(XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ &max_indirections)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+ }
+ else {
+ max_indirections = PyLong_AsLong(indirections);
+ if ((max_indirections == -1) && PyErr_Occurred()) {
+ return -1;
+ }
+ if ((max_indirections > UINT_MAX) || (max_indirections < 0)) {
+ PyErr_Format(PyExc_ValueError,
+ "max_entity_indirections be between 0 and %i",
+ UINT_MAX);
+ return -1;
+ }
+ }
+
+ if (expansions == NULL) {
+ if (!EXPAT(GetFeatureDefault)(XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ &max_expansions)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+ }
+ else {
+ max_expansions = PyLong_AsLong(expansions);
+ if ((max_expansions == -1) && PyErr_Occurred()) {
+ return -1;
+ }
+ if ((max_expansions > UINT_MAX) || (max_expansions < 0)) {
+ PyErr_Format(PyExc_ValueError,
+ "max_entity_expansions be between 0 and %i",
+ UINT_MAX);
+ return -1;
+ }
+ }
+
+ if (ignore_dtd == NULL) {
+ if (!EXPAT(GetFeatureDefault)(XML_FEATURE_RESET_DTD,
+ &ignore_dtd_flag)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+ }
+ else if ((ignore_dtd_flag = PyObject_IsTrue(ignore_dtd)) == -1) {
+ return -1;
+ }
+#endif
+
self->entity = PyDict_New();
if (!self->entity)
return -1;
@@ -3233,6 +3298,27 @@
return -1;
}
+#ifdef XML_BOMB_PROTECTION
+ if (!EXPAT(SetFeature)(self->parser,
+ XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ max_indirections)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+ if (!EXPAT(SetFeature)(self->parser,
+ XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ max_expansions)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+ if (!EXPAT(SetFeature)(self->parser,
+ XML_FEATURE_RESET_DTD,
+ ignore_dtd_flag)) {
+ PyErr_SetFromErrno(PyExc_ValueError);
+ return -1;
+ }
+#endif
+
if (target) {
Py_INCREF(target);
} else {
@@ -3252,7 +3338,9 @@
self->handle_comment = PyObject_GetAttrString(target, "comment");
self->handle_pi = PyObject_GetAttrString(target, "pi");
self->handle_close = PyObject_GetAttrString(target, "close");
- self->handle_doctype = PyObject_GetAttrString(target, "doctype");
+ if (!ignore_dtd_flag) {
+ self->handle_doctype = PyObject_GetAttrString(target, "doctype");
+ }
PyErr_Clear();
@@ -3281,10 +3369,12 @@
self->parser,
(XML_ProcessingInstructionHandler) expat_pi_handler
);
- EXPAT(SetStartDoctypeDeclHandler)(
- self->parser,
- (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
- );
+ if (!ignore_dtd_flag) {
+ EXPAT(SetStartDoctypeDeclHandler)(
+ self->parser,
+ (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
+ );
+ }
EXPAT(SetUnknownEncodingHandler)(
self->parser,
EXPAT(DefaultUnknownEncodingHandler), NULL
@@ -3647,6 +3737,35 @@
"Expat %d.%d.%d", XML_MAJOR_VERSION,
XML_MINOR_VERSION, XML_MICRO_VERSION);
}
+#ifdef XML_BOMB_PROTECTION
+ else if (PyUnicode_CompareWithASCIIString(nameobj, "ignore_dtd") == 0) {
+ long value = -1;
+ if (!EXPAT(GetFeature)(self->parser, XML_FEATURE_RESET_DTD,
+ &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+
+ return PyBool_FromLong(value);
+ }
+ else if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_indirections") == 0) {
+ long value = -1;
+ if (!EXPAT(GetFeature)(self->parser,
+ XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+ return PyLong_FromLong(value);
+ }
+ else if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_expansions") == 0) {
+ long value = -1;
+ if (!EXPAT(GetFeature)(self->parser,
+ XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+ return PyLong_FromLong(value);
+ }
+#endif
else
goto generic;
diff -r 4a254750ad20 Modules/clinic/_elementtree.c.h
--- a/Modules/clinic/_elementtree.c.h Sun May 17 19:36:16 2015 -0400
+++ b/Modules/clinic/_elementtree.c.h Mon May 18 02:40:52 2015 +0000
@@ -565,21 +565,27 @@
static int
_elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *html,
- PyObject *target, const char *encoding);
+ PyObject *target, const char *encoding,
+ PyObject *indirections,
+ PyObject *expansions,
+ PyObject *ignore_dtd);
static int
_elementtree_XMLParser___init__(PyObject *self, PyObject *args, PyObject *kwargs)
{
int return_value = -1;
- static char *_keywords[] = {"html", "target", "encoding", NULL};
+ static char *_keywords[] = {"html", "target", "encoding", "max_entity_indirections", "max_entity_expansions", "ignore_dtd", NULL};
PyObject *html = NULL;
PyObject *target = NULL;
const char *encoding = NULL;
+ PyObject *indirections = NULL;
+ PyObject *expansions = NULL;
+ PyObject *ignore_dtd = NULL;
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOz:XMLParser", _keywords,
- &html, &target, &encoding))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOzOOO:XMLParser", _keywords,
+ &html, &target, &encoding, &indirections, &expansions, &ignore_dtd))
goto exit;
- return_value = _elementtree_XMLParser___init___impl((XMLParserObject *)self, html, target, encoding);
+ return_value = _elementtree_XMLParser___init___impl((XMLParserObject *)self, html, target, encoding, indirections, expansions, ignore_dtd);
exit:
return return_value;
@@ -663,4 +669,4 @@
exit:
return return_value;
}
-/*[clinic end generated code: output=119aed84c1545187 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=ac5bcea68d43321e input=a9049054013a1b77]*/
diff -r 4a254750ad20 Modules/expat/expat.h
--- a/Modules/expat/expat.h Sun May 17 19:36:16 2015 -0400
+++ b/Modules/expat/expat.h Mon May 18 02:40:52 2015 +0000
@@ -5,6 +5,20 @@
#ifndef Expat_INCLUDED
#define Expat_INCLUDED 1
+#define XML_BOMB_PROTECTION 1 /* Python only: hard coded */
+
+#ifdef COMPILED_FROM_DSP
+#include "winconfig.h"
+#elif defined(MACOS_CLASSIC)
+#include "macconfig.h"
+#elif defined(__amigaos__)
+#include "amigaconfig.h"
+#elif defined(__WATCOMC__)
+#include "watcomconfig.h"
+#elif defined(HAVE_EXPAT_CONFIG_H)
+#include <expat_config.h>
+#endif /* ndef COMPILED_FROM_DSP */
+
#ifdef __VMS
/* 0 1 2 3 0 1 2 3
1234567890123456789012345678901 1234567890123456789012345678901 */
@@ -96,6 +110,12 @@
XML_ERROR_RESERVED_PREFIX_XML,
XML_ERROR_RESERVED_PREFIX_XMLNS,
XML_ERROR_RESERVED_NAMESPACE_URI
+#ifdef XML_BOMB_PROTECTION
+ /* Added in 2.2. */
+ ,
+ XML_ERROR_ENTITY_INDIRECTIONS,
+ XML_ERROR_ENTITY_EXPANSION
+#endif
};
enum XML_Content_Type {
@@ -1020,6 +1040,13 @@
XML_FEATURE_NS,
XML_FEATURE_LARGE_SIZE,
XML_FEATURE_ATTR_INFO
+#ifdef XML_BOMB_PROTECTION
+ /* Added in 2.2. */
+ ,
+ XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ XML_FEATURE_RESET_DTD
+#endif
/* Additional features must be added to the end of this enum. */
};
@@ -1032,6 +1059,92 @@
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
+/* Protection against XML bomb DoS attacks
+ Added in 2.2.
+ */
+#ifdef XML_BOMB_PROTECTION
+
+/* XML_FEATURE_MAX_ENTITY_INDIRECTIONS
+
+ Limit the amount of indirections that are allowed to occur during the
+ expansion of a nested entity. A counter starts when an entity reference
+ is encountered. It resets after the entity is fully expanded. The limit
+ protects the parser against exponential entity expansion attacks (aka
+ billion laughs attack). When the limit is exceeded the parser stops and
+ fails with `XML_ERROR_ENTITY_INDIRECTIONS`.
+ A value of 0 disables the protection.
+
+ Supported range: 0 .. UINT_MAX
+ Default: 40
+ */
+
+#ifndef XML_DEFAULT_MAX_ENTITY_INDIRECTIONS
+#define XML_DEFAULT_MAX_ENTITY_INDIRECTIONS 40
+#endif
+
+/* XML_FEATURE_MAX_ENTITY_EXPANSIONS
+
+ Limit the total length of all entity expansions throughout the entire
+ document. The lengths of all entities are accumulated in a parser variable.
+ The setting protects against quadratic blowup attacks (lots of expansions
+ of a large entity declaration). When the sum of all entities exceeds
+ the limit, the parser stops and fails with `XML_ERROR_ENTITY_EXPANSION`.
+ A value of 0 disables the protection.
+
+ Supported range: 0 .. UINT_MAX
+ Default: 8 MB
+ */
+#ifndef XML_DEFAULT_MAX_ENTITY_EXPANSIONS
+#define XML_DEFAULT_MAX_ENTITY_EXPANSIONS (1 << 23) /* 8 MiB; parenthesized so the shift cannot re-associate inside a larger expression */
+#endif
+
+/* XML_FEATURE_RESET_DTD
+
+ Reset all DTD information after the DOCTYPE block has been parsed. When
+ the flag is set (default: false) all DTD information is discarded after
+ the endDoctypeDeclHandler has been called. The flag can be set inside the
+ endDoctypeDeclHandler. Without DTD information any entity reference in
+ the document body leads to an XML_ERROR_UNDEFINED_ENTITY.
+
+ Supported range: 0, 1
+ Default: 0
+ */
+#ifndef XML_DEFAULT_DTD_RESET
+#define XML_DEFAULT_DTD_RESET XML_FALSE
+#endif
+
+/* Feature modifiers
+
+ On success the functions shall return 1 and modify or retrieve the value.
+
+ Otherwise, 0 shall be returned and errno set to indicate an error. The
+ value shall not be modified if a function signals an error.
+
+ ENOENT feature is not supported
+ EINVAL value is invalid and outside the allowed range
+
+ As of now three features are supported:
+ - XML_FEATURE_MAX_ENTITY_INDIRECTIONS
+ - XML_FEATURE_MAX_ENTITY_EXPANSIONS
+ - XML_FEATURE_RESET_DTD
+
+ */
+
+/* Get / set a feature of one XML parser instance. Declared with
+   XMLPARSEAPI so linkage and calling convention match the XMLCALL definitions. */
+XMLPARSEAPI(int)
+XML_GetFeature(XML_Parser parser, enum XML_FeatureEnum feature, long *value);
+
+XMLPARSEAPI(int)
+XML_SetFeature(XML_Parser parser, enum XML_FeatureEnum feature, long value);
+
+/* Get / set the global default that newly created parsers start with.
+ */
+XMLPARSEAPI(int) XML_GetFeatureDefault(enum XML_FeatureEnum feature, long *value);
+XMLPARSEAPI(int) XML_SetFeatureDefault(enum XML_FeatureEnum feature, long value);
+
+#endif /* XML_BOMB_PROTECTION */
+
/* Expat follows the GNU/Linux convention of odd number minor version for
beta/development releases and even number minor version for stable
diff -r 4a254750ad20 Modules/expat/xmlparse.c
--- a/Modules/expat/xmlparse.c Sun May 17 19:36:16 2015 -0400
+++ b/Modules/expat/xmlparse.c Mon May 18 02:40:52 2015 +0000
@@ -4,23 +4,12 @@
#define XML_BUILDING_EXPAT 1
-#ifdef COMPILED_FROM_DSP
-#include "winconfig.h"
-#elif defined(MACOS_CLASSIC)
-#include "macconfig.h"
-#elif defined(__amigaos__)
-#include "amigaconfig.h"
-#elif defined(__WATCOMC__)
-#include "watcomconfig.h"
-#elif defined(HAVE_EXPAT_CONFIG_H)
-#include <expat_config.h>
-#endif /* ndef COMPILED_FROM_DSP */
-
#include <stddef.h>
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
#include <limits.h> /* UINT_MAX */
#include <time.h> /* time() */
+#include <errno.h>
#include "ascii.h"
#include "expat.h"
@@ -141,6 +130,12 @@
#define EXPAND_SPARE 24
+#ifdef XML_BOMB_PROTECTION
+static unsigned int defaultMaxEntityIndirections = XML_DEFAULT_MAX_ENTITY_INDIRECTIONS;
+static unsigned int defaultMaxEntityExpansions = XML_DEFAULT_MAX_ENTITY_EXPANSIONS;
+static XML_Bool defaultResetDTDFlag = XML_DEFAULT_DTD_RESET;
+#endif
+
typedef struct binding {
struct prefix *prefix;
struct binding *nextTagBinding;
@@ -151,6 +146,11 @@
int uriAlloc;
} BINDING;
+/* Python only: workaround for PREFIX macro in PC/pyconfig.h */
+#ifdef PREFIX
+#undef PREFIX
+#endif
+
typedef struct prefix {
const XML_Char *name;
BINDING *binding;
@@ -557,6 +557,13 @@
enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
unsigned long m_hash_secret_salt;
+#ifdef XML_BOMB_PROTECTION
+ unsigned int m_entityIndirections;
+ unsigned int m_maxEntityIndirections;
+ unsigned int m_entityExpansions;
+ unsigned int m_maxEntityExpansions;
+ XML_Bool m_resetDTDFlag;
+#endif /* XML_BOMB_PROTECTION */
};
#define MALLOC(s) (parser->m_mem.malloc_fcn((s)))
@@ -666,6 +673,13 @@
#define paramEntityParsing (parser->m_paramEntityParsing)
#endif /* XML_DTD */
#define hash_secret_salt (parser->m_hash_secret_salt)
+#ifdef XML_BOMB_PROTECTION
+#define entityIndirections (parser->m_entityIndirections)
+#define maxEntityIndirections (parser->m_maxEntityIndirections)
+#define entityExpansions (parser->m_entityExpansions)
+#define maxEntityExpansions (parser->m_maxEntityExpansions)
+#define resetDTDFlag (parser->m_resetDTDFlag)
+#endif /* XML_BOMB_PROTECTION */
XML_Parser XMLCALL
XML_ParserCreate(const XML_Char *encodingName)
@@ -756,6 +770,13 @@
buffer = NULL;
bufferLim = NULL;
+#ifdef XML_BOMB_PROTECTION
+ entityIndirections = 0;
+ maxEntityIndirections = defaultMaxEntityIndirections;
+ entityExpansions = 0;
+ maxEntityExpansions = defaultMaxEntityExpansions;
+ resetDTDFlag = defaultResetDTDFlag;
+#endif
attsSize = INIT_ATTS_SIZE;
atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE));
@@ -1886,6 +1907,109 @@
return position.columnNumber;
}
+#ifdef XML_BOMB_PROTECTION
+
+int XMLCALL
+XML_GetFeature(XML_Parser parser, enum XML_FeatureEnum feature, long *value) {
+  /* Copy this parser's setting for FEATURE into *value and return 1.
+     An unknown feature returns 0 with errno = ENOENT, *value untouched. */
+  if (feature == XML_FEATURE_MAX_ENTITY_INDIRECTIONS) {
+    *value = (long)maxEntityIndirections;
+  } else if (feature == XML_FEATURE_MAX_ENTITY_EXPANSIONS) {
+    *value = (long)maxEntityExpansions;
+  } else if (feature == XML_FEATURE_RESET_DTD) {
+    *value = (long)resetDTDFlag;
+  } else {
+    /* Not one of the bomb-protection features. */
+    errno = ENOENT;
+    return 0;
+  }
+  return 1;
+}
+
+int XMLCALL
+XML_SetFeature(XML_Parser parser, enum XML_FeatureEnum feature, long value) {
+  /* Store VALUE as this parser's setting for FEATURE and return 1. On failure
+     return 0 with errno = ENOENT (unknown feature) or EINVAL (bad value). */
+  switch (feature) {
+  case XML_FEATURE_MAX_ENTITY_INDIRECTIONS:
+    if ((value >= 0) && (value <= UINT_MAX)) {
+      maxEntityIndirections = (unsigned int)value;
+      return 1;
+    }
+    break;
+  case XML_FEATURE_MAX_ENTITY_EXPANSIONS:
+    if ((value >= 0) && (value <= UINT_MAX)) {
+      maxEntityExpansions = (unsigned int)value;
+      return 1;
+    }
+    break;
+  case XML_FEATURE_RESET_DTD:
+    if ((value == 0) || (value == 1)) {
+      resetDTDFlag = (XML_Bool)value;
+      return 1;
+    }
+    break;
+  default:
+    errno = ENOENT;
+    return 0;
+  }
+  errno = EINVAL; /* known feature, rejected value; parser left unchanged */
+  return 0;
+}
+
+int XMLCALL
+XML_GetFeatureDefault(enum XML_FeatureEnum feature, long *value) {
+  /* Copy the global default for FEATURE into *value and return 1.
+     An unknown feature returns 0 with errno = ENOENT, *value untouched. */
+  if (feature == XML_FEATURE_MAX_ENTITY_INDIRECTIONS) {
+    *value = (long)defaultMaxEntityIndirections;
+  } else if (feature == XML_FEATURE_MAX_ENTITY_EXPANSIONS) {
+    *value = (long)defaultMaxEntityExpansions;
+  } else if (feature == XML_FEATURE_RESET_DTD) {
+    *value = (long)defaultResetDTDFlag;
+  } else {
+    /* Not one of the bomb-protection features. */
+    errno = ENOENT;
+    return 0;
+  }
+  return 1;
+}
+
+int XMLCALL
+XML_SetFeatureDefault(enum XML_FeatureEnum feature, long value) {
+  /* Store VALUE as the global default for FEATURE and return 1. On failure
+     return 0 with errno = ENOENT (unknown feature) or EINVAL (bad value). */
+  switch (feature) {
+  case XML_FEATURE_MAX_ENTITY_INDIRECTIONS:
+    if ((value >= 0) && (value <= UINT_MAX)) {
+      defaultMaxEntityIndirections = (unsigned int)value;
+      return 1;
+    }
+    break;
+  case XML_FEATURE_MAX_ENTITY_EXPANSIONS:
+    if ((value >= 0) && (value <= UINT_MAX)) {
+      defaultMaxEntityExpansions = (unsigned int)value;
+      return 1;
+    }
+    break;
+  case XML_FEATURE_RESET_DTD:
+    if ((value == 0) || (value == 1)) {
+      defaultResetDTDFlag = (XML_Bool)value;
+      return 1;
+    }
+    break;
+  default:
+    errno = ENOENT;
+    return 0;
+  }
+  errno = EINVAL; /* known feature, rejected value; default left unchanged */
+  return 0;
+}
+
+#endif
+
+
void XMLCALL
XML_FreeContentModel(XML_Parser parser, XML_Content *model)
{
@@ -1969,6 +2093,11 @@
XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"),
XML_L("reserved prefix (xmlns) must not be declared or undeclared"),
XML_L("prefix must not be bound to one of the reserved namespace names")
+#ifdef XML_BOMB_PROTECTION
+ ,
+ XML_L("entity indirection limit exceeded"),
+ XML_L("document's entity expansion limit exceeded")
+#endif
};
if (code > 0 && code < sizeof(message)/sizeof(message[0]))
return message[code];
@@ -2040,6 +2169,17 @@
#ifdef XML_ATTR_INFO
{XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
#endif
+#ifdef XML_BOMB_PROTECTION
+ {XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ XML_L("XML_FEATURE_MAX_ENTITY_INDIRECTIONS"),
+ XML_DEFAULT_MAX_ENTITY_INDIRECTIONS},
+ {XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ XML_L("XML_FEATURE_MAX_ENTITY_EXPANSIONS"),
+ XML_DEFAULT_MAX_ENTITY_EXPANSIONS},
+ {XML_FEATURE_RESET_DTD,
+ XML_L("XML_FEATURE_RESET_DTD"),
+ XML_DEFAULT_DTD_RESET},
+#endif
{XML_FEATURE_END, NULL, 0}
};
@@ -2238,9 +2378,15 @@
{
/* save one level of indirection */
DTD * const dtd = _dtd;
-
const char **eventPP;
const char **eventEndPP;
+
+#ifdef XML_BOMB_PROTECTION
+ if (haveMore) {
+ entityIndirections = 0;
+ }
+#endif
+
if (enc == encoding) {
eventPP = &eventPtr;
eventEndPP = &eventEndPtr;
@@ -3974,6 +4120,11 @@
endDoctypeDeclHandler(handlerArg);
handleDefault = XML_FALSE;
}
+#ifdef XML_BOMB_PROTECTION
+ if (resetDTDFlag) {
+ dtdReset(dtd, &parser->m_mem);
+ }
+#endif
break;
case XML_ROLE_INSTANCE_START:
#ifdef XML_DTD
@@ -4800,6 +4951,22 @@
enum XML_Error result;
OPEN_INTERNAL_ENTITY *openEntity;
+#ifdef XML_BOMB_PROTECTION /* refuse the expansion up front once a configured limit is exceeded */
+ if (maxEntityIndirections && (entityIndirections > maxEntityIndirections)) {
+ return XML_ERROR_ENTITY_INDIRECTIONS; /* a limit of 0 skips this check */
+ }
+ if (maxEntityExpansions) { /* a limit of 0 skips the accumulated-length check */
+ if (entity->textLen > UINT_MAX - entityExpansions) {
+ /* adding textLen would overflow the unsigned running total */
+ return XML_ERROR_ENTITY_EXPANSION;
+ }
+ entityExpansions = entityExpansions + entity->textLen; /* running total kept on the parser */
+ if (entityExpansions > maxEntityExpansions) {
+ return XML_ERROR_ENTITY_EXPANSION;
+ }
+ }
+#endif
+
if (freeInternalEntities) {
openEntity = freeInternalEntities;
freeInternalEntities = openEntity->next;
@@ -4824,13 +4991,21 @@
#ifdef XML_DTD
if (entity->is_param) {
int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
+#ifdef XML_BOMB_PROTECTION
+ entityIndirections++; /* parameter entity: count one more indirection before its text is parsed as prolog */
+#endif
result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
next, &next, XML_FALSE);
}
else
#endif /* XML_DTD */
+ {
+#ifdef XML_BOMB_PROTECTION
+ entityIndirections++; /* NOTE(review): nothing in this patch decrements the counter after the nested call returns; apart from parser init it is only zeroed in the haveMore branch -- confirm it measures nesting depth rather than total expansions */
+#endif
result = doContent(parser, tagLevel, internalEncoding, textStart,
textEnd, &next, XML_FALSE);
+ }
if (result == XML_ERROR_NONE) {
if (textEnd != next && ps_parsing == XML_SUSPENDED) {
diff -r 4a254750ad20 Modules/expat/xmlrole.c
--- a/Modules/expat/xmlrole.c Sun May 17 19:36:16 2015 -0400
+++ b/Modules/expat/xmlrole.c Mon May 18 02:40:52 2015 +0000
@@ -2,22 +2,9 @@
See the file COPYING for copying permission.
*/
-#ifdef COMPILED_FROM_DSP
-#include "winconfig.h"
-#elif defined(MACOS_CLASSIC)
-#include "macconfig.h"
-#elif defined(__amigaos__)
-#include "amigaconfig.h"
-#elif defined(__WATCOMC__)
-#include "watcomconfig.h"
-#else
-#ifdef HAVE_EXPAT_CONFIG_H
-#include <expat_config.h>
-#endif
-#endif /* ndef COMPILED_FROM_DSP */
-
#include <stddef.h>
+#include "expat.h"
#include "expat_external.h"
#include "internal.h"
#include "xmlrole.h"
diff -r 4a254750ad20 Modules/expat/xmltok.c
--- a/Modules/expat/xmltok.c Sun May 17 19:36:16 2015 -0400
+++ b/Modules/expat/xmltok.c Mon May 18 02:40:52 2015 +0000
@@ -2,22 +2,9 @@
See the file COPYING for copying permission.
*/
-#ifdef COMPILED_FROM_DSP
-#include "winconfig.h"
-#elif defined(MACOS_CLASSIC)
-#include "macconfig.h"
-#elif defined(__amigaos__)
-#include "amigaconfig.h"
-#elif defined(__WATCOMC__)
-#include "watcomconfig.h"
-#else
-#ifdef HAVE_EXPAT_CONFIG_H
-#include <expat_config.h>
-#endif
-#endif /* ndef COMPILED_FROM_DSP */
-
#include <stddef.h>
+#include "expat.h"
#include "expat_external.h"
#include "internal.h"
#include "xmltok.h"
diff -r 4a254750ad20 Modules/pyexpat.c
--- a/Modules/pyexpat.c Sun May 17 19:36:16 2015 -0400
+++ b/Modules/pyexpat.c Mon May 18 02:40:52 2015 +0000
@@ -1071,6 +1071,9 @@
APPEND(rc, "buffer_text");
APPEND(rc, "buffer_used");
APPEND(rc, "namespace_prefixes");
+ APPEND(rc, "max_entity_expansions");
+ APPEND(rc, "max_entity_indirections");
+ APPEND(rc, "reset_dtd");
APPEND(rc, "ordered_attributes");
APPEND(rc, "specified_attributes");
APPEND(rc, "intern");
@@ -1318,6 +1321,28 @@
if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
return PyLong_FromLong((long) self->buffer_used);
}
+#ifdef XML_BOMB_PROTECTION
+ if (first_char == 'm') {
+ if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_indirections") == 0) {
+ long value = -1;
+ if (!XML_GetFeature(self->itself,
+ XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+ &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+ return PyLong_FromLong(value);
+ }
+ if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_expansions") == 0) {
+ long value = -1;
+ if (!XML_GetFeature(self->itself,
+ XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+ &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+ return PyLong_FromLong(value);
+ }
+ }
+#endif
if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
return get_pybool(self->ns_prefixes);
if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
@@ -1334,6 +1359,15 @@
return self->intern;
}
}
+#ifdef XML_BOMB_PROTECTION
+ if (PyUnicode_CompareWithASCIIString(nameobj, "reset_dtd") == 0) {
+ long value = -1;
+ if (!XML_GetFeature(self->itself, XML_FEATURE_RESET_DTD, &value)) {
+ return PyErr_SetFromErrno(PyExc_ValueError);
+ }
+ return PyBool_FromLong(value);
+ }
+#endif
generic:
return PyObject_GenericGetAttr((PyObject*)self, nameobj);
}
@@ -1472,6 +1506,65 @@
self->buffer_size = new_buffer_size;
return 0;
}
+#ifdef XML_BOMB_PROTECTION /* per-parser bomb-protection attributes; each branch returns 0 on success, -1 with an exception set */
+    if (PyUnicode_CompareWithASCIIString(name, "max_entity_expansions") == 0) {
+        unsigned long value;
+        /* Conversion failure is reported as (unsigned long)-1 plus a pending exception. */
+        value = PyLong_AsUnsignedLong(v);
+        if ((value == (unsigned long)-1) && PyErr_Occurred()) {
+            return -1;
+        }
+        if (value > UINT_MAX) {
+            PyErr_Format(PyExc_ValueError,
+                         "max_entity_expansions must not be greater than %u",
+                         UINT_MAX); /* %u: UINT_MAX is unsigned, %i rendered it as -1 */
+            return -1;
+        }
+        if (!XML_SetFeature(self->itself,
+                            XML_FEATURE_MAX_ENTITY_EXPANSIONS,
+                            value)) {
+            PyErr_SetFromErrno(PyExc_ValueError); /* expat signals rejection through errno */
+            return -1;
+        }
+        return 0;
+    }
+
+    if (PyUnicode_CompareWithASCIIString(name, "max_entity_indirections") == 0) {
+        unsigned long value;
+        /* Conversion failure is reported as (unsigned long)-1 plus a pending exception. */
+        value = PyLong_AsUnsignedLong(v);
+        if ((value == (unsigned long)-1) && PyErr_Occurred()) {
+            return -1;
+        }
+        if (value > UINT_MAX) {
+            PyErr_Format(PyExc_ValueError,
+                         "max_entity_indirections must not be greater than %u",
+                         UINT_MAX); /* %u: UINT_MAX is unsigned, %i rendered it as -1 */
+            return -1;
+        }
+        if (!XML_SetFeature(self->itself,
+                            XML_FEATURE_MAX_ENTITY_INDIRECTIONS,
+                            value)) {
+            PyErr_SetFromErrno(PyExc_ValueError); /* expat signals rejection through errno */
+            return -1;
+        }
+        return 0;
+    }
+
+    if (PyUnicode_CompareWithASCIIString(name, "reset_dtd") == 0) {
+        int value; /* truth value of v: 1, 0, or -1 if the truth test raised */
+
+        if ((value = PyObject_IsTrue(v)) == -1) {
+            return -1;
+        }
+        if (!XML_SetFeature(self->itself, XML_FEATURE_RESET_DTD,
+                            value ? XML_TRUE : XML_FALSE)) {
+            PyErr_SetFromErrno(PyExc_ValueError);
+            return -1;
+        }
+        return 0;
+    }
+#endif
if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
/* If we're changing the character data handler, flush all
@@ -1608,9 +1701,124 @@
/* List of methods defined in the module */
+#ifdef XML_BOMB_PROTECTION
+/* TODO: Convert to Argument Clinic */
+PyDoc_STRVAR(pyexpat_set_reset_dtd_doc,
+"set_reset_dtd(n)\n"
+);
+/* Module-level setter: store the truth value of the argument as the global reset_dtd default. */
+static PyObject *
+pyexpat_set_reset_dtd(PyObject *self, PyObject *args)
+{
+    PyObject *flag_obj;
+    int truth;
+    if (!PyArg_ParseTuple(args, "O:set_reset_dtd", &flag_obj))
+        return NULL;
+    truth = PyObject_IsTrue(flag_obj);
+    if (truth == -1) { /* the truth test itself raised */
+        return NULL;
+    }
+    if (!XML_SetFeatureDefault(XML_FEATURE_RESET_DTD, (long)truth)) {
+        return PyErr_SetFromErrno(PyExc_ValueError);
+    }
+    Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(pyexpat_get_reset_dtd_doc,
+"get_reset_dtd()\n"
+);
+/* Module-level getter (METH_NOARGS): return the global reset_dtd default as a bool. */
+static PyObject *
+pyexpat_get_reset_dtd(PyObject *self, PyObject *args)
+{
+    long value; /* filled in by XML_GetFeatureDefault on success */
+    if (!XML_GetFeatureDefault(XML_FEATURE_RESET_DTD, &value)) {
+        return PyErr_SetFromErrno(PyExc_ValueError);
+    }
+    return PyBool_FromLong(value);
+}
+
+PyDoc_STRVAR(pyexpat_set_max_entity_expansions_doc,
+"set_max_entity_expansions(n)\n"
+);
+/* Module-level setter: make n the global default entity-expansion limit. */
+static PyObject *
+pyexpat_set_max_entity_expansions(PyObject *self, PyObject *args)
+{
+    long limit;
+    if (!PyArg_ParseTuple(args, "l:set_max_entity_expansions", &limit))
+        return NULL;
+    if (XML_SetFeatureDefault(XML_FEATURE_MAX_ENTITY_EXPANSIONS, limit)) {
+        Py_RETURN_NONE;
+    }
+    /* expat rejected the value and reported why through errno */
+    return PyErr_SetFromErrno(PyExc_ValueError);
+}
+
+PyDoc_STRVAR(pyexpat_get_max_entity_expansions_doc,
+"get_max_entity_expansions()\n"
+);
+/* Module-level getter (METH_NOARGS): return the global default entity-expansion limit. */
+static PyObject *
+pyexpat_get_max_entity_expansions(PyObject *self, PyObject *args)
+{
+    long value; /* filled in by XML_GetFeatureDefault on success */
+    if (!XML_GetFeatureDefault(XML_FEATURE_MAX_ENTITY_EXPANSIONS, &value)) {
+        return PyErr_SetFromErrno(PyExc_ValueError);
+    }
+    return PyLong_FromLong(value);
+}
+
+PyDoc_STRVAR(pyexpat_set_max_entity_indirections_doc,
+"set_max_entity_indirections(n)\n"
+);
+/* Module-level setter: make n the global default entity-indirection limit. */
+static PyObject *
+pyexpat_set_max_entity_indirections(PyObject *self, PyObject *args)
+{
+    long limit;
+    if (!PyArg_ParseTuple(args, "l:set_max_entity_indirections", &limit))
+        return NULL;
+    if (XML_SetFeatureDefault(XML_FEATURE_MAX_ENTITY_INDIRECTIONS, limit)) {
+        Py_RETURN_NONE;
+    }
+    /* expat rejected the value and reported why through errno */
+    return PyErr_SetFromErrno(PyExc_ValueError);
+}
+
+PyDoc_STRVAR(pyexpat_get_max_entity_indirections_doc,
+"get_max_entity_indirections()\n"
+);
+/* Module-level getter (METH_NOARGS): return the global default entity-indirection limit. */
+static PyObject *
+pyexpat_get_max_entity_indirections(PyObject *self, PyObject *args)
+{
+    long value; /* filled in by XML_GetFeatureDefault on success */
+    if (!XML_GetFeatureDefault(XML_FEATURE_MAX_ENTITY_INDIRECTIONS, &value)) {
+        return PyErr_SetFromErrno(PyExc_ValueError);
+    }
+    return PyLong_FromLong(value);
+}
+#endif
+
+
static struct PyMethodDef pyexpat_methods[] = {
PYEXPAT_PARSERCREATE_METHODDEF
PYEXPAT_ERRORSTRING_METHODDEF
+#ifdef XML_BOMB_PROTECTION /* module-level accessors for the global bomb-protection defaults */
+ {"set_reset_dtd", (PyCFunction)pyexpat_set_reset_dtd, METH_VARARGS,
+ pyexpat_set_reset_dtd_doc},
+ {"get_reset_dtd", (PyCFunction)pyexpat_get_reset_dtd, METH_NOARGS,
+ pyexpat_get_reset_dtd_doc},
+ {"set_max_entity_expansions", (PyCFunction)pyexpat_set_max_entity_expansions,
+ METH_VARARGS, pyexpat_set_max_entity_expansions_doc},
+ {"get_max_entity_expansions", (PyCFunction)pyexpat_get_max_entity_expansions,
+ METH_NOARGS, pyexpat_get_max_entity_expansions_doc},
+ {"set_max_entity_indirections", (PyCFunction)pyexpat_set_max_entity_indirections,
+ METH_VARARGS, pyexpat_set_max_entity_indirections_doc},
+ {"get_max_entity_indirections", (PyCFunction)pyexpat_get_max_entity_indirections,
+ METH_NOARGS, pyexpat_get_max_entity_indirections_doc},
+#endif /* XML_BOMB_PROTECTION */
{NULL, NULL} /* sentinel */
};
@@ -1836,6 +2044,17 @@
MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
+#ifdef XML_BOMB_PROTECTION /* publish the compiled-in limits and a feature flag on the module */
+    MYCONST(XML_DEFAULT_MAX_ENTITY_INDIRECTIONS);
+    MYCONST(XML_DEFAULT_MAX_ENTITY_EXPANSIONS);
+    Py_INCREF(Py_True); /* PyModule_AddObject steals a reference, so take one first */
+    PyModule_AddObject(m, "XML_BOMB_PROTECTION", Py_True);
+#else
+    PyModule_AddIntConstant(m, "XML_DEFAULT_MAX_ENTITY_INDIRECTIONS", 0);
+    PyModule_AddIntConstant(m, "XML_DEFAULT_MAX_ENTITY_EXPANSIONS", 0);
+    Py_INCREF(Py_False); /* PyModule_AddObject steals a reference, so take one first */
+    PyModule_AddObject(m, "XML_BOMB_PROTECTION", Py_False);
+#endif
#undef MYCONST
#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
@@ -1879,6 +2098,12 @@
capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
capi.SetEncoding = XML_SetEncoding;
capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
+#ifdef XML_BOMB_PROTECTION
+ capi.GetFeature = XML_GetFeature;
+ capi.SetFeature = XML_SetFeature;
+ capi.GetFeatureDefault = XML_GetFeatureDefault;
+ capi.SetFeatureDefault = XML_SetFeatureDefault;
+#endif
/* export using capsule */
capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);