diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -109,3 +109,4 @@ 88a0792e8ba3e4916b24c7e7a522c277d326d66e v3.3.0rc2 c191d21cefafb3832c45570e84854e309aa62eaa v3.3.0rc3 bd8afb90ebf28ba4edc901d4a235f75e7bbc79fd v3.3.0 +afc380863975b201e162f94a101f15e37ca233a6 xmlbomb-2.7-merge1 diff --git a/Include/pyexpat.h b/Include/pyexpat.h --- a/Include/pyexpat.h +++ b/Include/pyexpat.h @@ -3,7 +3,11 @@ /* note: you must import expat.h before importing this module! */ +#ifdef XML_BOMB_PROTECTION +#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.1" +#else #define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.0" +#endif #define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI" struct PyExpat_CAPI @@ -45,6 +49,15 @@ void (*SetUserData)(XML_Parser parser, void *userData); void (*SetStartDoctypeDeclHandler)(XML_Parser parser, XML_StartDoctypeDeclHandler start); +#ifdef XML_BOMB_PROTECTION + /* CAPI 1.1 bomb protection additions */ + int (*GetFeature)(XML_Parser parser, enum XML_FeatureEnum feature, + long *value); + int (*SetFeature)(XML_Parser parser, enum XML_FeatureEnum feature, + long value); + int (*GetFeatureDefault)(enum XML_FeatureEnum feature, long *value); + int (*SetFeatureDefault)(enum XML_FeatureEnum feature, long value); +#endif /* always add new stuff to the end! 
*/ }; diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -7,7 +7,9 @@ from xml.parsers import expat from xml.parsers.expat import errors -from test.support import sortdict, run_unittest +from test.support import sortdict, run_unittest, findfile + +XMLBOMB_XMLFILE = findfile("xmlbomb.xml", subdir="xmltestdata") class SetAttributeTest(unittest.TestCase): @@ -95,11 +97,11 @@ def NotationDeclHandler(self, *args): name, base, sysid, pubid = args - self.out.append('Notation declared: %s' %(args,)) + self.out.append('Notation declared: %s' % (args,)) def UnparsedEntityDeclHandler(self, *args): entityName, base, systemId, publicId, notationName = args - self.out.append('Unparsed entity decl: %s' %(args,)) + self.out.append('Unparsed entity decl: %s' % (args,)) def NotStandaloneHandler(self): self.out.append('Not standalone') @@ -107,7 +109,7 @@ def ExternalEntityRefHandler(self, *args): context, base, sysId, pubId = args - self.out.append('External entity ref: %s' %(args[1:],)) + self.out.append('External entity ref: %s' % (args[1:],)) return 1 def StartDoctypeDeclHandler(self, *args): @@ -436,7 +438,7 @@ 'too many parser events') expected = self.expected_list[self.upto] self.assertEqual(pos, expected, - 'Expected position %s, got position %s' %(pos, expected)) + 'Expected position %s, got position %s' % (pos, expected)) self.upto += 1 def test(self): @@ -491,8 +493,8 @@ self.assertRaises(ValueError, f, 0) def test_unchanged_size(self): - xml1 = b"" + b'a' * 512 - xml2 = b'a'*512 + b'' + xml1 = ("%s" % ('a' * 512)) + xml2 = 'a' * 512 + '' parser = expat.ParserCreate() parser.CharacterDataHandler = self.counting_handler parser.buffer_size = 512 @@ -582,7 +584,7 @@ parser.buffer_size = 2048 self.assertEqual(parser.buffer_size, 2048) - self.n=0 + self.n = 0 parser.Parse(xml1, 0) parser.buffer_size = parser.buffer_size // 2 self.assertEqual(parser.buffer_size, 1024) @@ -677,6 +679,97 @@ b"") 
self.assertEqual(handler_call_args, [("bar", "baz")]) +quadratic_bomb = b"""\ + + + +]> +&a; +""" + +class XmlBombTest(unittest.TestCase): + + @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a") + def test_xmlbomb_exponential(self): + # test that the maximum indirection limitation prevents exponential + # entity expansion attacks (billion laughs). Every expansion increases + # the indirection level. The result of an expansion is never cached. + p = expat.ParserCreate() + self.assertEqual(p.max_entity_indirections, 40) + p.max_entity_indirections = 71 + with self.assertRaises(expat.ExpatError) as e: + with open(XMLBOMB_XMLFILE, "rb") as f: + p.ParseFile(f) + self.assertEqual(str(e.exception), "entity indirection limit exceeded: line 7, column 6") + + p = expat.ParserCreate() + p.max_entity_indirections = 0 + with open(XMLBOMB_XMLFILE, "rb") as f: + p.ParseFile(f) + + p = expat.ParserCreate() + p.max_entity_indirections = 72 # 8 * 8 + 8 + with open(XMLBOMB_XMLFILE, "rb") as f: + p.ParseFile(f) + + @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a") + def test_xmlbomb_quadratic(self): + # test that the total amount of expanded entities chars is limited to + # prevent quadratic blowout attacks. 
+ p = expat.ParserCreate() + self.assertEqual(p.max_entity_expansions, 8 * 1024 ** 2) + + # lower limit to 1024, must fail with one entity of 1025 chars + p.max_entity_expansions = 1024 + xml = quadratic_bomb.replace(b"MARK", b"a" * 1025) + with self.assertRaises(expat.ExpatError) as e: + p.Parse(xml) + self.assertEqual(str(e.exception), "document's entity expansion limit exceeded: line 6, column 6") + + # but passes with an entity of 1024 chars + xml = quadratic_bomb.replace(b"MARK", b"a" * 1024) + p = expat.ParserCreate() + p.max_entity_expansions = 1024 + p.Parse(xml) + + # one level of indirection, b = "&a;&a;" adds 6 chars + xml = quadratic_bomb.replace(b"MARK", b"a" * 512) + xml = xml.replace(b"&a;", b"&b;") + p = expat.ParserCreate() + p.max_entity_expansions = 1024 + with self.assertRaises(expat.ExpatError) as e: + p.Parse(xml) + self.assertEqual(str(e.exception), "document's entity expansion limit exceeded: line 6, column 6") + + p = expat.ParserCreate() + p.max_entity_expansions = 1030 # 2 * x512 + 6 + p.Parse(xml) + + # test default limit of 8 MB + xml = quadratic_bomb.replace(b"MARK", b"a" * 2 * 1024 ** 2) + xml = xml.replace(b"&a;", b"&c;") + p = expat.ParserCreate() + with self.assertRaises(expat.ExpatError) as e: + p.Parse(xml) + self.assertEqual(str(e.exception), "document's entity expansion limit exceeded: line 6, column 6") + + # disabled limit + p = expat.ParserCreate() + p.max_entity_expansions = 0 + p.Parse(xml) + + @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a") + def test_xmlbomb_resetdtd(self): + # with reset_dtd all DTD information are ignored + p = expat.ParserCreate() + self.assertEqual(p.reset_dtd, False) + p.reset_dtd = True + with self.assertRaises(expat.ExpatError) as e: + with open(XMLBOMB_XMLFILE, "rb") as f: + p.ParseFile(f) + self.assertEqual(str(e.exception), "undefined entity: line 7, column 6") + def test_main(): run_unittest(SetAttributeTest, @@ -690,7 +783,8 @@ ChardataBufferTest, 
MalformedInputTest, ErrorMessageTest, - ForeignDTDTests) + ForeignDTDTests, + XmlBombTest) if __name__ == "__main__": test_main() diff --git a/Lib/test/test_sax.py b/Lib/test/test_sax.py --- a/Lib/test/test_sax.py +++ b/Lib/test/test_sax.py @@ -11,8 +11,11 @@ from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ XMLFilterBase from xml.sax.expatreader import create_parser -from xml.sax.handler import feature_namespaces +from xml.sax.handler import (feature_namespaces, + feature_max_entity_indirections, feature_max_entity_expansions, + feature_ignore_dtd, feature_external_ges) from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl +from xml.parsers import expat from io import BytesIO, StringIO import os.path import shutil @@ -22,6 +25,7 @@ TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata") +XMLBOMB_XMLFILE = findfile("xmlbomb.xml", subdir="xmltestdata") try: TEST_XMLFILE.encode("utf-8") TEST_XMLFILE_OUT.encode("utf-8") @@ -588,7 +592,7 @@ def __init__(self): self._notations = [] - self._entities = [] + self._entities = [] def notationDecl(self, name, publicId, systemId): self._notations.append((name, publicId, systemId)) @@ -623,6 +627,7 @@ def test_expat_entityresolver(self): parser = create_parser() + parser.setFeature(feature_external_ges, True) parser.setEntityResolver(self.TestEntityResolver()) result = BytesIO() parser.setContentHandler(XMLGenerator(result)) @@ -925,6 +930,91 @@ self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr") +# =========================================================================== +# +# XML bomb DoS tests +# +# =========================================================================== + +class XmlBombTest(unittest.TestCase): + + def test_protection_features(self): + parser = create_parser() + self.assertEqual(parser.getFeature(feature_max_entity_indirections), + expat.XML_DEFAULT_MAX_ENTITY_INDIRECTIONS) + 
self.assertEqual(parser.getFeature(feature_max_entity_expansions), + expat.XML_DEFAULT_MAX_ENTITY_EXPANSIONS) + self.assertFalse(parser.getFeature(feature_ignore_dtd)) + parser.setFeature(feature_ignore_dtd, True) + self.assertTrue(parser.getFeature(feature_ignore_dtd)) + if expat.XML_BOMB_PROTECTION: + parser.setFeature(feature_max_entity_indirections, 100) + self.assertEqual(parser.getFeature(feature_max_entity_indirections), + 100) + parser.setFeature(feature_max_entity_expansions, 1024) + self.assertEqual(parser.getFeature(feature_max_entity_expansions), + 1024) + + @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a") + def test_xmlbomb_exponential(self): + parser = create_parser() + result = StringIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + inpsrc = InputSource() + with self.assertRaises(SAXParseException) as e: + with open(XMLBOMB_XMLFILE, "rb") as f: + inpsrc.setByteStream(f) + parser.parse(inpsrc) + self.assertEqual(str(e.exception), + ":7:6: entity indirection limit exceeded") + + parser = create_parser() + parser.setFeature(feature_max_entity_indirections, 72) + result = StringIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + inpsrc = InputSource() + with open(XMLBOMB_XMLFILE, "rb") as f: + inpsrc.setByteStream(f) + parser.parse(inpsrc) + + @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a") + def test_xmlbomb_quadratic(self): + parser = create_parser() + result = StringIO() + parser.setContentHandler(XMLGenerator(result)) + parser.setFeature(feature_max_entity_expansions, 1024) + + parser.feed('\n' % ('a' * 1025)) + parser.feed(']>\n') + with self.assertRaises(SAXParseException) as e: + parser.feed('&a;') + self.assertEqual(str(e.exception), + ":4:5: document's entity expansion limit " + "exceeded") + + @unittest.skipUnless(expat.XML_BOMB_PROTECTION, "xml bomb protection n/a") + def test_xmlbomb_resetdtd(self): + parser = create_parser() + result = 
StringIO() + xmlgen = XMLGenerator(result) + + parser.setContentHandler(xmlgen) + inpsrc = InputSource() + + parser.setFeature(feature_ignore_dtd, True) + with self.assertRaises(SAXParseException) as e: + with open(XMLBOMB_XMLFILE) as f: + inpsrc.setByteStream(f) + parser.parse(inpsrc) + self.assertEqual(str(e.exception), + ":7:6: undefined entity") + + def test_main(): run_unittest(MakeParserTest, SaxutilsTest, @@ -933,7 +1023,8 @@ WriterXmlgenTest, ExpatReaderTest, ErrorReportingTest, - XmlReaderTest) + XmlReaderTest, + XmlBombTest) if __name__ == "__main__": test_main() diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -26,6 +26,12 @@ from test import support from test.support import TESTFN, findfile, unlink, import_fresh_module, gc_collect +try: + import pyexpat + XML_BOMB_PROTECTION = pyexpat.XML_BOMB_PROTECTION +except (ImportError, AttributeError): + XML_BOMB_PROTECTION = False + # pyET is the pure-Python implementation. # # ET is pyET in test_xml_etree and is the C accelerated version in @@ -39,6 +45,7 @@ except UnicodeEncodeError: raise unittest.SkipTest("filename is not encodable to utf8") SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") +XMLBOMB_XMLFILE = findfile("xmlbomb.xml", subdir="xmltestdata") SAMPLE_XML = """\ @@ -87,6 +94,14 @@ """ +QUADRATIC_BOMB_TPL = """\ + + +]> +&a; +""" + def sanity(): """ Import sanity. 
@@ -1747,7 +1762,7 @@ expected = '<%s>' % elem serialized = serialize(ET.XML('<%s />' % elem), method='html') self.assertEqual(serialized, expected) - serialized = serialize(ET.XML('<%s>' % (elem,elem)), + serialized = serialize(ET.XML('<%s>' % (elem, elem)), method='html') self.assertEqual(serialized, expected) @@ -2490,6 +2505,57 @@ # -------------------------------------------------------------------- +class XmlBombTest(unittest.TestCase): + + @unittest.skipUnless(XML_BOMB_PROTECTION, "xml bomb protection n/a") + def test_xmlbomb(self): + limit = 8 * 1024 ** 2 + 1 + + # test entity indirections + with self.assertRaisesRegex(ET.ParseError, + 'entity indirection limit exceeded: ' + 'line 7, column 6'): + ET.parse(XMLBOMB_XMLFILE) + + parser = ET.XMLParser(max_entity_indirections=71) + with self.assertRaisesRegex(ET.ParseError, + 'entity indirection limit exceeded: ' + 'line 7, column 6'): + ET.parse(XMLBOMB_XMLFILE, parser=parser) + + parser = ET.XMLParser(max_entity_indirections=72) + ET.parse(XMLBOMB_XMLFILE, parser=parser) + + parser = ET.XMLParser(max_entity_indirections=0) + ET.parse(XMLBOMB_XMLFILE, parser=parser) + + # test ignore_dtd + parser = ET.XMLParser(ignore_dtd=True) + with self.assertRaisesRegex(ET.ParseError, + 'undefined entity: line 7, column 6'): + ET.parse(XMLBOMB_XMLFILE, parser=parser) + + xml = QUADRATIC_BOMB_TPL.replace("MARK", "a" * limit) + with self.assertRaisesRegex(ET.ParseError, + "document's entity expansion limit " + "exceeded: line 5, column 6"): + ET.fromstring(xml) + + parser = ET.XMLParser(max_entity_expansions=0) + e = ET.fromstring(xml, parser=parser) + self.assertEqual(e.text, "a" * limit) + + parser = ET.XMLParser(max_entity_expansions=limit + 1) + e = ET.fromstring(xml, parser=parser) + self.assertEqual(e.text, "a" * limit) + + parser = ET.XMLParser(ignore_dtd=True) + with self.assertRaisesRegex(ET.ParseError, + 'undefined entity: line 5, column 6'): + ET.fromstring(xml, parser=parser) + +# 
-------------------------------------------------------------------- + class CleanContext(object): """Provide default namespace mapping and path cache.""" @@ -2553,6 +2619,7 @@ ElementFindTest, ElementIterTest, TreeBuilderTest, + XmlBombTest, ] # These tests will only run for the pure-Python version that doesn't import diff --git a/Lib/test/xmltestdata/xmlbomb.xml b/Lib/test/xmltestdata/xmlbomb.xml new file mode 100644 --- /dev/null +++ b/Lib/test/xmltestdata/xmlbomb.xml @@ -0,0 +1,7 @@ + + + + +]> +&c; diff --git a/Lib/xml/dom/expatbuilder.py b/Lib/xml/dom/expatbuilder.py --- a/Lib/xml/dom/expatbuilder.py +++ b/Lib/xml/dom/expatbuilder.py @@ -172,13 +172,18 @@ def install(self, parser): """Install the callbacks needed to build the DOM into the parser.""" # This creates circular references! - parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler + if not self._options.ignore_dtd: + parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler + parser.NotationDeclHandler = self.notation_decl_handler parser.StartElementHandler = self.first_element_handler parser.EndElementHandler = self.end_element_handler parser.ProcessingInstructionHandler = self.pi_handler if self._options.entities: + # or ignore_dtd parser.EntityDeclHandler = self.entity_decl_handler - parser.NotationDeclHandler = self.notation_decl_handler + if self._options.external_general_entities: + # or ignore_dtd + parser.ExternalEntityRefHandler = self.external_entity_ref_handler if self._options.comments: parser.CommentHandler = self.comment_handler if self._options.cdata_sections: @@ -187,10 +192,14 @@ parser.CharacterDataHandler = self.character_data_handler_cdata else: parser.CharacterDataHandler = self.character_data_handler - parser.ExternalEntityRefHandler = self.external_entity_ref_handler parser.XmlDeclHandler = self.xml_decl_handler parser.ElementDeclHandler = self.element_decl_handler parser.AttlistDeclHandler = self.attlist_decl_handler + if expat.XML_BOMB_PROTECTION: + options 
= self._options + parser.reset_dtd = options.ignore_dtd + parser.max_entity_indirections = options.max_entity_indirections + parser.max_entity_expansions = options.max_entity_expansions def parseFile(self, file): """Parse a document from a file object, returning the document @@ -199,7 +208,7 @@ first_buffer = True try: while 1: - buffer = file.read(16*1024) + buffer = file.read(16 * 1024) if not buffer: break parser.Parse(buffer, 0) @@ -272,7 +281,7 @@ def character_data_handler_cdata(self, data): childNodes = self.curNode.childNodes if self._cdata: - if ( self._cdata_continue + if (self._cdata_continue and childNodes[-1].nodeType == CDATA_SECTION_NODE): childNodes[-1].appendData(data) return @@ -356,7 +365,7 @@ for i in range(0, len(attributes), 2): a = minidom.Attr(attributes[i], EMPTY_NAMESPACE, None, EMPTY_PREFIX) - value = attributes[i+1] + value = attributes[i + 1] a.value = value a.ownerDocument = self.document _set_attribute_node(node, a) @@ -763,7 +772,7 @@ _attrsNS = node._attrsNS for i in range(0, len(attributes), 2): aname = attributes[i] - value = attributes[i+1] + value = attributes[i + 1] if ' ' in aname: uri, localname, prefix, qname = _parse_ns_name(self, aname) a = minidom.Attr(qname, uri, localname, prefix) diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py --- a/Lib/xml/dom/xmlbuilder.py +++ b/Lib/xml/dom/xmlbuilder.py @@ -2,6 +2,7 @@ import copy import xml.dom +from xml.parsers import expat from xml.dom.NodeFilter import NodeFilter @@ -22,9 +23,9 @@ namespaces = 1 namespace_declarations = True validation = False - external_parameter_entities = True - external_general_entities = True - external_dtd_subset = True + external_parameter_entities = False # changed + external_general_entities = False # changed + external_dtd_subset = False # changed validate_if_schema = False validate = False datatype_normalization = False @@ -40,6 +41,10 @@ errorHandler = None filter = None + max_entity_indirections = 
expat.XML_DEFAULT_MAX_ENTITY_INDIRECTIONS + max_entity_expansions = expat.XML_DEFAULT_MAX_ENTITY_EXPANSIONS + ignore_dtd = False + class DOMBuilder: entityResolver = None @@ -157,6 +162,18 @@ ("namespaces", 0)], ("namespaces", 1): [ ("namespaces", 1)], + ("max_entity_indirections", 0): [ + ("max_entity_indirections", 0)], + ("max_entity_expansions", 0): [ + ("max_entity_expansions", 0)], + ("ignore_dtd", 0): [ + ("ignore_dtd", 0), + ("external_general_entities", 0), + ("external_parameter_entities", 0), + ("create_entity_ref_nodes", 0), + ("entities", 0)], + ("ignore_dtd", 1): [ + ("ignore_dtd", 1)], } def getFeature(self, name): diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1498,7 +1498,7 @@ def end(self, tag): self._flush() self._last = self._elem.pop() - assert self._last.tag == tag,\ + assert self._last.tag == tag, \ "end tag mismatch (expected %s, got %s)" % ( self._last.tag, tag) self._tail = 1 @@ -1519,7 +1519,9 @@ class XMLParser: - def __init__(self, html=0, target=None, encoding=None): + def __init__(self, html=0, target=None, encoding=None, + max_entity_indirections=None, max_entity_expansions=None, + ignore_dtd=False): try: from xml.parsers import expat except ImportError: @@ -1530,6 +1532,14 @@ "No module named expat; use SimpleXMLTreeBuilder instead" ) parser = expat.ParserCreate(encoding, "}") + if expat.XML_BOMB_PROTECTION: + if max_entity_indirections is not None: + parser.max_entity_indirections = max_entity_indirections + if max_entity_expansions is not None: + parser.max_entity_expansions = max_entity_expansions + if ignore_dtd: + parser.reset_dtd = True + if target is None: target = TreeBuilder() # underscored names are provided for compatibility only @@ -1601,7 +1611,7 @@ attrib = {} if attrib_in: for i in range(0, len(attrib_in), 2): - attrib[fixname(attrib_in[i])] = attrib_in[i+1] + attrib[fixname(attrib_in[i])] = attrib_in[i + 1] return 
self.target.start(tag, attrib) def _end(self, tag): @@ -1651,7 +1661,9 @@ pubid = None else: return - if hasattr(self.target, "doctype"): + if self.ignore_dtd: + pass + elif hasattr(self.target, "doctype"): self.target.doctype(name, pubid, system[1:-1]) elif self.doctype != self._XMLParser__doctype: # warn about deprecated call @@ -1659,6 +1671,18 @@ self.doctype(name, pubid, system[1:-1]) self._doctype = None + @property + def max_entity_indirections(self): + return getattr(self.parser, "max_entity_indirections", None) + + @property + def max_entity_expansions(self): + return getattr(self.parser, "max_entity_expansions", None) + + @property + def ignore_dtd(self): + return getattr(self.parser, "reset_dtd", None) + ## # (Deprecated) Handles a doctype declaration. # diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py --- a/Lib/xml/sax/expatreader.py +++ b/Lib/xml/sax/expatreader.py @@ -10,6 +10,9 @@ from xml.sax.handler import feature_namespace_prefixes from xml.sax.handler import feature_external_ges, feature_external_pes from xml.sax.handler import feature_string_interning +from xml.sax.handler import feature_max_entity_indirections +from xml.sax.handler import feature_max_entity_expansions +from xml.sax.handler import feature_ignore_dtd from xml.sax.handler import property_xml_string, property_interning_dict # xml.parsers.expat does not raise ImportError in Jython @@ -84,7 +87,17 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator): """SAX driver for the pyexpat C module.""" - def __init__(self, namespaceHandling=0, bufsize=2**16-20): + # default settings for security critical issues + # whether external general entities are enabled + EXTERNAL_GES = False + # maximum number of indirections allowed when resolving nested entities + MAX_ENTITY_INDIRECTIONS = expat.XML_DEFAULT_MAX_ENTITY_INDIRECTIONS + # limit on the total number of expanded entity characters + MAX_ENTITY_EXPANSIONS = expat.XML_DEFAULT_MAX_ENTITY_EXPANSIONS + # ignore DTD information by 
resetting DTD information + IGNORE_DTD = False + + def __init__(self, namespaceHandling=0, bufsize=2 ** 16 - 20): xmlreader.IncrementalParser.__init__(self, bufsize) self._source = xmlreader.InputSource() self._parser = None @@ -92,8 +105,11 @@ self._lex_handler_prop = None self._parsing = 0 self._entity_stack = [] - self._external_ges = 1 + self._external_ges = self.EXTERNAL_GES self._interning = None + self._max_entity_indirections = self.MAX_ENTITY_INDIRECTIONS + self._max_entity_expansions = self.MAX_ENTITY_EXPANSIONS + self._ignore_dtd = self.IGNORE_DTD # XMLReader methods @@ -127,6 +143,12 @@ return 0 elif name == feature_external_ges: return self._external_ges + elif name == feature_max_entity_indirections: + return self._max_entity_indirections + elif name == feature_max_entity_expansions: + return self._max_entity_expansions + elif name == feature_ignore_dtd: + return self._ignore_dtd raise SAXNotRecognizedException("Feature '%s' not recognized" % name) def setFeature(self, name, state): @@ -155,6 +177,21 @@ if state: raise SAXNotSupportedException( "expat does not report namespace prefixes") + elif name == feature_max_entity_indirections: + if not expat.XML_BOMB_PROTECTION: + raise SAXNotSupportedException( + "expat does not support max entity indirections") + self._max_entity_indirections = state + elif name == feature_max_entity_expansions: + if not expat.XML_BOMB_PROTECTION: + raise SAXNotSupportedException( + "expat does not support max entity expansions") + self._max_entity_expansions = state + elif name == feature_ignore_dtd: + if not expat.XML_BOMB_PROTECTION and state: + raise SAXNotSupportedException( + "expat does not support ignore dtd") + self._ignore_dtd = bool(state) else: raise SAXNotRecognizedException( "Feature '%s' not recognized" % name) @@ -193,7 +230,7 @@ # IncrementalParser methods - def feed(self, data, isFinal = 0): + def feed(self, data, isFinal=0): if not self._parsing: self.reset() self._parsing = 1 @@ -214,7 +251,7 @@ if 
self._entity_stack: # If we are completing an external entity, do nothing here return - self.feed("", isFinal = 1) + self.feed("", isFinal=1) self._cont_handler.endDocument() self._parsing = 0 # break cycle created by expat handlers pointing to our methods @@ -244,6 +281,27 @@ parser.StartDoctypeDeclHandler = self.start_doctype_decl parser.EndDoctypeDeclHandler = lex.endDTD + def _set_ignore_dtd(self): + parser = self._parser + if self._ignore_dtd: + parser.reset_dtd = True + parser.UnparsedEntityDeclHandler = None + parser.EntityDeclHandler = None + parser.NotationDeclHandler = None + parser.StartDoctypeDeclHandler = None + parser.EndDoctypeDeclHandler = None + parser.NotationDeclHandler = None + parser.ExternalEntityRefHandler = None + try: + parser.SkippedEntityHandler = None + except AttributeError: + # This pyexpat does not support SkippedEntity + pass + + else: + parser.reset_dtd = False + # handlers are already reset + def reset(self): if self._namespaces: self._parser = expat.ParserCreate(self._source.getEncoding(), " ", @@ -253,7 +311,7 @@ self._parser.EndElementHandler = self.end_element_ns else: self._parser = expat.ParserCreate(self._source.getEncoding(), - intern = self._interning) + intern=self._interning) self._parser.StartElementHandler = self.start_element self._parser.EndElementHandler = self.end_element @@ -278,6 +336,14 @@ self._parser.SetParamEntityParsing( expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) + if expat.XML_BOMB_PROTECTION: + parser = self._parser + if self._max_entity_indirections is not None: + parser.max_entity_indirections = self._max_entity_indirections + if self._max_entity_expansions is not None: + parser.max_entity_expansions = self._max_entity_expansions + self._set_ignore_dtd() + self._parsing = 0 self._entity_stack = [] @@ -399,7 +465,7 @@ def skipped_entity_handler(self, name, is_pe): if is_pe: # The SAX spec requires to report skipped PEs with a '%' - name = '%'+name + name = '%' + name 
self._cont_handler.skippedEntity(name) # --- diff --git a/Lib/xml/sax/handler.py b/Lib/xml/sax/handler.py --- a/Lib/xml/sax/handler.py +++ b/Lib/xml/sax/handler.py @@ -277,12 +277,28 @@ # DTD subset. # access: (parsing) read-only; (not parsing) read/write +feature_max_entity_indirections = \ + "http://www.python.org/sax/features/max-entity-indirections" + +feature_max_entity_expansions = \ + "http://www.python.org/sax/features/max-entity-expansions" + +feature_ignore_dtd = "http://www.python.org/sax/features/ignore-dtd" +# true: Ignore all doctype information and reset all doctype information +# after the DTD block has been parsed. No DTD relevant handler is +# called. +# false: Obey DTD +# access: (parsing) read-only; (not parsing) read/write + all_features = [feature_namespaces, feature_namespace_prefixes, feature_string_interning, feature_validation, feature_external_ges, - feature_external_pes] + feature_external_pes, + feature_max_entity_indirections, + feature_max_entity_expansions, + feature_ignore_dtd] #============================================================================ diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3197,13 +3197,86 @@ XMLParserObject *self_xp = (XMLParserObject *)self; PyObject *target = NULL, *html = NULL; char *encoding = NULL; - static char *kwlist[] = {"html", "target", "encoding", 0}; + long ignore_dtd_flag = 0; + +#ifdef XML_BOMB_PROTECTION + PyObject *ignore_dtd = NULL, *indirections = NULL, *expansions = NULL; + long max_indirections; + long max_expansions; + + static char *kwlist[] = {"html", "target", "encoding", + "max_entity_indirections", + "max_entity_expansions", "ignore_dtd", 0}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOzOOO:XMLParser", kwlist, + &html, &target, &encoding, + &indirections, &expansions, + &ignore_dtd)) { + return -1; + } +#else + static char *kwlist[] = {"html", "target", "encoding", NULL}; if 
(!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist, &html, &target, &encoding)) { return -1; } +#endif + +#ifdef XML_BOMB_PROTECTION + if (indirections == NULL || indirections == Py_None) { + if (!EXPAT(GetFeatureDefault)(XML_FEATURE_MAX_ENTITY_INDIRECTIONS, + &max_indirections)) { + PyErr_SetFromErrno(PyExc_ValueError); + return -1; + } + } + else { + max_indirections = PyLong_AsLong(indirections); + if ((max_indirections == -1) && PyErr_Occurred()) { + return -1; + } + if ((max_indirections > UINT_MAX) || (max_indirections < 0)) { + PyErr_Format(PyExc_ValueError, + "max_entity_indirections be between 0 and %i", + UINT_MAX); + return -1; + } + } + + if (expansions == NULL || expansions == Py_None) { + if (!EXPAT(GetFeatureDefault)(XML_FEATURE_MAX_ENTITY_EXPANSIONS, + &max_expansions)) { + PyErr_SetFromErrno(PyExc_ValueError); + return -1; + } + } + else { + max_expansions = PyLong_AsLong(expansions); + if ((max_expansions == -1) && PyErr_Occurred()) { + return -1; + } + if ((max_expansions > UINT_MAX) || (max_expansions < 0)) { + PyErr_Format(PyExc_ValueError, + "max_entity_expansions be between 0 and %i", + UINT_MAX); + return -1; + } + } + + if (ignore_dtd == NULL) { + if (!EXPAT(GetFeatureDefault)(XML_FEATURE_RESET_DTD, + &ignore_dtd_flag)) { + PyErr_SetFromErrno(PyExc_ValueError); + return -1; + } + } + else if ((ignore_dtd_flag = PyObject_IsTrue(ignore_dtd)) == -1) { + return -1; + } +#endif + self_xp->entity = PyDict_New(); if (!self_xp->entity) return -1; @@ -3222,6 +3295,27 @@ return -1; } +#ifdef XML_BOMB_PROTECTION + if (!EXPAT(SetFeature)(self_xp->parser, + XML_FEATURE_MAX_ENTITY_INDIRECTIONS, + max_indirections)) { + PyErr_SetFromErrno(PyExc_ValueError); + return -1; + } + if (!EXPAT(SetFeature)(self_xp->parser, + XML_FEATURE_MAX_ENTITY_EXPANSIONS, + max_expansions)) { + PyErr_SetFromErrno(PyExc_ValueError); + return -1; + } + if (!EXPAT(SetFeature)(self_xp->parser, + XML_FEATURE_RESET_DTD, + ignore_dtd_flag)) { + 
PyErr_SetFromErrno(PyExc_ValueError); + return -1; + } +#endif + if (target) { Py_INCREF(target); } else { @@ -3241,7 +3335,9 @@ self_xp->handle_comment = PyObject_GetAttrString(target, "comment"); self_xp->handle_pi = PyObject_GetAttrString(target, "pi"); self_xp->handle_close = PyObject_GetAttrString(target, "close"); - self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype"); + if (!ignore_dtd_flag) { + self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype"); + } PyErr_Clear(); @@ -3270,10 +3366,12 @@ self_xp->parser, (XML_ProcessingInstructionHandler) expat_pi_handler ); - EXPAT(SetStartDoctypeDeclHandler)( - self_xp->parser, - (XML_StartDoctypeDeclHandler) expat_start_doctype_handler - ); + if (!ignore_dtd_flag) { + EXPAT(SetStartDoctypeDeclHandler)( + self_xp->parser, + (XML_StartDoctypeDeclHandler) expat_start_doctype_handler + ); + } EXPAT(SetUnknownEncodingHandler)( self_xp->parser, (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL @@ -3590,6 +3688,35 @@ "Expat %d.%d.%d", XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); } +#ifdef XML_BOMB_PROTECTION + else if (PyUnicode_CompareWithASCIIString(nameobj, "ignore_dtd") == 0) { + long value = -1; + if (!EXPAT(GetFeature)(self->parser, XML_FEATURE_RESET_DTD, + &value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + + return PyBool_FromLong(value); + } + else if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_indirections") == 0) { + long value = -1; + if (!EXPAT(GetFeature)(self->parser, + XML_FEATURE_MAX_ENTITY_INDIRECTIONS, + &value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + return PyLong_FromLong(value); + } + else if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_expansions") == 0) { + long value = -1; + if (!EXPAT(GetFeature)(self->parser, + XML_FEATURE_MAX_ENTITY_EXPANSIONS, + &value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + return PyLong_FromLong(value); + } +#endif else goto generic; diff --git 
a/Modules/expat/expat.h b/Modules/expat/expat.h --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -5,6 +5,20 @@ #ifndef Expat_INCLUDED #define Expat_INCLUDED 1 +#define XML_BOMB_PROTECTION 1 /* Python only: hard coded */ + +#ifdef COMPILED_FROM_DSP +#include "winconfig.h" +#elif defined(MACOS_CLASSIC) +#include "macconfig.h" +#elif defined(__amigaos__) +#include "amigaconfig.h" +#elif defined(__WATCOMC__) +#include "watcomconfig.h" +#elif defined(HAVE_EXPAT_CONFIG_H) +#include +#endif /* ndef COMPILED_FROM_DSP */ + #ifdef __VMS /* 0 1 2 3 0 1 2 3 1234567890123456789012345678901 1234567890123456789012345678901 */ @@ -96,6 +110,12 @@ XML_ERROR_RESERVED_PREFIX_XML, XML_ERROR_RESERVED_PREFIX_XMLNS, XML_ERROR_RESERVED_NAMESPACE_URI +#ifdef XML_BOMB_PROTECTION + /* Added in 2.2. */ + , + XML_ERROR_ENTITY_INDIRECTIONS, + XML_ERROR_ENTITY_EXPANSION +#endif }; enum XML_Content_Type { @@ -1020,6 +1040,13 @@ XML_FEATURE_NS, XML_FEATURE_LARGE_SIZE, XML_FEATURE_ATTR_INFO +#ifdef XML_BOMB_PROTECTION + /* Added in 2.2. */ + , + XML_FEATURE_MAX_ENTITY_INDIRECTIONS, + XML_FEATURE_MAX_ENTITY_EXPANSIONS, + XML_FEATURE_RESET_DTD +#endif /* Additional features must be added to the end of this enum. */ }; @@ -1032,6 +1059,92 @@ XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); +/* Protection against XML bomb DoS attacks + Added in 2.2. + */ +#ifdef XML_BOMB_PROTECTION + +/* XML_FEATURE_MAX_ENTITY_INDIRECTIONS + + Limit the amount of indirections that are allowed to occur during the + expansion of a nested entity. A counter starts when an entity reference + is encountered. It resets after the entity is fully expanded. The limit + protects the parser against exponential entity expansion attacks (aka + billion laughs attack). When the limit is exceeded the parser stops and + fails with `XML_ERROR_ENTITY_INDIRECTIONS`. + A value of 0 disables the protection. + + Supported range: 0 .. 
UINT_MAX + Default: 40 + */ + +#ifndef XML_DEFAULT_MAX_ENTITY_INDIRECTIONS +#define XML_DEFAULT_MAX_ENTITY_INDIRECTIONS 40 +#endif + +/* XML_FEATURE_MAX_ENTITY_EXPANSIONS + + Limit the total length of all entity expansions throughout the entire + document. The lengths of all entities are accumulated in a parser variable. + The setting protects against quadratic blowup attacks (lots of expansions + of a large entity declaration). When the sum of all entities exceeds + the limit, the parser stops and fails with `XML_ERROR_ENTITY_EXPANSION`. + A value of 0 disables the protection. + + Supported range: 0 .. UINT_MAX + Default: 8 MB + */ +#ifndef XML_DEFAULT_MAX_ENTITY_EXPANSIONS +#define XML_DEFAULT_MAX_ENTITY_EXPANSIONS 1 << 23 /* 8 MiB */ +#endif + +/* XML_FEATURE_RESET_DTD + + Reset all DTD information after the block has been parsed. When + the flag is set (default: false) all DTD information after the + endDoctypeDeclHandler has been called. The flag can be set inside the + endDoctypeDeclHandler. Without DTD information any entity reference in + the document body leads to a XML_ERROR_UNDEFINED_ENTITY. + + Supported range: 0, 1 + Default: 0 + */ +#ifndef XML_DEFAULT_DTD_RESET +#define XML_DEFAULT_DTD_RESET XML_FALSE +#endif + +/* Feature modifiers + + On success the functions shall return 1 and modify or retrieve the value. + + Otherwise, 0 shall be returned and errno set to indicate an error. The + value shall not be modified if a function signals an error. 
+ + ENOENT feature is not supported + EINVAL value is invalid and outside the allowed range + + As of now three features are supported: + - XML_FEATURE_MAX_ENTITY_INDIRECTIONS + - XML_FEATURE_MAX_ENTITY_EXPANSIONS + - XML_FEATURE_RESET_DTD + + */ + +/* Get / set feature of XML parser instance + */ +int XML_GetFeature(XML_Parser parser, enum XML_FeatureEnum feature, + long *value); + +int XML_SetFeature(XML_Parser parser, enum XML_FeatureEnum feature, + long value); + +/* Get / set global default + */ +int XML_GetFeatureDefault(enum XML_FeatureEnum feature, long *value); +int XML_SetFeatureDefault(enum XML_FeatureEnum feature, long value); + +#endif /* XML_BOMB_PROTECTION */ + /* Expat follows the GNU/Linux convention of odd number minor version for beta/development releases and even number minor version for stable diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -7,21 +7,10 @@ #include #include /* UINT_MAX */ #include /* time() */ +#include #define XML_BUILDING_EXPAT 1 -#ifdef COMPILED_FROM_DSP -#include "winconfig.h" -#elif defined(MACOS_CLASSIC) -#include "macconfig.h" -#elif defined(__amigaos__) -#include "amigaconfig.h" -#elif defined(__WATCOMC__) -#include "watcomconfig.h" -#elif defined(HAVE_EXPAT_CONFIG_H) -#include -#endif /* ndef COMPILED_FROM_DSP */ - #include "ascii.h" #include "expat.h" @@ -141,6 +130,12 @@ #define EXPAND_SPARE 24 +#ifdef XML_BOMB_PROTECTION +static unsigned int defaultMaxEntityIndirections = XML_DEFAULT_MAX_ENTITY_INDIRECTIONS; +static unsigned int defaultMaxEntityExpansions = XML_DEFAULT_MAX_ENTITY_EXPANSIONS; +static XML_Bool defaultResetDTDFlag = XML_DEFAULT_DTD_RESET; +#endif + typedef struct binding { struct prefix *prefix; struct binding *nextTagBinding; @@ -151,6 +146,11 @@ int uriAlloc; } BINDING; +/* Python only: workaround for PREFIX macro in PC/pyconfig.h */ +#ifdef PREFIX +#undef PREFIX +#endif + typedef struct prefix { const XML_Char *name; 
BINDING *binding; @@ -557,6 +557,13 @@ enum XML_ParamEntityParsing m_paramEntityParsing; #endif unsigned long m_hash_secret_salt; +#ifdef XML_BOMB_PROTECTION + unsigned int m_entityIndirections; + unsigned int m_maxEntityIndirections; + unsigned int m_entityExpansions; + unsigned int m_maxEntityExpansions; + XML_Bool m_resetDTDFlag; +#endif /* XML_BOMB_PROTECTION */ }; #define MALLOC(s) (parser->m_mem.malloc_fcn((s))) @@ -666,6 +673,13 @@ #define paramEntityParsing (parser->m_paramEntityParsing) #endif /* XML_DTD */ #define hash_secret_salt (parser->m_hash_secret_salt) +#ifdef XML_BOMB_PROTECTION +#define entityIndirections (parser->m_entityIndirections) +#define maxEntityIndirections (parser->m_maxEntityIndirections) +#define entityExpansions (parser->m_entityExpansions) +#define maxEntityExpansions (parser->m_maxEntityExpansions) +#define resetDTDFlag (parser->m_resetDTDFlag) +#endif /* XML_BOMB_PROTECTION */ XML_Parser XMLCALL XML_ParserCreate(const XML_Char *encodingName) @@ -756,6 +770,13 @@ buffer = NULL; bufferLim = NULL; +#ifdef XML_BOMB_PROTECTION + entityIndirections = 0; + maxEntityIndirections = defaultMaxEntityIndirections; + entityExpansions = 0; + maxEntityExpansions = defaultMaxEntityExpansions; + resetDTDFlag = defaultResetDTDFlag; +#endif attsSize = INIT_ATTS_SIZE; atts = (ATTRIBUTE *)MALLOC(attsSize * sizeof(ATTRIBUTE)); @@ -1886,6 +1907,109 @@ return position.columnNumber; } +#ifdef XML_BOMB_PROTECTION + +int XMLCALL +XML_GetFeature(XML_Parser parser, enum XML_FeatureEnum feature, long *value) { + switch (feature) { + case XML_FEATURE_MAX_ENTITY_INDIRECTIONS: + *value = (long)maxEntityIndirections; + return 1; + case XML_FEATURE_MAX_ENTITY_EXPANSIONS: + *value = (long)maxEntityExpansions; + return 1; + case XML_FEATURE_RESET_DTD: + *value = (long)resetDTDFlag; + return 1; + default: + errno = ENOENT; + return 0; + } +} + +int XMLCALL +XML_SetFeature(XML_Parser parser, enum XML_FeatureEnum feature, long value) { + switch (feature) { + case 
XML_FEATURE_MAX_ENTITY_INDIRECTIONS: + if ((value < 0) || (value > UINT_MAX)) { + errno = EINVAL; + return 0; + } + maxEntityIndirections = (unsigned int)value; + return 1; + case XML_FEATURE_MAX_ENTITY_EXPANSIONS: + if ((value < 0) || (value > UINT_MAX)) { + errno = EINVAL; + return 0; + } + maxEntityExpansions = (unsigned int)value; + return 1; + case XML_FEATURE_RESET_DTD: + if ((value == 0) || (value == 1)) { + resetDTDFlag = (XML_Bool)value; + return 1; + } else { + errno = EINVAL; + return 0; + } + default: + errno = ENOENT; + return 0; + } +} + +int XMLCALL +XML_GetFeatureDefault(enum XML_FeatureEnum feature, long *value) { + switch (feature) { + case XML_FEATURE_MAX_ENTITY_INDIRECTIONS: + *value = (long)defaultMaxEntityIndirections; + return 1; + case XML_FEATURE_MAX_ENTITY_EXPANSIONS: + *value = (long)defaultMaxEntityExpansions; + return 1; + case XML_FEATURE_RESET_DTD: + *value = (long)defaultResetDTDFlag; + return 1; + default: + errno = ENOENT; + return 0; + } +} + +int XMLCALL +XML_SetFeatureDefault(enum XML_FeatureEnum feature, long value) { + switch (feature) { + case XML_FEATURE_MAX_ENTITY_INDIRECTIONS: + if ((value < 0) || (value > UINT_MAX)) { + errno = EINVAL; + return 0; + } + defaultMaxEntityIndirections = (unsigned int)value; + return 1; + case XML_FEATURE_MAX_ENTITY_EXPANSIONS: + if ((value < 0) || (value > UINT_MAX)) { + errno = EINVAL; + return 0; + } + defaultMaxEntityExpansions = (unsigned int)value; + return 1; + case XML_FEATURE_RESET_DTD: + if ((value == 0) || (value == 1)) { + defaultResetDTDFlag = (XML_Bool)value; + return 1; + } else { + errno = EINVAL; + return 0; + } + default: + errno = ENOENT; + return 0; + } +} + +#endif + + void XMLCALL XML_FreeContentModel(XML_Parser parser, XML_Content *model) { @@ -1969,6 +2093,11 @@ XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"), XML_L("reserved prefix (xmlns) must not be declared or undeclared"), XML_L("prefix must not be bound to one of the 
reserved namespace names") +#ifdef XML_BOMB_PROTECTION + , + XML_L("entity indirection limit exceeded"), + XML_L("document's entity expansion limit exceeded") +#endif }; if (code > 0 && code < sizeof(message)/sizeof(message[0])) return message[code]; @@ -2040,6 +2169,17 @@ #ifdef XML_ATTR_INFO {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, #endif +#ifdef XML_BOMB_PROTECTION + {XML_FEATURE_MAX_ENTITY_INDIRECTIONS, + XML_L("XML_FEATURE_MAX_ENTITY_INDIRECTIONS"), + XML_DEFAULT_MAX_ENTITY_INDIRECTIONS}, + {XML_FEATURE_MAX_ENTITY_EXPANSIONS, + XML_L("XML_FEATURE_MAX_ENTITY_EXPANSIONS"), + XML_DEFAULT_MAX_ENTITY_EXPANSIONS}, + {XML_FEATURE_RESET_DTD, + XML_L("XML_FEATURE_RESET_DTD"), + XML_DEFAULT_DTD_RESET}, +#endif {XML_FEATURE_END, NULL, 0} }; @@ -2238,9 +2378,15 @@ { /* save one level of indirection */ DTD * const dtd = _dtd; - const char **eventPP; const char **eventEndPP; + +#ifdef XML_BOMB_PROTECTION + if (haveMore) { + entityIndirections = 0; + } +#endif + if (enc == encoding) { eventPP = &eventPtr; eventEndPP = &eventEndPtr; @@ -3974,6 +4120,11 @@ endDoctypeDeclHandler(handlerArg); handleDefault = XML_FALSE; } +#ifdef XML_BOMB_PROTECTION + if (resetDTDFlag) { + dtdReset(dtd, &parser->m_mem); + } +#endif break; case XML_ROLE_INSTANCE_START: #ifdef XML_DTD @@ -4800,6 +4951,22 @@ enum XML_Error result; OPEN_INTERNAL_ENTITY *openEntity; +#ifdef XML_BOMB_PROTECTION + if (maxEntityIndirections && (entityIndirections > maxEntityIndirections)) { + return XML_ERROR_ENTITY_INDIRECTIONS; + } + if (maxEntityExpansions) { + if (entity->textLen > UINT_MAX - entityExpansions) { + /* overflow */ + return XML_ERROR_ENTITY_EXPANSION; + } + entityExpansions = entityExpansions + entity->textLen; + if (entityExpansions > maxEntityExpansions) { + return XML_ERROR_ENTITY_EXPANSION; + } + } +#endif + if (freeInternalEntities) { openEntity = freeInternalEntities; freeInternalEntities = openEntity->next; @@ -4824,13 +4991,21 @@ #ifdef XML_DTD if (entity->is_param) { int tok = 
XmlPrologTok(internalEncoding, textStart, textEnd, &next); +#ifdef XML_BOMB_PROTECTION + entityIndirections++; +#endif result = doProlog(parser, internalEncoding, textStart, textEnd, tok, next, &next, XML_FALSE); } else #endif /* XML_DTD */ + { +#ifdef XML_BOMB_PROTECTION + entityIndirections++; +#endif result = doContent(parser, tagLevel, internalEncoding, textStart, textEnd, &next, XML_FALSE); + } if (result == XML_ERROR_NONE) { if (textEnd != next && ps_parsing == XML_SUSPENDED) { diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c --- a/Modules/expat/xmlrole.c +++ b/Modules/expat/xmlrole.c @@ -4,20 +4,7 @@ #include -#ifdef COMPILED_FROM_DSP -#include "winconfig.h" -#elif defined(MACOS_CLASSIC) -#include "macconfig.h" -#elif defined(__amigaos__) -#include "amigaconfig.h" -#elif defined(__WATCOMC__) -#include "watcomconfig.h" -#else -#ifdef HAVE_EXPAT_CONFIG_H -#include -#endif -#endif /* ndef COMPILED_FROM_DSP */ - +#include "expat.h" #include "expat_external.h" #include "internal.h" #include "xmlrole.h" diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c --- a/Modules/expat/xmltok.c +++ b/Modules/expat/xmltok.c @@ -4,20 +4,7 @@ #include -#ifdef COMPILED_FROM_DSP -#include "winconfig.h" -#elif defined(MACOS_CLASSIC) -#include "macconfig.h" -#elif defined(__amigaos__) -#include "amigaconfig.h" -#elif defined(__WATCOMC__) -#include "watcomconfig.h" -#else -#ifdef HAVE_EXPAT_CONFIG_H -#include -#endif -#endif /* ndef COMPILED_FROM_DSP */ - +#include "expat.h" #include "expat_external.h" #include "internal.h" #include "xmltok.h" diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1322,6 +1322,28 @@ if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0) return PyLong_FromLong((long) self->buffer_used); } +#ifdef XML_BOMB_PROTECTION + if (first_char == 'm') { + if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_indirections") == 0) { + long value = -1; + if 
(!XML_GetFeature(self->itself, + XML_FEATURE_MAX_ENTITY_INDIRECTIONS, + &value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + return PyLong_FromLong(value); + } + if (PyUnicode_CompareWithASCIIString(nameobj, "max_entity_expansions") == 0) { + long value = -1; + if (!XML_GetFeature(self->itself, + XML_FEATURE_MAX_ENTITY_EXPANSIONS, + &value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + return PyLong_FromLong(value); + } + } +#endif if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0) return get_pybool(self->ns_prefixes); if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0) @@ -1338,6 +1360,15 @@ return self->intern; } } +#ifdef XML_BOMB_PROTECTION + if (PyUnicode_CompareWithASCIIString(nameobj, "reset_dtd") == 0) { + long value = -1; + if (!XML_GetFeature(self->itself, XML_FEATURE_RESET_DTD, &value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + return PyBool_FromLong(value); + } +#endif generic: return PyObject_GenericGetAttr((PyObject*)self, nameobj); } @@ -1374,6 +1405,9 @@ APPEND(rc, "buffer_text"); APPEND(rc, "buffer_used"); APPEND(rc, "namespace_prefixes"); + APPEND(rc, "max_entity_expansions"); + APPEND(rc, "max_entity_indirections"); + APPEND(rc, "reset_dtd"); APPEND(rc, "ordered_attributes"); APPEND(rc, "specified_attributes"); APPEND(rc, "intern"); @@ -1520,6 +1554,65 @@ self->buffer_size = new_buffer_size; return 0; } +#ifdef XML_BOMB_PROTECTION + if (PyUnicode_CompareWithASCIIString(name, "max_entity_expansions") == 0) { + unsigned long value; + + value = PyLong_AsUnsignedLong(v); + if ((value == (unsigned long)-1) && PyErr_Occurred()) { + return -1; + } + if (value > UINT_MAX) { + PyErr_Format(PyExc_ValueError, + "max_entity_expansions must not be greater than %i", + UINT_MAX); + return -1; + } + if (!XML_SetFeature(self->itself, + XML_FEATURE_MAX_ENTITY_EXPANSIONS, + value)) { + PyErr_SetFromErrno(PyExc_ValueError); + return -1; + } + return 0; + } + + if 
(PyUnicode_CompareWithASCIIString(name, "max_entity_indirections") == 0) { + unsigned long value; + + value = PyLong_AsUnsignedLong(v); + if ((value == (unsigned long)-1) && PyErr_Occurred()) { + return -1; + } + if (value > UINT_MAX) { + PyErr_Format(PyExc_ValueError, + "max_entity_indirections must not be greater than %i", + UINT_MAX); + return -1; + } + if (!XML_SetFeature(self->itself, + XML_FEATURE_MAX_ENTITY_INDIRECTIONS, + value)) { + PyErr_SetFromErrno(PyExc_ValueError); + return -1; + } + return 0; + } + + if (PyUnicode_CompareWithASCIIString(name, "reset_dtd") == 0) { + int value; + + if ((value = PyObject_IsTrue(v)) == -1) { + return -1; + } + if (!XML_SetFeature(self->itself, XML_FEATURE_RESET_DTD, + value ? XML_TRUE : XML_FALSE)) { + PyErr_SetFromErrno(PyExc_ValueError); + return -1; + } + return 0; + } +#endif if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) { /* If we're changing the character data handler, flush all @@ -1654,12 +1747,125 @@ /* List of methods defined in the module */ +#ifdef XML_BOMB_PROTECTION +PyDoc_STRVAR(pyexpat_set_reset_dtd_doc, +"set_reset_dtd(n)\n" +); + +static PyObject * +pyexpat_set_reset_dtd(PyObject *self, PyObject *args) +{ + PyObject *pre; + long value; + if (!PyArg_ParseTuple(args, "O:set_reset_dtd", &pre)) + return NULL; + if ((value = (long)PyObject_IsTrue(pre)) == -1 ) { + return NULL; + } + if (!XML_SetFeatureDefault(XML_FEATURE_RESET_DTD, value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(pyexpat_get_reset_dtd_doc, +"get_reset_dtd(n)\n" +); + +static PyObject * +pyexpat_get_reset_dtd(PyObject *self, PyObject *args) +{ + long value; + if (!XML_GetFeatureDefault(XML_FEATURE_RESET_DTD, &value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + return PyBool_FromLong(value); +} + +PyDoc_STRVAR(pyexpat_set_max_entity_expansions_doc, +"set_max_entity_expansions(n)\n" +); + +static PyObject * 
+pyexpat_set_max_entity_expansions(PyObject *self, PyObject *args) +{ + long value; + if (!PyArg_ParseTuple(args, "l:set_max_entity_expansions", &value)) + return NULL; + if (!XML_SetFeatureDefault(XML_FEATURE_MAX_ENTITY_EXPANSIONS, value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(pyexpat_get_max_entity_expansions_doc, +"get_max_entity_expansions(n)\n" +); + +static PyObject * +pyexpat_get_max_entity_expansions(PyObject *self, PyObject *args) +{ + long value; + if (!XML_GetFeatureDefault(XML_FEATURE_MAX_ENTITY_EXPANSIONS, &value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + return PyLong_FromLong(value); +} + +PyDoc_STRVAR(pyexpat_set_max_entity_indirections_doc, +"set_max_entity_indirections(n)\n" +); + +static PyObject * +pyexpat_set_max_entity_indirections(PyObject *self, PyObject *args) +{ + long value; + if (!PyArg_ParseTuple(args, "l:set_max_entity_indirections", &value)) + return NULL; + if (!XML_SetFeatureDefault(XML_FEATURE_MAX_ENTITY_INDIRECTIONS, value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + Py_INCREF(Py_None); + return Py_None; +} + +PyDoc_STRVAR(pyexpat_get_max_entity_indirections_doc, +"get_max_entity_indirections(n)\n" +); + +static PyObject * +pyexpat_get_max_entity_indirections(PyObject *self, PyObject *args) +{ + long value; + if (!XML_GetFeatureDefault(XML_FEATURE_MAX_ENTITY_INDIRECTIONS, &value)) { + return PyErr_SetFromErrno(PyExc_ValueError); + } + return PyLong_FromLong(value); +} +#endif + + static struct PyMethodDef pyexpat_methods[] = { {"ParserCreate", (PyCFunction)pyexpat_ParserCreate, METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__}, {"ErrorString", (PyCFunction)pyexpat_ErrorString, METH_VARARGS, pyexpat_ErrorString__doc__}, - +#ifdef XML_BOMB_PROTECTION + {"set_reset_dtd", (PyCFunction)pyexpat_set_reset_dtd, METH_VARARGS, + pyexpat_set_reset_dtd_doc}, + {"get_reset_dtd", (PyCFunction)pyexpat_get_reset_dtd, METH_NOARGS, + 
pyexpat_get_reset_dtd_doc}, + {"set_max_entity_expansions", (PyCFunction)pyexpat_set_max_entity_expansions, + METH_VARARGS, pyexpat_set_max_entity_expansions_doc}, + {"get_max_entity_expansions", (PyCFunction)pyexpat_get_max_entity_expansions, + METH_NOARGS, pyexpat_get_max_entity_expansions_doc}, + {"set_max_entity_indirections", (PyCFunction)pyexpat_set_max_entity_indirections, + METH_VARARGS, pyexpat_set_max_entity_indirections_doc}, + {"get_max_entity_indirections", (PyCFunction)pyexpat_get_max_entity_indirections, + METH_NOARGS, pyexpat_get_max_entity_indirections_doc}, +#endif {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */ }; @@ -1896,6 +2102,17 @@ MYCONST(XML_PARAM_ENTITY_PARSING_NEVER); MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS); +#ifdef XML_BOMB_PROTECTION + MYCONST(XML_DEFAULT_MAX_ENTITY_INDIRECTIONS); + MYCONST(XML_DEFAULT_MAX_ENTITY_EXPANSIONS); + PyModule_AddObject(m, "XML_BOMB_PROTECTION", Py_True); + Py_INCREF(Py_True); +#else + PyModule_AddIntConstant(m, "XML_DEFAULT_MAX_ENTITY_INDIRECTIONS", 0); + PyModule_AddIntConstant(m, "XML_DEFAULT_MAX_ENTITY_EXPANSIONS", 0); + PyModule_AddObject(m, "XML_BOMB_PROTECTION", Py_False); + Py_INCREF(Py_False); +#endif #undef MYCONST #define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c) @@ -1937,6 +2154,12 @@ capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler; capi.SetUserData = XML_SetUserData; capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler; +#ifdef XML_BOMB_PROTECTION + capi.GetFeature = XML_GetFeature; + capi.SetFeature = XML_SetFeature; + capi.GetFeatureDefault = XML_GetFeatureDefault; + capi.SetFeatureDefault = XML_SetFeatureDefault; +#endif /* export using capsule */ capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);