# HG changeset patch
# Parent f4e99b589c5f51ec12a715a16a024c0026726d14
add ElementTree benchmark

diff -r f4e99b589c5f perf.py
--- a/perf.py	Mon Mar 25 17:56:25 2013 -0400
+++ b/perf.py	Fri Mar 29 12:34:47 2013 +0100
@@ -1593,6 +1593,60 @@
     return _PickleBenchmark(base_python, changed_python, options, ["unpickle"])
 
 
+
+def MeasureEtree(python, options, extra_args):
+    """Test the performance of Python's ElementTree implementations.
+
+    Args:
+        python: prefix of a command line for the Python binary.
+        options: optparse.Values instance.
+        extra_args: list of arguments to append to the command line.
+
+    Returns:
+        RawData instance.
+    """
+    bm_path = Relative("performance/bm_elementtree.py")
+    return MeasureGeneric(python, options, bm_path, extra_args=extra_args)
+
+
+def _EtreeBenchmark(base_python, changed_python, options, extra_args):
+    """Test the performance of Python's ElementTree implementations.
+
+    Args:
+        base_python: prefix of a command line for the reference
+                Python binary.
+        changed_python: prefix of a command line for the
+                experimental Python binary.
+        options: optparse.Values instance.
+        extra_args: list of arguments to append to the command line.
+
+    Returns:
+        Summary of whether the experimental Python is better/worse than the
+        baseline.
+    """
+    return SimpleBenchmark(MeasureEtree,
+                           base_python, changed_python, options, extra_args)
+
+
+def BM_ElementTree(base_python, changed_python, options):
+    args = ["et"]
+    return _EtreeBenchmark(base_python, changed_python, options, args)
+
+def BM_cElementTree(base_python, changed_python, options):
+    args = ["cet"]
+    return _EtreeBenchmark(base_python, changed_python, options, args)
+
+try:
+    import lxml.etree as _lxml_trial_import
+except ImportError:
+    pass
+else:
+    del _lxml_trial_import
+    def BM_lxml(base_python, changed_python, options):
+        args = ["lxml"]
+        return _EtreeBenchmark(base_python, changed_python, options, args)
+
+
 def MeasureJSON(python, options, extra_args):
     """Test the performance of Python's json implementation.
 
diff -r f4e99b589c5f performance/bm_elementtree.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/performance/bm_elementtree.py	Fri Mar 29 12:34:47 2013 +0100
@@ -0,0 +1,201 @@
+#!/usr/bin/python
+
+"""Benchmark script for testing the performance of ElementTree.
+
+This is intended to support Unladen Swallow's perf.py.
+
+This will have ElementTree, cElementTree and lxml (if available)
+parse a generated XML file, search it, create new XML trees from
+it and serialise the result.
+"""
+
+__author__ = "stefan_ml@behnel.de (Stefan Behnel)"
+
+# Python imports
+import optparse
+import tempfile
+import time
+import sys
+import os
+from collections import defaultdict
+
+# Local imports
+import util
+from compat import xrange
+
+
+def build_xml_input(etree):
+    root = etree.Element('root')
+
+    # create a couple of repetitive broad subtrees
+    for c in xrange(100):
+        child = etree.SubElement(root, 'child-%d' % c,
+                                 tag_type="child")
+        for i in xrange(1000):
+            etree.SubElement(child, 'subchild').text = 'LEAF-%d-%d' % (c, i)
+
+    # create a deep subtree
+    deep = etree.SubElement(root, 'deepchildren', tag_type="deepchild")
+    for i in xrange(250):
+        deep = etree.SubElement(deep, 'deepchild')
+    etree.SubElement(deep, 'deepleaf', tag_type="leaf").text = "LEAF"
+
+    return etree.tostring(root)
+
+
+def benchmark(etree, xml_file):
+    root = etree.parse(xml_file).getroot()
+
+    # find*()
+    found = sum(child.find('.//deepleaf') is not None
+                for child in root)
+    if found != 1:
+        raise RuntimeError("find() failed")
+
+    text = 'LEAF-5-99'
+    found = any(1 for child in root
+                  for el in child.iterfind('.//subchild')
+                  if el.text == text)
+    if not found:
+        raise RuntimeError("iterfind() failed")
+
+    found = sum(el.text == 'LEAF'
+                for el in root.findall('.//deepchild/deepleaf'))
+    if found != 1:
+        raise RuntimeError("findall() failed")
+
+    # tree creation based on original tree
+    dest = etree.Element('root2')
+    target = etree.SubElement(dest, 'result-1')
+    for child in root:
+        etree.SubElement(target, child.tag).text = str(len(child))
+    if len(target) != len(root):
+        raise RuntimeError("transform #1 failed")
+
+    target = etree.SubElement(dest, 'result-2')
+    for child in root.iterfind('.//subchild'):
+        etree.SubElement(target, child.tag, attr=child.text).text = "found"
+
+    if (len(target) < len(root) or
+        not all(el.text == 'found'
+                for el in target.iterfind('subchild'))):
+        raise RuntimeError("transform #2 failed")
+
+    # moving subtrees around
+    orig_len = len(root[0])
+    new_root = root.makeelement('parent', {})
+    new_root[:] = root[0]
+    el = root[0]
+    del el[:]
+    for child in new_root:
+        if child is not None:
+            el.append(child)
+    if len(el) != orig_len:
+        raise RuntimeError("child moving failed")
+
+    # check iteration tree consistency
+    d = defaultdict(list)
+    for child in root:
+        tags = d[child.get('tag_type')]
+        for sub in child.iter():
+            tags.append(sub)
+
+    check_dict = dict((n, iter(ch)) for n, ch in d.items())
+    target = etree.SubElement(dest, 'transform-2')
+    for child in root:
+        tags = check_dict[child.get('tag_type')]
+        for sub in child.iter():
+            # note: explicit object identity check to make sure
+            # users can properly keep state in the tree
+            if sub is not next(tags):
+                raise RuntimeError("tree iteration consistency check failed")
+            etree.SubElement(target, sub.tag).text = 'worked'
+
+    # final probability check for serialisation (we added enough content
+    # to make the result tree larger than the original)
+    orig = etree.tostring(root, encoding='utf8')
+    result = etree.tostring(dest, encoding='utf8')
+    if (len(result) < len(orig)
+        or b'worked' not in result
+        or b'>LEAF<' not in orig):
+        raise RuntimeError("serialisation probability check failed")
+
+
+def run_etree_benchmark(etree, iterations):
+    """Time benchmark() runs of one ElementTree implementation.
+
+    Args:
+        etree: ElementTree-compatible module to run benchmark() with.
+        iterations: number of timed benchmark() runs.
+
+    Returns:
+        List of wall-clock durations in seconds, one per timed run.
+    """
+    times = []
+
+    xml = build_xml_input(etree)
+
+    # not using NamedTemporaryFile() here as re-opening it is not portable
+    tf, file_path = tempfile.mkstemp()
+    try:
+        # write through the descriptor returned by mkstemp() so that it is
+        # closed again before the benchmark re-opens the file by name
+        with os.fdopen(tf, 'wb') as f:
+            f.write(xml)
+
+        # warm up
+        benchmark(etree, file_path)
+        benchmark(etree, file_path)
+
+        for _ in xrange(iterations):
+            t0 = time.time()
+            benchmark(etree, file_path)
+            t1 = time.time()
+            times.append(t1 - t0)
+    finally:
+        try:
+            os.unlink(file_path)
+        except OSError:
+            pass
+
+    return times
+
+
+def test_et(iterations):
+    import xml.etree.ElementTree as etree
+    return run_etree_benchmark(etree, iterations)
+
+
+def test_cet(iterations):
+    import xml.etree.cElementTree as etree
+    return run_etree_benchmark(etree, iterations)
+
+
+def test_lxml(iterations):
+    import lxml.etree as etree
+    return run_etree_benchmark(etree, iterations)
+
+
+if __name__ == "__main__":
+    parser = optparse.OptionParser(
+        usage="%prog [options] [et|cet|lxml]",
+        description="Test the performance of ElementTree XML processing.")
+    util.add_standard_options_to(parser)
+    options, args = parser.parse_args()
+
+    benchmarks = ["et", "cet", "lxml"]
+    for bench_name in benchmarks:
+        if bench_name in args:
+            bench_func = globals()["test_" + bench_name]
+            break
+    else:
+        raise RuntimeError("Need to specify one of %s" % benchmarks)
+
+    if bench_name == 'et' and sys.version_info >= (3, 3):
+        # prevent C accelerator from being used in 3.3
+        sys.modules['_elementtree'] = None
+        import xml.etree.ElementTree as et
+        if et.Element.__module__ != 'xml.etree.ElementTree':
+            raise RuntimeError("Unexpected C accelerators for ElementTree")
+
+    util.run_benchmark(options, options.num_runs, bench_func)