# HG changeset patch
# Parent f4e99b589c5f51ec12a715a16a024c0026726d14
add ElementTree benchmark

diff -r f4e99b589c5f perf.py
--- a/perf.py	Mon Mar 25 17:56:25 2013 -0400
+++ b/perf.py	Sat Mar 30 10:20:58 2013 +0100
@@ -1593,6 +1593,40 @@
     return _PickleBenchmark(base_python, changed_python, options,
                             ["unpickle"])
 
+
+def MeasureEtree(python, options, extra_args):
+    """Test the performance of Python's (c)ElementTree implementations.
+
+    Args:
+        python: prefix of a command line for the Python binary.
+        options: optparse.Values instance.
+        extra_args: list of arguments to append to the command line.
+
+    Returns:
+        RawData instance.
+    """
+    bm_path = Relative("performance/bm_elementtree.py")
+    return MeasureGeneric(python, options, bm_path, extra_args=extra_args)
+
+
+def BM_ETreeParse(base_python, changed_python, options):
+    extra_args = ['parse']
+    return SimpleBenchmark(MeasureEtree,
+                           base_python, changed_python, options, extra_args)
+
+
+def BM_ETreeGenerate(base_python, changed_python, options):
+    extra_args = ['generate']
+    return SimpleBenchmark(MeasureEtree,
+                           base_python, changed_python, options, extra_args)
+
+
+def BM_ETreeProcess(base_python, changed_python, options):
+    extra_args = ['process']
+    return SimpleBenchmark(MeasureEtree,
+                           base_python, changed_python, options, extra_args)
+
+
 def MeasureJSON(python, options, extra_args):
     """Test the performance of Python's json implementation.
 
diff -r f4e99b589c5f performance/bm_elementtree.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/performance/bm_elementtree.py	Sat Mar 30 10:20:58 2013 +0100
@@ -0,0 +1,252 @@
+#!/usr/bin/env python
+
+"""Benchmark script for testing the performance of ElementTree.
+
+This is intended to support Unladen Swallow's perf.py.
+
+This will have ElementTree, cElementTree and lxml (if available)
+parse a generated XML file, search it, create new XML trees from
+it and serialise the result.
+""" + +__author__ = "stefan_ml@behnel.de (Stefan Behnel)" + +default_etmodule = "xml.etree.cElementTree" + +# Python imports +import optparse +import tempfile +import time +import sys +import os +from collections import defaultdict + +# Local imports +import util +from compat import xrange + + +def build_xml_tree(etree): + SubElement = etree.SubElement + root = etree.Element('root') + + # create a couple of repetitive broad subtrees + for c in xrange(100): + child = SubElement(root, 'child-%d' % c, + tag_type="child") + for i in xrange(1000): + SubElement(child, 'subchild').text = 'LEAF-%d-%d' % (c, i) + + # create a deep subtree + deep = SubElement(root, 'deepchildren', tag_type="deepchild") + for i in xrange(250): + deep = SubElement(deep, 'deepchild') + SubElement(deep, 'deepleaf', tag_type="leaf").text = "LEAF" + + return root + + +def benchmark(etree, xml_file=None, xml_data=None, xml_root=None): + SubElement = etree.SubElement + + if xml_file is not None: + root = etree.parse(xml_file).getroot() + elif xml_data is not None: + root = etree.fromstring(xml_data) + elif xml_root is not None: + root = xml_root + else: + root = build_xml_tree(etree) + + # find*() + found = sum(child.find('.//deepleaf') is not None + for child in root) + if found != 1: + raise RuntimeError("find() failed") + + text = 'LEAF-5-99' + found = any(1 for child in root + for el in child.iterfind('.//subchild') + if el.text == text) + if not found: + raise RuntimeError("iterfind() failed") + + found = sum(el.text == 'LEAF' + for el in root.findall('.//deepchild/deepleaf')) + if found != 1: + raise RuntimeError("findall() failed") + + # tree creation based on original tree + dest = etree.Element('root2') + target = SubElement(dest, 'result-1') + for child in root: + SubElement(target, child.tag).text = str(len(child)) + if len(target) != len(root): + raise RuntimeError("transform #1 failed") + + target = SubElement(dest, 'result-2') + for child in root.iterfind('.//subchild'): + SubElement(target, child.tag, attr=child.text).text = "found" + + if (len(target) < len(root) or + not all(el.text == 'found' + for el in target.iterfind('subchild'))): + raise RuntimeError("transform #2 failed") + + # moving subtrees around + orig_len = len(root[0]) + new_root = root.makeelement('parent', {}) + new_root[:] = root[0] + el = root[0] + del el[:] + for child in new_root: + if child is not None: + el.append(child) + if len(el) != orig_len: + raise RuntimeError("child moving failed") + + # check iteration tree consistency + d = defaultdict(list) + for child in root: + tags = d[child.get('tag_type')] + for sub in child.iter(): + tags.append(sub) + + check_dict = dict((n, iter(ch)) for n, ch in d.items()) + target = SubElement(dest, 'transform-2') + for child in root: + tags = check_dict[child.get('tag_type')] + for sub in child.iter(): + # note: explicit object identity check to make sure + # users can properly keep state in the tree + if sub is not next(tags): + raise RuntimeError("tree iteration consistency check failed") + SubElement(target, sub.tag).text = 'worked' + + # final probability check for serialisation (we added enough content + # to make the result tree larger than the original) + orig = etree.tostring(root, encoding='utf8') + result = etree.tostring(dest, encoding='utf8') + if (len(result) < len(orig) + or b'worked' not in result + or b'>LEAF<' not in orig): + raise RuntimeError("serialisation probability check failed") + return result + + +def bench_parse(etree, xml_file, xml_data, xml_root): + result1 = 
+    result1 = benchmark(etree, xml_file=xml_file)
+    result2 = benchmark(etree, xml_data=xml_data)
+    if result1 != result2 or b'>found<' not in result2:
+        raise RuntimeError("serialisation check failed")
+
+
+def bench_process(etree, xml_file, xml_data, xml_root):
+    result1 = benchmark(etree, xml_root=xml_root)
+    result2 = benchmark(etree, xml_root=xml_root)
+    if result1 != result2 or b'>found<' not in result2:
+        raise RuntimeError("serialisation check failed")
+
+
+def bench_generate(etree, xml_file, xml_data, xml_root):
+    output = []
+    for _ in range(10):
+        root = build_xml_tree(etree)
+        output.append(etree.tostring(root))
+
+    length = None
+    for xml in output:
+        if length is None:
+            length = len(xml)
+        elif length != len(xml):
+            raise RuntimeError("inconsistent output detected")
+        if b'>LEAF<' not in xml:
+            raise RuntimeError("unexpected output detected")
+
+
+def run_etree_benchmark(iterations, etree, bench_func):
+    times = []
+
+    xml_root = build_xml_tree(etree)
+    xml_data = etree.tostring(xml_root)
+
+    # not using NamedTemporaryFile() here as re-opening it is not portable
+    tf, file_path = tempfile.mkstemp()
+    try:
+        etree.ElementTree(xml_root).write(file_path)
+
+        # warm up
+        bench_func(etree, file_path, xml_data, xml_root)
+        bench_func(etree, file_path, xml_data, xml_root)
+
+        for _ in xrange(iterations):
+            t0 = time.time()
+            bench_func(etree, file_path, xml_data, xml_root)
+            t1 = time.time()
+            times.append(t1 - t0)
+    finally:
+        # close the descriptor before unlinking so the removal
+        # also works on Windows
+        try:
+            os.close(tf)
+        except OSError:
+            pass
+        try:
+            os.unlink(file_path)
+        except OSError:
+            pass
+
+    return times
+
+
+def test_et(iterations, **kwargs):
+    import xml.etree.ElementTree as etree
+    return run_etree_benchmark(iterations, etree, **kwargs)
+
+
+def test_cet(iterations, **kwargs):
+    import xml.etree.cElementTree as etree
+    return run_etree_benchmark(iterations, etree, **kwargs)
+
+
+def test_lxml(iterations, **kwargs):
+    import lxml.etree as etree
+    return run_etree_benchmark(iterations, etree, **kwargs)
+
+
+if __name__ == "__main__":
+    benchmarks = 'parse generate process'.split()
+
+    parser = optparse.OptionParser(
+        usage="%%prog [options] [%s]" % '|'.join(benchmarks),
+        description="Test the performance of ElementTree XML processing.")
+    parser.add_option(
+        "--etree-module", default=default_etmodule, metavar="FQMN",
+        help="Select an ElementTree module to use (fully qualified module name). "
+             "Default is '%s'" % default_etmodule)
+    parser.add_option(
+        "--no-accelerator", action="store_true", default=False,
+        help="Disable the '_elementtree' accelerator module for ElementTree "
+             "in Python 3.3+.")
+    util.add_standard_options_to(parser)
+    options, args = parser.parse_args()
+
+    if not args:
+        bench_func = bench_parse
+    elif args[0] not in benchmarks:
+        raise RuntimeError("invalid benchmark requested")
+    else:
+        bench_func = globals()['bench_%s' % args[0]]
+
+    if options.no_accelerator and sys.version_info >= (3, 3):
+        # prevent the C accelerator from being used in 3.3+
+        sys.modules['_elementtree'] = None
+        import xml.etree.ElementTree as et
+        if et.SubElement.__module__ != 'xml.etree.ElementTree':
+            raise RuntimeError("Unexpected C accelerator for ElementTree")
+
+    try:
+        from importlib import import_module
+    except ImportError:
+        def import_module(module_name):
+            return __import__(module_name, fromlist=['*'])
+
+    etree_module = import_module(options.etree_module)
+    util.run_benchmark(options, options.num_runs,
+                       run_etree_benchmark, etree_module, bench_func)
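
For reviewers who want to see what the benchmark actually parses and serialises, below is a minimal sketch of the document shape that build_xml_tree() produces. It is not part of the patch: it reuses the element and attribute names from above but shrinks the loop bounds so the output stays readable (the real benchmark builds 100 broad children with 1000 leaves each, plus a 250-level-deep chain).

# Hypothetical reviewer aid, not part of the patch: same tree shape as
# build_xml_tree(), with illustrative loop bounds instead of benchmark-sized ones.
import xml.etree.ElementTree as etree

root = etree.Element('root')

# broad, repetitive part (patch: 100 children with 1000 leaves each)
for c in range(2):
    child = etree.SubElement(root, 'child-%d' % c, tag_type="child")
    for i in range(3):
        etree.SubElement(child, 'subchild').text = 'LEAF-%d-%d' % (c, i)

# deep part (patch: 250 nested 'deepchild' levels before the single leaf)
deep = etree.SubElement(root, 'deepchildren', tag_type="deepchild")
for i in range(2):
    deep = etree.SubElement(deep, 'deepchild')
etree.SubElement(deep, 'deepleaf', tag_type="leaf").text = "LEAF"

print(etree.tostring(root).decode())

Running the script directly drives the same code path that MeasureEtree() invokes from perf.py, e.g. "python performance/bm_elementtree.py parse" or "python performance/bm_elementtree.py --etree-module=lxml.etree process"; the iteration-count option is added by util.add_standard_options_to(), so its exact spelling depends on the perf.py checkout rather than on this patch.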