Message 104676 - Python tracker

➜

This issue tracker has been migrated to GitHub, and is currently read-only.
For more information, see the GitHub FAQs in Python's Developer Guide.

Author	dmtr
Recipients	dmtr, flox
Date	2010-04-30.23:25:56
SpamBayes Score	0.0006762682
Marked as misclassified	No
Message-id	<1272669958.46.0.751435830057.issue8583@psf.upfronthosting.co.za>
In-reply-to

Content
And obviously iterparse can be either overridden in the local user code or patched in the library. Here's the iterparse code/test code: import cElementTree from cStringIO import StringIO class iterparse(object): root = None def __init__(self, file, events=None, namespace_separator = "}"): if not hasattr(file, 'read'): file = open(file, 'rb') self._file = file self._events = events self._namespace_separator = namespace_separator def __iter__(self): events = [] b = cElementTree.TreeBuilder() p = cElementTree.XMLParser(b, namespace_separator= \ self._namespace_separator) p._setevents(events, self._events) while 1: data = self._file.read(16384) if not data: break p.feed(data) for event in events: yield event del events[:] root = p.close() for event in events: yield event self.root = root x = """<root xmlns="http://www.very_long_url.com"><child>text</child></root>""" context = iterparse(StringIO(x), events=("start", "end", "start-ns")) for event, elem in context: print event, elem context = iterparse(StringIO(x), events=("start", "end", "start-ns"), namespace_separator = None) for event, elem in context: print event, elem It produces: start-ns ('', 'http://www.very_long_url.com') start <Element '{http://www.very_long_url.com}root' at 0xb7ccf650> start <Element '{http://www.very_long_url.com}child' at 0xb7ccf5a8> end <Element '{http://www.very_long_url.com}child' at 0xb7ccf5a8> end <Element '{http://www.very_long_url.com}root' at 0xb7ccf650> start <Element 'root' at 0xb7ccf620> start <Element 'child' at 0xb7ccf458> end <Element 'child' at 0xb7ccf458> end <Element 'root' at 0xb7ccf620> Note the absence of URIs and ignored start-ns events in the 'space_separator = None' version.

And obviously iterparse can be either overridden in the local user code or patched in the library. Here's the iterparse code/test code:

import  cElementTree
from cStringIO import StringIO

class iterparse(object):
    """Incrementally parse an XML source, yielding parser events as they occur.

    Iterating over an instance reads the source in 16384-byte chunks, feeds
    each chunk to a ``cElementTree.XMLParser`` and yields every entry the
    parser appended to its event list (consumed by callers as
    ``(event, elem)`` pairs).  Once iteration has run to completion,
    ``root`` holds the value returned by the parser's ``close()``.

    Parameters:
        file: an object with a ``read`` method, or a filename.  A filename
            is opened here in binary mode and is closed again when the
            iteration finishes; a caller-supplied file object is never
            closed by this class.
        events: the event names to report, handed unchanged to the
            parser's ``_setevents``.
        namespace_separator: forwarded as the ``namespace_separator``
            keyword of ``cElementTree.XMLParser``.
            NOTE(review): that keyword is presumably only available with
            the library patch discussed in this issue -- confirm before
            relying on it with a stock cElementTree.
    """

    # Stays None until an iteration has been fully consumed.
    root = None

    def __init__(self, file, events=None, namespace_separator="}"):
        # Remember whether we opened the file ourselves so that we only
        # ever close a handle we own.
        self._close_file = not hasattr(file, 'read')
        if self._close_file:
            file = open(file, 'rb')
        self._file = file
        self._events = events
        self._namespace_separator = namespace_separator

    def __iter__(self):
        events = []
        builder = cElementTree.TreeBuilder()
        parser = cElementTree.XMLParser(
            builder, namespace_separator=self._namespace_separator)
        # The parser appends to ``events`` in place while data is fed.
        parser._setevents(events, self._events)
        try:
            while True:
                data = self._file.read(16384)
                if not data:
                    break
                parser.feed(data)
                for event in events:
                    yield event
                # Empty the shared list in place; the parser keeps its
                # reference to this same list object.
                del events[:]
            root = parser.close()
            # close() may flush further events; report them as well.
            for event in events:
                yield event
            self.root = root
        finally:
            # Runs on normal exhaustion, on a parse error, and when the
            # generator is closed early, so an owned file is not leaked.
            if self._close_file:
                self._file.close()


x = """<root xmlns="http://www.very_long_url.com"><child>text</child></root>"""
context = iterparse(StringIO(x), events=("start", "end", "start-ns"))
for event, elem in context: print event, elem

context = iterparse(StringIO(x), events=("start", "end", "start-ns"), namespace_separator = None)
for event, elem in context: print event, elem


It produces:
start-ns ('', 'http://www.very_long_url.com')
start <Element '{http://www.very_long_url.com}root' at 0xb7ccf650>
start <Element '{http://www.very_long_url.com}child' at 0xb7ccf5a8>
end <Element '{http://www.very_long_url.com}child' at 0xb7ccf5a8>
end <Element '{http://www.very_long_url.com}root' at 0xb7ccf650>
start <Element 'root' at 0xb7ccf620>
start <Element 'child' at 0xb7ccf458>
end <Element 'child' at 0xb7ccf458>
end <Element 'root' at 0xb7ccf620>

Note the absence of URIs and the ignored start-ns events in the 'namespace_separator = None' version.

History
Date	User	Action	Args
2010-04-30 23:25:58	dmtr	set	recipients: + dmtr, flox
2010-04-30 23:25:58	dmtr	set	messageid: <1272669958.46.0.751435830057.issue8583@psf.upfronthosting.co.za>
2010-04-30 23:25:56	dmtr	link	issue8583 messages
2010-04-30 23:25:56	dmtr	create