import sgmllib, xml.sax.saxutils
class Parser(sgmllib.SGMLParser):
def reset(self):
sgmllib.SGMLParser.reset(self)
self.text = []
def unknown_starttag(self, tag, attrs):
print "Tag opened:", tag
for a, v in attrs:
print "Arg: ", a, " -> ", v
print "Argv escaped:", xml.sax.saxutils.quoteattr(v)
strattrs = "".join([' %s=%s' % (key, xml.sax.saxutils.quoteattr(value)) for key, value in attrs])
self.text.append("<%(tag)s%(strattrs)s>" % locals())
def unknown_endtag(self, tag):
print "Tag closed:", tag
self.text.append("%s>" % tag)
def handle_data(self, text):
print "Data:", text
self.text.append(xml.sax.saxutils.escape(text))
def get_text(self):
return "".join(self.text)
import unittest
class TestSGML(unittest.TestCase):
def test(self):
in_str = """example"""
out_str = """example"""
parser = Parser()
print "Fed:", in_str
parser.feed(in_str)
result = parser.get_text()
self.assertEqual(out_str, result)
# Run the unittest test runner only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    unittest.main()