import sgmllib, xml.sax.saxutils class Parser(sgmllib.SGMLParser): def reset(self): sgmllib.SGMLParser.reset(self) self.text = [] def unknown_starttag(self, tag, attrs): print "Tag opened:", tag for a, v in attrs: print "Arg: ", a, " -> ", v print "Argv escaped:", xml.sax.saxutils.quoteattr(v) strattrs = "".join([' %s=%s' % (key, xml.sax.saxutils.quoteattr(value)) for key, value in attrs]) self.text.append("<%(tag)s%(strattrs)s>" % locals()) def unknown_endtag(self, tag): print "Tag closed:", tag self.text.append("" % tag) def handle_data(self, text): print "Data:", text self.text.append(xml.sax.saxutils.escape(text)) def get_text(self): return "".join(self.text) import unittest class TestSGML(unittest.TestCase): def test(self): in_str = """example""" out_str = """example""" parser = Parser() print "Fed:", in_str parser.feed(in_str) result = parser.get_text() self.assertEqual(out_str, result) if __name__ == "__main__": unittest.main()