"""Round-trip a small markup snippet through an SGML parser and print the result.

The script feeds ``input`` to ``SGMLProcessor``, which re-serialises every
start tag, end tag and text run it is handed, then prints the input, the
regenerated output and the expected output for comparison.
"""

import sys

try:
    from sgmllib import SGMLParser
except ImportError:
    # sgmllib is not available on Python 3.  Adapt html.parser so that the
    # unknown_starttag / unknown_endtag hooks used below are still invoked.
    # NOTE(review): this is an approximation, not a faithful sgmllib
    # replacement; the two parsers may tokenise unusual markup differently.
    from html.parser import HTMLParser

    class SGMLParser(HTMLParser):
        """Minimal stand-in that forwards tag events to the unknown_* hooks."""

        def handle_starttag(self, tag, attrs):
            # html.parser reports valueless attributes as None; substitute
            # the attribute name so the regenerated tag stays well-formed.
            normalised = [
                (key, key if value is None else value) for key, value in attrs
            ]
            self.unknown_starttag(tag, normalised)

        def handle_endtag(self, tag):
            self.unknown_endtag(tag)

        def unknown_starttag(self, tag, attrs):
            pass

        def unknown_endtag(self, tag):
            pass


class SGMLProcessor(SGMLParser):
    """Parser that collects a textual copy of the tags and data it receives.

    Each handler appends a string fragment to ``self.pieces``; ``output()``
    concatenates the fragments in the order they were seen.
    """

    def reset(self):
        """Clear the collected fragments, then reset the underlying parser."""
        self.pieces = []
        # Explicit base-class call rather than super(): kept from the
        # original so it works whatever kind of class the base parser is.
        SGMLParser.reset(self)

    def unknown_starttag(self, tag, attrs):
        """Record a start tag as ``<tag key="value" ...>``.

        ``attrs`` is an iterable of ``(key, value)`` pairs.  Values are
        written verbatim, without any quoting or escaping.
        """
        strattrs = "".join(' %s="%s"' % (key, value) for key, value in attrs)
        self.pieces.append("<%s%s>" % (tag, strattrs))

    def unknown_endtag(self, tag):
        """Record an end tag as ``</tag>``.

        The original format string was empty, so closing tags were dropped
        from the regenerated output.
        """
        self.pieces.append("</%s>" % (tag,))

    def handle_data(self, text):
        """Record a run of character data unchanged."""
        self.pieces.append(text)

    def output(self):
        """Return everything recorded so far as a single string."""
        return "".join(self.pieces)


# NOTE(review): this sample contains no markup, so it does not exercise the
# tag handlers; the original sample may have lost its tags -- confirm.
input = """ hello """

parser = SGMLProcessor()
parser.feed(input)
parser.close()

# NOTE(review): replacing '' with '' leaves the text unchanged; the search
# pattern was presumably a piece of markup that has been lost -- confirm.
expected = input.replace('', '')

# One write that reproduces the original sequence of print statements
# (blank line, heading, value, ...) on both Python 2 and Python 3.
report_lines = [
    "",
    "Input:",
    input,
    "",
    "Output:",
    parser.output(),
    "",
    "Expected:",
    expected,
]
sys.stdout.write("\n".join(report_lines) + "\n")