from HTMLParser import HTMLParser
class VerbatimParser(HTMLParser):
    """Echo parsed HTML back to a writable object as faithfully as possible.

    HTMLParser hands each handler only the payload of a construct (a tag
    name, a comment body, a reference name, ...) with the surrounding
    delimiters removed.  Every handler below writes those delimiters back
    around the payload so that the output reproduces the input markup.

    `out` is any object with a ``write(text)`` method, e.g. an open file.
    """

    # Marked-section keywords whose terminator is "]]>"; any other keyword
    # that reaches unknown_decl() (the "if"/"else"/"endif" family) ends in
    # "]>".  The parser does not pass the terminator to the handler, so it
    # has to be chosen here.
    _DOUBLE_BRACKET_SECTIONS = ('temp', 'cdata', 'ignore', 'include', 'rcdata')

    def __init__(self, out):
        # Explicit base-class call: on Python 2 HTMLParser is an old-style
        # class, so super() cannot be used.
        HTMLParser.__init__(self)
        # Harmless extra attribute on Python 2.  On Python 3 it stops the
        # parser from decoding character references inside text, so that
        # handle_entityref()/handle_charref() are still called and the
        # references can be echoed unchanged.
        self.convert_charrefs = False
        self.out = out

    def emit(self, text):
        """Write `text` to the output object."""
        self.out.write(text)

    def handle_starttag(self, tag, attrs):
        """Echo an opening tag exactly as it appeared in the input."""
        self.emit(self.get_starttag_text())

    def handle_endtag(self, tag):
        """Echo a closing tag as ``</tag>``."""
        self.emit('</')
        self.emit(tag)
        self.emit('>')

    def handle_startendtag(self, tag, attrs):
        """Echo a self-closing tag (``<br/>``) exactly as it appeared."""
        self.emit(self.get_starttag_text())

    def handle_data(self, data):
        """Echo text content unchanged."""
        self.emit(data)

    def handle_entityref(self, name):
        """Echo a named reference as ``&name;``."""
        self.emit('&')
        self.emit(name)
        self.emit(';')

    def handle_charref(self, name):
        """Echo a numeric reference as ``&#name;``."""
        self.emit('&#')
        self.emit(name)
        self.emit(';')

    def handle_comment(self, data):
        """Echo a comment as ``<!--data-->``."""
        self.emit('<!--')
        self.emit(data)
        self.emit('-->')

    def handle_decl(self, decl):
        """Echo a declaration such as a doctype as ``<!decl>``."""
        self.emit('<!')
        self.emit(decl)
        self.emit('>')

    def handle_pi(self, data):
        """Echo a processing instruction as ``<?data>``.

        Only ``<?`` and ``>`` are added back; for XML-style instructions
        the trailing ``?`` arrives as part of `data`.
        """
        self.emit('<?')
        self.emit(data)
        self.emit('>')

    def unknown_decl(self, data):
        """Echo a marked section as ``<![data]>`` or ``<![data]]>``.

        `data` is the text between ``<![`` and the terminator.  Whitespace
        inside the original terminator is not available here and therefore
        cannot be reproduced.
        """
        if data.lower().startswith(self._DOUBLE_BRACKET_SECTIONS):
            terminator = ']]>'
        else:
            terminator = ']>'
        self.emit('<![')
        self.emit(data)
        self.emit(terminator)
def doit(infile, outfile):
    """Run the contents of `infile` through VerbatimParser into `outfile`.

    `outfile` is opened in 'w' mode (and so truncated) before `infile` is
    opened.  The whole input is read in one go and fed to the parser, which
    is then closed so that any buffered trailing input is processed while
    the output file is still open.
    """
    with open(outfile, 'w') as sink:
        echo = VerbatimParser(sink)
        with open(infile) as source:
            markup = source.read()
        echo.feed(markup)
        echo.close()
# Script action: copy test1.html through the parser into test1b.html.
doit(infile='test1.html', outfile='test1b.html')