#!/usr/bin/env python
"""A simple script to minify HTML.

It may have bugs.  It does not handle JavaScript or CSS; there are good
programs available that do that.

Usage:

    minify.py somefile.html > somefile-minified.html
    minify.py < somefile.html > somefile-minified.html

Because this is just a hack, it sometimes helps to minify a file twice:

    minify.py somefile.html | minify.py > somefile-minified.html

$Id: minify.py 1745 2008-12-01 05:57:12Z chris $
"""

import re
import sys

try:  # Python 2
    from HTMLParser import HTMLParser, HTMLParseError
except ImportError:  # Python 3
    from html.parser import HTMLParser
    try:
        from html.parser import HTMLParseError
    except ImportError:
        class HTMLParseError(Exception):
            """Stand-in used when the standard library has no HTMLParseError.

            Keeps the ``except HTMLParseError`` clause in main() valid.
            """


# Callable ``REMOVE_WS(replacement, text)``: substitutes every run of two or
# more whitespace characters in ``text`` with ``replacement``.
REMOVE_WS = re.compile(r"\s{2,}").sub


class HTMLMinifier(HTMLParser):
    """An HTML minifier.

    Markup is re-emitted through the ``output`` callback as it is parsed.
    Runs of two or more whitespace characters in text are collapsed to a
    single newline, except between ``<pre>`` and ``</pre>``.  Comments,
    declarations and processing instructions are dropped.
    """

    def __init__(self, output):
        """Create a minifier.

        output: This callback function will be called with a string each
        time there is data to output.  A good candidate to use is
        sys.stdout.write.
        """
        try:
            # Newer parsers decode character references before calling
            # handle_data() unless told otherwise; that would turn "&lt;"
            # into a raw "<" in the minified output.
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # Older parsers do not accept the keyword and never decode
            # references themselves.
            HTMLParser.__init__(self)
        self.output = output
        # True while the parser is between <pre> and </pre>.
        self.inside_pre = False

    def error(self, message):
        """Write a parse problem to stderr as a warning."""
        sys.stderr.write("Warning: " + message + "\n")

    def handle_starttag(self, tag, attributes):
        """Emit the start tag exactly as written in the source."""
        if "pre" == tag.lower():
            self.inside_pre = True
        self.output(self.get_starttag_text())

    def handle_startendtag(self, tag, attributes):
        """Treat a self-closing tag like an ordinary start tag."""
        self.handle_starttag(tag, attributes)

    def handle_endtag(self, tag):
        """Emit the closing tag and leave <pre> mode when appropriate."""
        if "pre" == tag.lower():
            self.inside_pre = False
        # The closing tag must be written out, otherwise the minified
        # document loses every end tag.
        self.output("</" + tag + ">")

    def handle_data(self, data):
        """Emit text, collapsing whitespace runs unless inside <pre>."""
        if not self.inside_pre:
            data = REMOVE_WS("\n", data)
        self.output(data)

    def handle_charref(self, name):
        """Re-emit a numeric character reference unchanged."""
        self.output("&#" + name + ";")

    def handle_entityref(self, name):
        """Re-emit a named entity reference unchanged."""
        self.output("&" + name + ";")

    def handle_comment(self, data):
        """Drop comments from the output."""
        return

    def handle_decl(self, data):
        """Drop declarations (such as a DOCTYPE) from the output."""
        return

    def handle_pi(self, data):
        """Drop processing instructions from the output."""
        return


def _read_input(argv):
    """Return the markup named by argv[1], or read from stdin if absent."""
    if len(argv) > 1:
        # The context manager closes the file even if read() fails.
        with open(argv[1], "rb") as source:
            markup = source.read()
    else:
        markup = sys.stdin.read()
    if not isinstance(markup, str):
        # Python 3 yields bytes for a binary read, but the parser only
        # accepts text.  On Python 2 the data is already a str.
        markup = markup.decode("utf-8", "replace")
    return markup


def main():
    """Command-line entry point: minify a file (or stdin) to stdout."""
    if "-h" in sys.argv or "--help" in sys.argv:
        print(__doc__)
        sys.exit(1)
    minifier = HTMLMinifier(sys.stdout.write)
    minifier.feed(_read_input(sys.argv))
    try:
        minifier.close()
    except HTMLParseError as e:
        sys.stderr.write("Warning: " + str(e) + "\n")


if __name__ == "__main__":
    main()