#! /usr/bin/env python
"""
Read daily data from Shannon Airport at
http://www.met.ie/climate/daily-data.asp

Data returned contains 7 or 8 elements:

0     1         2     3     4         5                 6          7
Date  Rainfall  Temp  Temp  Sunshine  Gusts             Windspeed  gmin
      (mm)      Max   Min   (hours)   (if >= 34 knots)  (knots)    (Centigrade)
"""

import logging
import sys

try:
    from HTMLParser import HTMLParser  # Python 2
except ImportError:
    from html.parser import HTMLParser  # Python 3

# Parser trace messages go here (DEBUG level) instead of stdout, so that the
# only thing the script prints is the ';'-joined result line.
_LOG = logging.getLogger(__name__)


def read_html_data(mydata):
    """
    Extract the first data row following the "gmin" marker from an HTML page.

    The input is expected to be the page served by
    http://www.met.ie/climate/daily-data.asp (daily data for Shannon
    Airport).  The data is embedded in a long stream inside various tables.
    The interesting bit starts with the text "gmin"; the table row after the
    one being collected when "gmin" is seen is returned.  According to the
    site layout it starts with the date in the format %d/%m/%Y.

    mydata -- the HTML document as a string.

    Returns a list of cell strings (an empty cell is ''), or an empty list
    when "gmin" is never seen or no row follows it before the table closes.
    """

    class MyHTMLParser(HTMLParser):
        """HTML parser that collects table rows once "gmin" has been seen.

        Only the tags 'table', 'tr' and 'td' are acted upon; a handler
        named '<tag>_start' / '<tag>_end' is called when it exists.
        """

        def __init__(self):
            "Standard init plus the collection state."
            HTMLParser.__init__(self)
            # Python 3 would otherwise turn character references into text
            # data; keep the Python 2 behaviour where a cell holding only
            # an entity (e.g. &nbsp;) counts as empty.  Harmless on Python 2.
            self.convert_charrefs = False
            self._go = False            # True once "gmin" has been seen
            self.td_data = False        # True while inside a <td>
            self.found_td_data = False  # True once the current <td> got data
            self.table = []             # rows collected since "gmin"
            self.row = []               # cells of the row being read
            self.tags = frozenset(('tr', 'table', 'td'))
            self.result = []

        def _dispatch(self, tag, suffix, *args):
            "Call self.<tag><suffix>(*args) if the tag is tracked and handled."
            if tag not in self.tags:
                return
            handler = getattr(self, tag + suffix, None)
            # Not every tracked tag has both handlers (there is no
            # 'table_start').  The handler is called outside any try block
            # so that errors raised inside it are not silently dropped.
            if handler is not None:
                handler(*args)

        def handle_starttag(self, tag, attrs):
            "Start tag: forward to '<tag>_start' if present."
            self._dispatch(tag, '_start', attrs)

        def handle_endtag(self, tag):
            "End tag: forward to '<tag>_end' if present."
            self._dispatch(tag, '_end')

        def tr_start(self, attrs):
            "Handle a new row: start with an empty row."
            _LOG.debug("tr_start")
            self.row = []

        def tr_end(self):
            "End of a row: keep it if collection is active."
            if self._go:
                self.table.append(self.row)
            self.row = []
            _LOG.debug("tr_end")

        def td_start(self, attrs):
            "Start of a cell: mark that data now belongs to a cell."
            if self._go:
                _LOG.debug("td_start")
            self.td_data = True
            self.found_td_data = False

        def td_end(self):
            "End of a cell: record '' for a cell that delivered no data."
            self.td_data = False
            if not self.found_td_data:
                self.row.append('')
            if self._go:
                _LOG.debug("td_end")

        def table_end(self):
            "End of a table: save the result and stop collecting."
            if self._go:
                # table[0] is the row that was open when "gmin" was seen;
                # table[1] is the first row after it.  Leave the result
                # empty when that row never arrived.
                if len(self.table) > 1:
                    self.result = self.table[1]
                self._go = False
            _LOG.debug("table_end")

        def handle_data(self, data):
            "Text data: start collecting at 'gmin', store cell contents."
            # The marker must match exactly; data is deliberately not
            # stripped, so cell values keep their surrounding whitespace.
            if data == 'gmin':
                self._go = True
            if self._go and self.td_data:
                _LOG.debug("data=!%s!", data)
                self.row.append(data)
                self.found_td_data = True

    parser = MyHTMLParser()
    parser.feed(mydata)
    parser.close()
    return parser.result


def get_data(filename):
    """Read the HTML file *filename* and return its extracted data row.

    The return value is that of read_html_data() applied to the file's
    contents.  Errors from opening or reading the file propagate.
    """
    # 'with' closes the file even when read() raises.
    with open(filename, 'r') as handle:
        data = handle.read()
    return read_html_data(data)


def main(filename):
    """Print the data row found in *filename*, cells joined by ';'."""
    gimme = get_data(filename)
    print(';'.join(gimme))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("usage: %s HTMLFILE" % sys.argv[0])
    main(sys.argv[1])