# NOTE: sgmllib ships with Python 2 only; it was removed in Python 3
# (html.parser is the closest standard-library replacement).
import sgmllib


class MyParser(sgmllib.SGMLParser):
    """A simple parser class that records the ``href`` of every <a> tag."""

    def __init__(self, verbose=0):
        """Create the parser with an empty list of hyperlinks.

        ``verbose`` is passed straight through to ``sgmllib.SGMLParser``.
        """
        # Explicit base-class call rather than super(): this form works
        # whether or not the sgmllib base is a new-style class.
        sgmllib.SGMLParser.__init__(self, verbose)
        # Created here so that get_hyperlinks() and start_a() are safe to
        # use even when parse() has not been called yet (e.g. when the
        # caller drives the parser with feed() directly).
        self.hyperlinks = []

    def parse(self, s):
        """Parse the given string 's', collecting its hyperlinks.

        Links gathered by an earlier call are discarded first.
        """
        self.hyperlinks = []
        self.feed(s)
        self.close()

    def start_a(self, attributes):
        """Process a hyperlink and its 'attributes'.

        'attributes' is a sequence of (name, value) pairs; the value of
        every "href" attribute is appended to the list of hyperlinks.
        """
        self.hyperlinks.extend(
            value for name, value in attributes if name == "href"
        )

    def get_hyperlinks(self):
        """Return the list of hyperlinks collected so far."""
        return self.hyperlinks


# Sample page to scan.  The literal holds nothing but a newline, so no
# links are found and the script prints an empty list.
# NOTE(review): the sample markup may have been lost from this literal --
# confirm against the original source.
test = """
"""

# Feed the page through the stock parser.  The base class does not
# collect anything; this only shows the feed/close call sequence.
p = sgmllib.SGMLParser()
p.feed(test)
p.close()

# Try and process the page.
# The class should have been defined first, remember.
myparser = MyParser()
myparser.parse(test)

# Get the hyperlinks.
print(myparser.get_hyperlinks())