try:
    from html.parser import HTMLParser  # Python 3 location
except ImportError:
    from HTMLParser import HTMLParser  # Python 2 fallback

# Sample document handed to the parser below.
# NOTE(review): as it appears here the sample contains no markup at all, so
# feeding it prints nothing; presumably the original snippet held anchor tags
# around these values -- confirm against the original source.
html = """ test

title

ok ok

CPAI-2012-809

CVE-2011-2089

SCADA ICONICS WebHMI ActiveX Stack Overflow (2011-2089)

"""


class MyHTMLParser(HTMLParser):
    """Print the ``href`` of every ``<a>`` start tag whose href contains 'cve'."""

    def handle_starttag(self, tag, attrs):
        """Handle one start tag reported by the parser.

        tag   -- the tag name as passed in by HTMLParser.
        attrs -- list of ``(name, value)`` pairs; ``value`` is ``None`` for
                 an attribute written without a value (e.g. ``<a href>``).

        Prints the href when ``tag`` is ``'a'`` and the href contains the
        substring ``'cve'`` (case-sensitive match on the attribute value).
        """
        if tag != 'a':
            return
        # dict() keeps the last value when an attribute name is repeated.
        href = dict(attrs).get('href')
        # A valueless href arrives as None; testing it for truthiness first
        # avoids a TypeError from evaluating "'cve' in None".
        if href and 'cve' in href:
            # Single-argument call form behaves the same on Python 2 and 3.
            print(href)


parser = MyHTMLParser()
parser.feed(html)