# Scan an HTML document and print the href of every <a> tag whose link
# contains the substring 'cve'.

try:
    from html.parser import HTMLParser  # Python 3 location
except ImportError:
    from HTMLParser import HTMLParser  # Python 2 location

# Sample document fed to the parser below.
# NOTE(review): this text contains no tags at all, so feeding it prints
# nothing -- the markup looks like it was stripped somewhere along the way.
# Restore the real HTML (with its <a href="..."> links) to get output.
html = """ test

title

ok ok
CPAI-2012-809 CVE-2011-2089
SCADA ICONICS WebHMI ActiveX Stack Overflow (2011-2089)
"""


class MyHTMLParser(HTMLParser):
    """Parser that prints the ``href`` of ``<a>`` tags containing ``'cve'``."""

    def handle_starttag(self, tag, attrs):
        """Print the link target of an anchor whose ``href`` contains 'cve'.

        Args:
            tag: Name of the start tag being reported by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs for the tag's attributes.

        The match is a case-sensitive substring test on the ``href`` value.
        """
        if tag != 'a':
            return
        href = dict(attrs).get('href')
        # An attribute written without a value (``<a href>``) is reported
        # with value None; testing ``'cve' in None`` would raise TypeError,
        # so missing/None/empty hrefs are skipped.
        if href and 'cve' in href:
            print(href)


parser = MyHTMLParser()
parser.feed(html)