"""Print the HTML5 named character references as Python dict entries.

Fetches the W3C named-character-references page, walks every table row
whose id starts with ``entity-`` and prints one line per row in the form
``'name': [0x..., ...],`` followed by a final count of the rows seen.

Written for Python 2 (``urllib.urlopen``) with the ``BeautifulSoup``
(version 3) package.
"""
import re
from contextlib import closing
from urllib import urlopen

from BeautifulSoup import BeautifulSoup as BS

url = 'http://www.w3.org/TR/html5/named-character-references.html'

# The object returned by urlopen() is not a context manager on Python 2, so
# closing() is used to make sure the connection is released even when the
# parser raises.
with closing(urlopen(url)) as response:
    page = BS(response)

total = 0
# Only rows whose id attribute begins with "entity-" are considered.
for tr in page.findAll('tr', id=re.compile('^entity-')):
    # Exactly three cells per row are required by this unpacking; the third
    # cell is ignored.
    name_td, value_td, _ = tr.findAll('td')
    # NOTE(review): trailing ';' characters are stripped, so if the table
    # lists a name both with and without ';' the two rows print the same key
    # and are both counted in `total` -- confirm this is intended.
    name = name_td.code.string.rstrip(';')
    # Each whitespace-separated token loses its first two characters
    # (presumably a "U+" prefix -- verify against the page) and the rest is
    # parsed as hexadecimal.
    values = [int(value[2:], 16) for value in value_td.string.split()]
    hex_values = ', '.join(map(hex, values))
    # A single parenthesised argument prints the same text as the Python 2
    # print statement.
    print(" '{}': [{}],".format(name, hex_values))
    total += 1

print('%d entities found.' % total)