import collections
import urllib
import re
# Scan the per-letter general-index pages of the Python documentation and
# report every link target that one index entry references more than once.
#
# NOTE(review): this file was received with its indentation and the HTML tags
# inside two regex literals stripped.  The anchor pattern is reconstructed
# from how its groups are used (group 1 = attributes, group 2 = link text).
# The element tag in ENTRY_PATTERN (`<dt>`) and the position of the trailing
# blank-line print are best guesses -- confirm against the original script.

INDEX_URL_TEMPLATE = "http://docs.python.org/genindex-%c.html"

# One index entry per match; '.' does not cross newlines, so only entries
# written on a single line are matched.
ENTRY_PATTERN = re.compile('<dt>(.+?)</dt>', re.I)
# Group 1: the anchor's raw attribute text, group 2: the link text.
ANCHOR_PATTERN = re.compile('<a (.+?)>(.+?)</a>')
HREF_PATTERN = re.compile('href=["\'](.+?)["\']')


def find_duplicate_links(content):
    """Yield ``(href, texts)`` for link targets repeated within one entry.

    Args:
        content: HTML text of an index page.

    Yields:
        A tuple of the href value and the list of link texts (in document
        order) for every href that occurs at least twice inside a single
        element matched by ``ENTRY_PATTERN``.
    """
    for entry in ENTRY_PATTERN.finditer(content):
        texts_by_href = collections.defaultdict(list)
        for anchor in ANCHOR_PATTERN.finditer(entry.group(1)):
            href = HREF_PATTERN.search(anchor.group(1))
            if href is None:
                # Anchors without an href (e.g. name-only anchors) cannot be
                # grouped by target; skip them instead of crashing.
                continue
            texts_by_href[href.group(1)].append(anchor.group(2))
        for target, texts in texts_by_href.items():
            if len(texts) >= 2:
                yield target, texts


def _fetch(url):
    """Return the body of *url* as text, closing the connection afterwards."""
    import contextlib
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    with contextlib.closing(urlopen(url)) as response:
        raw = response.read()
    if isinstance(raw, str):
        # Python 2: read() already returns the native str type.
        return raw
    # Python 3: read() returns bytes, which the str patterns cannot search.
    return raw.decode("utf-8", "replace")


def main():
    """Print the duplicated link targets found on the index pages A to Z."""
    # Single-argument print(...) behaves the same as a print statement on
    # Python 2 and as the print function on Python 3.
    for code in range(ord('A'), ord('Z') + 1):
        url = INDEX_URL_TEMPLATE % chr(code)
        print("/////////////////////////")
        print("// %s" % url)
        for target, texts in find_duplicate_links(_fetch(url)):
            print(target)
            for text in texts:
                print("\t" + text)
            print("")


if __name__ == "__main__":
    main()