import urllib import re import os print 'Getting data' if os.path.exists('page.html'): dat = open('page.html', 'rb').read().decode('utf-8') else: dat = urllib.urlopen('http://btjunkie.org/search?q=family+guy').read().decode('utf8', 'replace') # Added by AMK to save the output output = open('page.html', 'wb') output.write(dat.encode('utf-8')) output.close() # I know it's not very readable, but the SGML parser feels in pain torrent_re = re.compile('(?s)