# Feed the file 'sgmllib.html' to an sgmllib.SGMLParser in pieces of CHUNK
# characters, rather than in a single feed() call.
#
# The input is deliberately fed chunk by chunk: per the note on CHUNK below,
# the behaviour under investigation depends on the chunk size.
import sgmllib

CHUNK = 1024  # increasing this to 8212 makes the problem go away

fp = sgmllib.SGMLParser()

# The with-block closes the input file even if feed() raises.
with open('sgmllib.html') as f:
    while True:
        data = f.read(CHUNK)
        # An empty read is the end-of-file signal; a merely short read is
        # not treated as EOF, and no empty string is ever fed to the parser.
        if not data:
            break
        fp.feed(data)

# Tell the parser the input has ended so that any data it is still
# buffering from the last chunk is processed.
fp.close()