"""Word-level Markov chain text generator.

Text files are parsed into a table mapping each word to the words seen
directly after it.  The table can be saved to and loaded from a compact
binary database, and random sentences can be generated from it.

Database layout (all lengths are single bytes):

* start words: ``<len><word>`` records, terminated by a ``0`` byte;
* then, for every known word: ``<len><word>`` followed by its successor
  records and a ``0`` byte; the whole section ends with another ``0`` byte.
  A successor record is either ``0xFF`` (end of sentence) or
  ``<len><word>`` with ``len <= 0xFE``.
"""
import sys
import random

_MAX_KEY_LEN = 0xFF    # longest start word / key storable in one length byte
_MAX_NEXT_LEN = 0xFE   # successors stop at 0xFE because 0xFF means "None"
_END_MARK = b"\x00"
_NONE_MARK = b"\xff"


def _encode_word(word):
    """Return *word* as a byte string (UTF-8 for text strings)."""
    if isinstance(word, bytes):
        # Python 2 ``str`` is already bytes and is stored verbatim.
        return word
    return word.encode("utf-8")


def _decode_word(raw):
    """Return the native string for the stored byte string *raw*."""
    if str is bytes:
        # Python 2: words are plain byte strings.
        return raw
    # Truncation on save may have cut a multi-byte character in half.
    return raw.decode("utf-8", "replace")


def _length_byte(size):
    """Return *size* (0-255) as a one-byte string."""
    return bytes(bytearray((size,)))


def _read_length(fp):
    """Read one length byte from *fp*; raise ValueError at end of file."""
    byte = fp.read(1)
    if not byte:
        raise ValueError("unexpected end of database file")
    return ord(byte)


def _read_word(fp, size):
    """Read a *size*-byte word from *fp*; raise ValueError if it is cut short."""
    raw = fp.read(size)
    if len(raw) != size:
        raise ValueError("unexpected end of database file")
    return _decode_word(raw)


def _pick(items):
    """Return a random element of the non-empty sequence *items*."""
    # Kept as one random.random() call per pick, as the original code did,
    # so seeded runs draw the same sequence of random numbers.
    return items[int(random.random() * len(items))]


class Markov:
    """First-order Markov model over lower-cased, space-separated words.

    Attributes:
        wlist: dict mapping a word to the list of words that followed it;
            ``None`` in a list marks "end of line".  Duplicates are kept,
            so frequent successors are picked more often.
        wstart: list of words that started a line (duplicates kept).
        fname: unused by this class; kept for compatibility.
    """

    def __init__(self):
        self.wlist = {}
        self.wstart = []
        self.fname = ""

    def addword(self, prev, curr, next):
        """Record that *curr* is followed by *next*.

        *next* may be ``None`` for the last word of a line.  When *prev* is
        falsy, *curr* is additionally recorded as a possible start word.
        (The parameter name ``next`` shadows the builtin but is kept so
        keyword callers continue to work.)
        """
        self.wlist.setdefault(curr, []).append(next)
        if not prev:
            self.wstart.append(curr)

    def parseline(self, line):
        """Add every word of *line* to the model.

        The line is lower-cased, a trailing newline is removed and the rest
        is split on single spaces; empty fragments are ignored.
        """
        words = [w for w in line.lower().rstrip("\n").split(" ") if w]
        last = len(words) - 1
        for i, word in enumerate(words):
            before = words[i - 1] if i > 0 else None
            after = words[i + 1] if i < last else None
            self.addword(before, word, after)

    def parsefile(self, fname):
        """Feed every line of the text file *fname* to :meth:`parseline`."""
        with open(fname, "r") as fp:
            for line in fp:
                self.parseline(line)

    def savedb(self, fname):
        """Write the model to the binary database file *fname*.

        Start words longer than 0xFF bytes are skipped, keys are truncated
        to 0xFF bytes and successors to 0xFE bytes.
        """
        with open(fname, "wb") as fp:
            for word in self.wstart:
                raw = _encode_word(word)
                # A zero-length record would read back as the terminator.
                if 0 < len(raw) <= _MAX_KEY_LEN:
                    fp.write(_length_byte(len(raw)) + raw)
            fp.write(_END_MARK)

            # Iterate items so the lookup uses the ORIGINAL key; only the
            # written form is truncated (truncating first raised KeyError).
            for word, successors in self.wlist.items():
                raw = _encode_word(word)[:_MAX_KEY_LEN]
                if not raw:
                    continue
                fp.write(_length_byte(len(raw)) + raw)
                for succ in successors:
                    if not succ:
                        # None (or an empty word) ends a sentence.
                        fp.write(_NONE_MARK)
                        continue
                    raw = _encode_word(succ)[:_MAX_NEXT_LEN]
                    fp.write(_length_byte(len(raw)) + raw)
                fp.write(_END_MARK)
            fp.write(_END_MARK)

    def loaddb(self, fname):
        """Merge the binary database file *fname* into the model.

        Raises:
            ValueError: if the file ends before its terminating markers.
        """
        with open(fname, "rb") as fp:
            # Each section is a run of records ended by a zero length byte.
            for size in iter(lambda: _read_length(fp), 0):
                self.wstart.append(_read_word(fp, size))

            for size in iter(lambda: _read_length(fp), 0):
                successors = self.wlist.setdefault(_read_word(fp, size), [])
                for succ_size in iter(lambda: _read_length(fp), 0):
                    if succ_size == 0xFF:
                        successors.append(None)
                    else:
                        successors.append(_read_word(fp, succ_size))

    def generate(self):
        """Return one randomly generated sentence.

        Starts from a random start word and keeps choosing a random
        successor until an end-of-sentence marker is drawn.  If a word has
        no entry in the table, the literal ``KEYERROR`` is appended and
        generation stops.  Returns ``""`` when the model has no start words.
        """
        if not self.wstart:
            return ""
        word = _pick(self.wstart)
        words = [word]
        while True:
            try:
                successors = self.wlist[word]
            except KeyError:
                words.append("KEYERROR")
                break
            if not successors:
                # Only possible with a hand-crafted database; end the chain.
                break
            word = _pick(successors)
            if not word:
                break
            words.append(word)
        return " ".join(words)


def main(argv=None):
    """Load the files named in *argv* and print one generated sentence.

    Arguments starting with ``:`` are read as binary databases, all others
    as plain text.  Exits via SystemExit after printing usage when no file
    is given.
    """
    if argv is None:
        argv = sys.argv
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    if len(argv) <= 1:
        print("usage:")
        print(" " + argv[0] + " ")
        print("prefix filenames with a : to read a database")
        raise SystemExit

    lib = Markov()
    for arg in argv[1:]:
        if arg.startswith(":"):
            print("Loading database \"" + arg[1:] + "\"...")
            lib.loaddb(arg[1:])
        else:
            print("Loading \"" + arg + "\"...")
            lib.parsefile(arg)

    print("Generating text...")
    print(lib.generate())


if __name__ == "__main__":
    main()