#!/usr/bin/python -3 import bisect import random import copy import sys PAGE_SIZE=8 SPLIT_POINT = PAGE_SIZE//2 def split_list(l, k): answer = [] ps = 0 for x in k: pe = ps+x answer.append(l[ps:pe]) ps = pe answer.append(l[ps:]) return answer class DP: def __init__(self, initial_data=None): #print "initial_data", initial_data if initial_data is None: initial_data = [] self.data = initial_data #print "selfdata", self.data def split(self, p): b = split_list(self.data, p) return [ DP(x) for x in b ] def where(self, d): self.check_invarient() p = bisect.bisect_right(self.data, d) return p # def insert(self, d): # return self.insert_at(self.where(d), d) def insert_at(self, d, p): if p!=0 and self.data[p-1]==d: return False # if d in self.data: # print "FF", p # print "FF", self.data # return False if p>=len(self.data): # print "JJ", p self.data.append(d) elif self.data[p]!=d: self.data.insert(p, d) self.check_invarient() return True def check_invarient(self): c = copy.deepcopy(self.data) c.sort() assert str(c)==str(self.data), str(c)+"<>"+str(self.data) assert len(c)==len(set(c)) def __len__(self): return len(self.data) def __getitem__(self, x): return self.data[x] def __str__(self): return repr(self.data) def __iter__(self): return self.data.__iter__() def has(self, d): return d in set(self.data) class PP: def __init__(self, data=False, keys=None, values=None, ptrs=None): if data: self.keys = DP() if keys: self.keys = keys self.values = [] if values: self.values = values self.ptrs = None assert(ptrs is None) assert(len(self.keys)==len(self.values)) else: self.keys = DP() if keys: self.keys = keys self.ptrs = [] if ptrs: self.ptrs = ptrs self.values = None assert(values is None) def is_data(self): return self.values is not None def insert_key(self, k, l, r): assert(not self.is_data()) assert(not self.keys.has(k)) p = self.keys.where(k) if len(self.keys)>=PAGE_SIZE: # # the page below has just been split # and we are being asked to insert a key (say 5) above those two half-full pages # # 4 6 # a b c << existing pointers # # 4 5 6 # a l r c << some existing pointers and the two params passed # # what if there is not room? # # # 5 # / \ << new pointers # 4 6 # a l r c << existing pointers and the two params passed # lkeys, rkeys = self.keys.split([ p ]) lptrs, ignored, rptrs = split_list(self.ptrs, [ p, 1 ]) print ">>", lkeys, rkeys print ">>", lptrs, rptrs lptrs.append(l) rptrs.insert(0, r) l = PP(data=False, keys=lkeys, ptrs=lptrs) r = PP(data=False, keys=rkeys, ptrs=rptrs) self.ptrs = [l, r] self.keys = DP([k]) return print self.keys, k, p assert(self.keys.insert_at(k, p)) self.ptrs.insert(p+1, r) self.ptrs[p] = l return self.insert(k, v) def insert(self, k, v): p = self.keys.where(k) if self.is_data(): if self.keys.has(k): ## TODO: sharper? we've already looked for it print "data overwrite" self.values[p-1] = v return [] if len(self.keys)>=PAGE_SIZE: # split the page print "splitting" a, b = self.keys.split([ SPLIT_POINT ]) c, d = split_list(self.values, [ SPLIT_POINT ]) print a, c print b, d left = PP(data=True, keys=a, values=c) right = PP(data=True, keys=b, values=d) return [ self.keys[SPLIT_POINT], left, right ] # add a new value print "data insert" assert(self.keys.insert_at(k, p)) self.values.insert(p, v) return None else: if p>=len(self.ptrs): print "first key insert" assert(self.keys.insert_at(k, p)) a = PP(data=True) b = PP(data=True) self.ptrs = [ a, b ] p += 1 else: print "hunting insert at "+str(p), "in", self.keys #if self.keys[p]==k: p += 1 x = self.ptrs[p] subinsert = x.insert(k, v) if subinsert is not None and len(subinsert): print "need extra key", subinsert self.insert_key(subinsert[0], subinsert[1], subinsert[2]) def __len__(self): return len(self.data) def dump(self, indent=0, kmin=None, kmax=None, validate=True): sindent = indent * " " if self.is_data(): #print "d", self.keys, self.values self.keys.check_invarient() if len(self.keys): assert len(self.keys)==len(self.values) line = [] for p, k in enumerate(self.keys): line.append(str(k)+"="+str(self.values[p])) if kmin and k=kmax: line.append(">="+str(kmax)+"!!!") if not line: line = [ 'empty' ] else: line.append(" len="+str(len(self.keys))) print sindent+(" ".join(line)) else: #print "k", self.keys, self.ptrs self.keys.check_invarient() assert len(self.keys)+1==len(self.ptrs) lk = 0 for p, k in enumerate(self.keys): self.ptrs[p].dump(indent+1, kmin=lk, kmax=k) print sindent + "(" + str(k) + ")" lk = k # # python bug? # show_python_bug = len(sys.argv)>1 if show_python_bug: # # why not this? assert p == len(self.keys)-1 else: # # why do I need this instead? p = len(self.keys)-1 self.ptrs[p+1].dump(indent+1, kmin=lk) rs = random.Random("cheese") page = PP() for i in xrange(0, 1000): k = rs.randrange(0, 800) v = rs.randrange(0, 100) print "-" * 75 print "inserting", k, v page.insert(k, v) page.dump()