''' Topologicl merge for use with algorithms such as the C3 linearization''' from collections import defaultdict from itertools import izip def merge(*seqs): #merge(['a','b','c'], ['b','e','g','h'], ['c','d','e','h']) # |----> ['a', 'b', 'c', 'd', 'e', 'g', 'h'] succ = defaultdict(set) pred = defaultdict(set) for seq in seqs: for p, q in izip(seq, seq[1:]): succ[p].add(q) pred[q].add(p) result = [s for s in succ if s not in pred] for h in result: for s in succ[h]: pred[s].discard(h) if not pred[s]: result.append(s) if any(pred.itervalues()): bases = sorted(set(b for p in pred.values() for b in p)) raise TypeError('Cannot create a consistent MRO for bases %r' % bases) return result def new_merge(*seqs): ''' Pair each sequence with a corresponding set of successors: a b c d with {b,c,d} b f g with {f,g} c g with {g} Scan top three for any one that doesn't appear in the other sets remove it, and update the other sets. ''' succs = [set(seq[1:]) for seq in seqs] seqs = [list(reversed(seq)) for seq in seqs] result = [] while any(seqs): for seq in seqs: if seq and not any(seq[-1] in succ for succ in succs): match = seq[-1] result.append(match) for seq, succ in izip(seqs, succs): if seq and seq[-1] == match: seq.pop() if seq: succ.discard(seq[-1]) break else: raise TypeError('No solution for %r' % seqs) return result #################### TEST GENERATOR ####################### import string import random def random_combination(iterable, r): "Random selection from itertools.combinations(iterable, r)" pool = tuple(iterable) n = len(pool) indices = sorted(random.sample(xrange(n), r)) return tuple(pool[i] for i in indices) def make_seqs(popsize, seqlen, numseqs): return [random_combination(string.letters[:popsize], seqlen) for i in range(numseqs)] def verify_subsequence(seq1, seq2): pos={val:i for i, val in enumerate(seq2)} last = -1 for val in seq1: p = pos[val] if p <= last: return False last = p return True def test(merger, times): for i in xrange(times): seqs = make_seqs(15, 5, 4) comb = merger(*seqs) if not all(verify_subsequence(seq, comb) for seq in seqs): print '\n!!!', comb, '<--', seqs elif i % 1000 == 0: print '.', if __name__ == '__main__': print merge(['a', 'b', 'c'], ['b', 'e', 'g', 'h'], ['c', 'd', 'e', 'h']) print merge(['a', 'b', 'c'], ['b', 'e', 'g', 'h'], ['c', 'd', 'e', 'h']) print '\n-- merge() -----------------------' test(merge, 100000) print '\n-- new_merge() -------------------' test(new_merge, 100000) print '\n-- Example with MRO conflict -----' try: merge(['a', 'b', 'c'], ['b', 'h', 'g', 'e'], ['c', 'd', 'e', 'h']) except TypeError: print 'Observed the expected TypeError' else: print 'Failed to observe the expected TypeError'