from collections import defaultdict from itertools import izip, islice # TODO # make "ordered" have same as input order # alter input to build pairs from sequences # ? generator version conflicts with cycle detection def topological_sort(dependency_pairs): 'Sort values subject to dependency constraints' num_heads = defaultdict(int) # num arrows pointing in tails = defaultdict(list) # list of arrows going out heads = [] # unique list of heads in order first seen for h, t in dependency_pairs: num_heads[t] += 1 if h in tails: tails[h].append(t) else: tails[h] = [t] heads.append(h) print 'Heads:', heads ordered = [h for h in heads if h not in num_heads] print 'Ordered:', ordered for h in ordered: for t in tails[h]: num_heads[t] -= 1 if not num_heads[t]: ordered.append(t) cyclic = [n for n, heads in num_heads.iteritems() if heads] return ordered, cyclic def is_toposorted(ordered, dependency_pairs): '''Return True if all dependencies have been honored. Raise KeyError for missing tasks. ''' rank = {t: i for i, t in enumerate(ordered)} return all(rank[h] < rank[t] for h, t in dependency_pairs) print topological_sort('aa'.split()) ordered, cyclic = topological_sort('ah bg cf ch di ed fb fg hd he ib'.split()) print ordered, cyclic print is_toposorted(ordered, 'ah bg cf ch di ed fb fg hd he ib'.split()) print topological_sort('ah bg cf ch di ed fb fg hd he ib ba xx'.split()) def tsort(seqs): pairs = [pair for seq in seqs for pair in izip(seq, islice(seq, 1, None))] print 'Pairs:', pairs ordered, cycles = topological_sort(pairs) if cycles: raise ValueError return ordered if __name__ == '__main__': print tsort(['ABDGI', 'BEG', 'CEH', 'KCFHJ']) print ['A', 'B', 'D', 'K', 'C', 'E', 'G', 'I', 'F', 'H', 'J'], '<-- target'