'Data munging use cases'

from itertools import dropwhile, takewhile, chain, tee, filterfalse
from statistics import mean


def is_header(line):
    """Return True when *line* is anything other than a bare run of digits.

    Surrounding whitespace is ignored.  Blank lines and signed or decimal
    numbers are not pure digit runs, so they are classified as headers too.
    """
    return not line.strip().isdigit()


# Sample input: four header lines followed by one integer reading per line.
report = '''\
Daily Average Temperature in Fahrenheit
Station: KSJC
Timeframe: July 2021
---------
72
75
73
74
78
85
89
88
85
88
91
86
82
76
'''.splitlines()

###################################################
## Only want headers ##############################

# Correct example with takewhile()
print(list(takewhile(is_header, report)))

###################################################
## Only want data #################################

# Correct example with dropwhile()
print(mean(map(int, dropwhile(is_header, report))))

###################################################
## Want headers, then data ########################

# Broken example with takewhile(): the first non-header line is consumed
# from the shared iterator by takewhile() and then thrown away.
it = iter(report)
print('headers', list(takewhile(is_header, it)))
print('data', list(map(int, it)))
print('Gah! The 72 measurement is missing')

# Correct example with plain for-loop and with data in a list
it = iter(report)
headers = []
data = []
for line in it:
    if is_header(line):
        headers.append(line)
    else:
        data.append(int(line))
print(mean(data))

# Correct example with plain for-loop and chain()
it = iter(report)
for line in it:
    if is_header(line):
        print('Header:', repr(line))
    else:
        # Push the first data line back in front of the remaining input.
        it = chain([line], it)
        break
print(mean(map(int, it)))

# Example with the proposed takewhile().odd_element API.  The stock
# itertools.takewhile() object has no such attribute, so the element that
# ended the header run cannot be recovered there.  Only report a mean when
# it can be; otherwise the result would silently exclude the 72 reading.
it = iter(report)
tw_it = takewhile(is_header, it)
for line in tw_it:
    print('Header:', repr(line))
if hasattr(tw_it, 'odd_element'):
    it = chain([tw_it.odd_element], it)
    print(mean(map(int, it)))
else:
    print('Skipped: takewhile() has no odd_element; '
          'the 72 measurement would be lost')


# Correct but inefficient example with partition() recipe
def partition(pred, iterable):
    """Split *iterable* into (items where pred is true, items where it is false).

    Both results are lazy iterators fed from a tee() of the input, so the
    predicate is evaluated once per item for each of the two outputs.
    """
    t1, t2 = tee(iterable)
    return filter(pred, t1), filterfalse(pred, t2)


headers, data = partition(is_header, report)
for line in headers:
    print('Header:', repr(line))
print(mean(map(int, data)))


# Correct, efficient, and clean example with custom generator
def before_and_after(predicate, it):
    """Return a true iterator and false iterator.

    The first iterator yields the leading items for which *predicate* is
    true.  The second yields the first item that failed the predicate,
    followed by everything left in *it*.

    Both iterators share the same underlying input, so the first one must
    be exhausted before the second is used; otherwise the second simply
    drains whatever input has not been consumed yet.
    """
    it = iter(it)
    # Holds the single element that ended the "true" run, so it is not lost.
    transition = []

    def true_iterator():
        for elem in it:
            if predicate(elem):
                yield elem
            else:
                transition.append(elem)
                return

    def false_iterator():
        # Evaluated lazily: transition is read only once iteration starts.
        yield from transition
        yield from it

    return true_iterator(), false_iterator()


headers, data = before_and_after(is_header, report)
for line in headers:
    print('Header:', repr(line))
print(mean(map(int, data)))