Index: ../stringbench/stringbench.py =================================================================== --- ../stringbench/stringbench.py (revision 76951) +++ ../stringbench/stringbench.py (working copy) @@ -32,7 +32,6 @@ _RANGE_1000 = range(1000) -_RANGE_1000 = range(1000) _RANGE_100 = range(100) _RANGE_10 = range(10) @@ -116,12 +115,8 @@ #### same tests as 'in' but use 'find' -# XXX: TODO: Add rfind - - - @bench('("A"*1000).find("A")', "early match, single character", 1000) -def find_quick_match_single_character(STR): +def find_test_quick_match_single_character(STR): s1 = STR("A" * 1000) s2 = STR("A") s1_find = s1.find @@ -153,6 +148,14 @@ for x in _RANGE_1000: s1_find(s2) +@bench('("AB"*1000).find("CA")', "no match, two characters", 1000) +def find_test_no_match_two_character_bis(STR): + s1 = STR("AB" * 1000) + s2 = STR("CA") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + @bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000) def find_test_slow_match_two_characters(STR): s1 = STR("AB" * 300+"C") @@ -161,7 +164,15 @@ for x in _RANGE_1000: s1_find(s2) -@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s)', +@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000) +def find_test_slow_match_two_characters_bis(STR): + s1 = STR("AB" * 300+"CA") + s2 = STR("CA") + s1_find = s1.find + for x in _RANGE_1000: + s1_find(s2) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")', "late match, 100 characters", 100) def find_test_slow_match_100_characters(STR): m = STR("ABC"*33) @@ -171,10 +182,99 @@ for x in _RANGE_100: s1_find(s2) +@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)', + "late match, 100 characters", 100) +def find_test_slow_match_100_characters_bis(STR): + m = STR("ABC"*33) + s1 = (m+"D")*500 + "E"+m + s2 = "E"+m + s1_find = s1.find + for x in _RANGE_100: + s1_find(s2) + + +#### Same tests for 'rfind' + +@bench('("A"*1000).rfind("A")', "early match, single character", 1000) +def rfind_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("A"*1000).rfind("B")', "no match, single character", 1000) +def rfind_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + + +@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000) +def rfind_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000) +def rfind_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000) +def rfind_test_no_match_two_character_bis(STR): + s1 = STR("AB" * 1000) + s2 = STR("CA") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000) +def rfind_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000) +def rfind_test_slow_match_two_characters_bis(STR): + s1 = STR("BC" + "AB" * 300) + s2 = STR("BC") + s1_rfind = s1.rfind + for x in _RANGE_1000: + s1_rfind(s2) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)', + "late match, 100 characters", 100) +def rfind_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + s1 = "E"+m + ("D"+m)*500 + s2 = "E"+m + s1_rfind = s1.rfind + for x in _RANGE_100: + s1_rfind(s2) + +@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")', + "late match, 100 characters", 100) +def rfind_test_slow_match_100_characters_bis(STR): + m = STR("ABC"*33) + s1 = m+"E" + ("D"+m)*500 + s2 = m+"E" + s1_rfind = s1.rfind + for x in _RANGE_100: + s1_rfind(s2) + + #### Now with index. # Skip the ones which fail because that would include exception overhead. -# Add rindex tests. - @bench('("A"*1000).index("A")', "early match, single character", 1000) def index_test_quick_match_single_character(STR): @@ -184,7 +284,6 @@ for x in _RANGE_1000: s1_index(s2) - @bench('("AB"*1000).index("AB")', "early match, two characters", 1000) def index_test_quick_match_two_characters(STR): s1 = STR("AB" * 1000) @@ -201,7 +300,7 @@ for x in _RANGE_1000: s1_index(s2) -@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s)', +@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")', "late match, 100 characters", 100) def index_test_slow_match_100_characters(STR): m = STR("ABC"*33) @@ -211,6 +310,260 @@ for x in _RANGE_100: s1_index(s2) + +#### Same for rindex + +@bench('("A"*1000).rindex("A")', "early match, single character", 1000) +def rindex_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rindex = s1.rindex + for x in _RANGE_1000: + s1_rindex(s2) + +@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000) +def rindex_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rindex = s1.rindex + for x in _RANGE_1000: + s1_rindex(s2) + +@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000) +def rindex_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rindex = s1.rindex + for x in _RANGE_1000: + s1_rindex(s2) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)', + "late match, 100 characters", 100) +def rindex_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + s1 = "E" + m + ("D"+m)*500 + s2 = "E" + m + s1_rindex = s1.rindex + for x in _RANGE_100: + s1_rindex(s2) + + +#### Same for partition + +@bench('("A"*1000).partition("A")', "early match, single character", 1000) +def partition_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('("A"*1000).partition("B")', "no match, single character", 1000) +def partition_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + + +@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000) +def partition_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000) +def partition_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000) +def partition_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + s1_partition = s1.partition + for x in _RANGE_1000: + s1_partition(s2) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")', + "late match, 100 characters", 100) +def partition_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + s1 = (m+"D")*500 + m+"E" + s2 = m+"E" + s1_partition = s1.partition + for x in _RANGE_100: + s1_partition(s2) + + +#### Same for rpartition + +@bench('("A"*1000).rpartition("A")', "early match, single character", 1000) +def rpartition_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('("A"*1000).rpartition("B")', "no match, single character", 1000) +def rpartition_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + + +@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000) +def rpartition_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000) +def rpartition_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000) +def rpartition_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rpartition = s1.rpartition + for x in _RANGE_1000: + s1_rpartition(s2) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)', + "late match, 100 characters", 100) +def rpartition_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + s1 = "E" + m + ("D"+m)*500 + s2 = "E" + m + s1_rpartition = s1.rpartition + for x in _RANGE_100: + s1_rpartition(s2) + + +#### Same for split(s, 1) + +@bench('("A"*1000).split("A", 1)', "early match, single character", 1000) +def split_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('("A"*1000).split("B", 1)', "no match, single character", 1000) +def split_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + + +@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000) +def split_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000) +def split_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000) +def split_test_slow_match_two_characters(STR): + s1 = STR("AB" * 300+"C") + s2 = STR("BC") + s1_split = s1.split + for x in _RANGE_1000: + s1_split(s2, 1) + +@bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)', + "late match, 100 characters", 100) +def split_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + s1 = (m+"D")*500 + m+"E" + s2 = m+"E" + s1_split = s1.split + for x in _RANGE_100: + s1_split(s2, 1) + + +#### Same for rsplit(s, 1) + +@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000) +def rsplit_test_quick_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("A") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000) +def rsplit_test_no_match_single_character(STR): + s1 = STR("A" * 1000) + s2 = STR("B") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + + +@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000) +def rsplit_test_quick_match_two_characters(STR): + s1 = STR("AB" * 1000) + s2 = STR("AB") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000) +def rsplit_test_no_match_two_character(STR): + s1 = STR("AB" * 1000) + s2 = STR("BC") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000) +def rsplit_test_slow_match_two_characters(STR): + s1 = STR("C" + "AB" * 300) + s2 = STR("CA") + s1_rsplit = s1.rsplit + for x in _RANGE_1000: + s1_rsplit(s2, 1) + +@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)', + "late match, 100 characters", 100) +def rsplit_test_slow_match_100_characters(STR): + m = STR("ABC"*33) + s1 = "E" + m + ("D"+m)*500 + s2 = "E" + m + s1_rsplit = s1.rsplit + for x in _RANGE_100: + s1_rsplit(s2, 1) + + #### Benchmark the operator-based methods @bench('"A"*10', "repeat 1 character 10 times", 1000) @@ -382,6 +735,24 @@ for x in _RANGE_1000: s_rsplit(N, 1) +@bench('("Here are some words. "*2).partition(" ")', + "split 1 whitespace", 1000) +def whitespace_partition(STR): + sep = STR(" ") + s = STR("Here are some words. "*2) + s_partition = s.partition + for x in _RANGE_1000: + s_partition(sep) + +@bench('("Here are some words. "*2).rpartition(" ")', + "split 1 whitespace", 1000) +def whitespace_rpartition(STR): + sep = STR(" ") + s = STR("Here are some words. "*2) + s_rpartition = s.rpartition + for x in _RANGE_1000: + s_rpartition(sep) + human_text = """\ Python is a dynamic object-oriented programming language that can be used for many kinds of software development. It offers strong support @@ -539,31 +910,35 @@ @bench('GFF3_example.split("\\t")', "tab split", 1000) def tab_split_no_limit(STR): + sep = STR("\t") s = STR(GFF3_example) s_split = s.split for x in _RANGE_1000: - s_split("\t") + s_split(sep) @bench('GFF3_example.split("\\t", 8)', "tab split", 1000) def tab_split_limit(STR): + sep = STR("\t") s = STR(GFF3_example) s_split = s.split for x in _RANGE_1000: - s_split("\t", 8) + s_split(sep, 8) @bench('GFF3_example.rsplit("\\t")', "tab split", 1000) def tab_rsplit_no_limit(STR): + sep = STR("\t") s = STR(GFF3_example) s_rsplit = s.rsplit for x in _RANGE_1000: - s_rsplit("\t") + s_rsplit(sep) @bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000) def tab_rsplit_limit(STR): + sep = STR("\t") s = STR(GFF3_example) s_rsplit = s.rsplit for x in _RANGE_1000: - s_rsplit("\t", 8) + s_rsplit(sep, 8) #### Count characters @@ -996,6 +1371,8 @@ for title, group in itertools.groupby(bench_functions, operator.itemgetter(0)): + # Flush buffer before each group + sys.stdout.flush() print "="*10, title for (_, k, v) in group: if hasattr(v, "is_bench"): @@ -1024,7 +1401,7 @@ print "That was zippy!" else: try: - ratio = str_time/uni_time + ratio = str_total/uni_total except ZeroDivisionError: ratio = 0.0 print "%.2f\t%.2f\t%.1f\t%s" % (