#!python3 """Investigation of how useful it is to optimize Python powers. See longobject.c, lines 4245: /* Left-to-right binary exponentiation (HAC Algorithm 14.79) */ /* http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf */ for (i = Py_SIZE(b) - 1; i >= 0; --i) { digit bi = b->ob_digit[i]; for (j = (digit)1 << (PyLong_SHIFT-1); j != 0; j >>= 1) { MULT(z, z, z); if (bi & j) MULT(z, a, z); and for the 5-ary (which is actually 32-ary); note that each digit contains 15 or 30 bits of a number. /* Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) */ Py_INCREF(z); /* still holds 1L */ table[0] = z; for (i = 1; i < 32; ++i) MULT(table[i-1], a, table[i]); for (i = Py_SIZE(b) - 1; i >= 0; --i) { const digit bi = b->ob_digit[i]; for (j = PyLong_SHIFT - 5; j >= 0; j -= 5) { const int index = (bi >> j) & 0x1f; for (k = 0; k < 5; ++k) MULT(z, z, z); if (index) MULT(z, table[index], z); } } """ # for phone ---------------------------------------- import os try: os.chdir(os.path.dirname(__file__) or '.') except NameError: pass import re from collections import namedtuple, defaultdict, Counter from random import randrange, getrandbits, sample from operator import methodcaller, itemgetter, add from itertools import chain, product,islice from functools import partial # number of multiplications, squares and "empty" squares (of 1) _Mults = namedtuple("_Mults", "squares mults empty chunkSize") class Mults(namedtuple("Mults", "squares mults empty chunkSize")): """Make sensible ordering in Mults""" def __int__(self): return self.mults + self.squares def __eq__(self, other): return int(self) == int(other) def __lt__(self, other): return int(self) < int(other) def __gt__(self, other): return int(self) > int(other) def __le__(self, other): return int(self) <= int(other) def __ge__(self, other): return int(self) >= int(other) def __ne__(self, other): return int(self) != int(other) # auxiliary functions ---------------------------------------- def digits(n, chunkSize=30): """Split n into 'big digits' of chunkSize bits each >>> digits(10000, 3) [0, 2, 4, 3, 2] """ mask = (1 << chunkSize) - 1 return [n >> i & mask for i in range(0, n.bit_length(), chunkSize)] def hamming(n): return bin(n).count('1') # original functions ---------------------------------------- def mul_bin(n, ws=30): """Compute number of multiplications using basic binary method As close to python code as possible counts squares, multiplications, empty squares >>> mul_bin(10000, 8) Mults(squares=13, mults=5, empty=3, chunkSize=None) """ oneSquares = squares = mults = 0 resultPower = 0 for digit in reversed(digits(n, ws)): for bitPos in range(ws - 1, -1, -1): if resultPower: squares += 1 else: oneSquares += 1 resultPower *= 2 if digit >> bitPos & 1: mults += 1 resultPower += 1 if resultPower != n: raise AssertionError return Mults(squares, mults, oneSquares, None) def est_mul_bin(n, ws=30): """Compute counts of mul_bin directly >>> est_mul_bin(10000, 8) Mults(squares=13, mults=5, empty=3, chunkSize=None) """ if not n: return Mults(0, 0, 0, None) bl = n.bit_length() return Mults(bl - 1, hamming(n), -bl % ws + 1, None) def mulP(n, ws=30, chunkSize=5): """Compute number of multiplications with chunkSize bits at a time As close to python code as possible counts squares, multiplications, empty squares >>> mulP(10000, 8, 4) Mults(squares=13, mults=17, empty=4, chunkSize=4) """ # make table if ws % chunkSize: raise AssertionError mask = (1 << chunkSize) - 1 oneSquares, squares, mults = 0, 1, (1 << chunkSize) - 2 resultPower = 0 for digit in reversed(digits(n, ws)): for bitPos in range(ws - chunkSize, -chunkSize, -chunkSize): if resultPower: squares += chunkSize else: oneSquares += chunkSize resultPower <<= chunkSize d = digit >> bitPos & mask if d: mults += 1 resultPower += d if resultPower != n: raise AssertionError return Mults(squares, mults, oneSquares, chunkSize) def est_mulP(n, ws=30, chunkSize=5): """Compute counts of mulP directly >>> est_mulP(10000, 8, 4) Mults(squares=13, mults=17, empty=4, chunkSize=4) """ if not n: return Mults(1, (1 << chunkSize) - 2, 0, chunkSize) bl = n.bit_length() dg = digits(n, chunkSize) return Mults((len(dg) - 1) * chunkSize + 1, sum(map(bool, dg)) + (1 << chunkSize) - 2, (chunkSize - 1 - len(dg) * chunkSize) % ws + 1, chunkSize) # proposed function -------------------------------------------- """C: static const unsigned char[8] CHUNKSIZE5TO8 = {2,3,2,3,2,3,2,2}; static const unsigned char[8] MAXLENGTHFOR4 = { static const unsigned char[8] MAXLENGTHFOR5 = { int goodChunkSize(PyLongPtr* n) { int length = n -> length; if (length == 1) { firstDigit = n -> digits [0]; if (firstDigit < 16) return (2); } else { firstDigit = n[-1]< firstDigitLength - 4 if length == 1: if firstDigitLength < 8: return [2, 2, 3, 3, 2, 2, 3, 2][prefix - 8] moreDigits = 0 else: moreDigits = length - 2 fullLength = firstDigitLength + moreDigits * ws if moreDigits: <* h2 free length checks *> h2 = (firstDigit >> 2).bit_count() switch (prefix) { case 8: case 12: if (fullLength <= 14) return 2; if (fullLength <= 14) return 2; return ... case 9: return ... case 10: case 14: return 3; case 11: return ... case 13: return... case 15: return ... } """ def otfR(n, ws=30, chunkSize=2, special=False): """On the fly powering. Using a table of powers, this routine multplies multiple bits at a time. Optimizations: - only store odd numbers in the table, and search for the locations to apply the power. - do not compute table entries until needed - use the square that is needed to construct the table where useful - special case for numbers that start with 1001 Results: chunkSize = 2 is never worse than binary (!) chunkSize = 3 costs at most 1 (8% of numbers <100, 1.4% under a million) The best value for the chunkSize parameter is computed by otfR_good >>> otfR(27, 8, 2) Mults(squares=4, mults=2, empty=0, chunkSize=2) """ if n < 2: return Mults(0, 0, 0, chunkSize) squares, mults, maxTable = 1, 0, 1 def tableFetch(p): nonlocal maxTable, mults assert p & 1 while maxTable < p: mults += 1 maxTable += 2 resultPower = 0 myDigits = list(digits(n, ws)) if len(myDigits) > 1: myDigits[-2] |= myDigits[-1] << ws del myDigits[-1] for digit in reversed(myDigits): resultShift = ws while digit: # compute active position bitPos = max(digit.bit_length(), chunkSize) - chunkSize head = digit >> bitPos if not resultPower: # start here: first part of first digit if head == 2: pass # from variable elif head == 4 and bitPos > 1 and digit >> bitPos - 1 == 9: # head can be extended to 9 (we know chunkSize==3 now) bitPos -= 1 head = 9 tableFetch(head) else: # make digit >> bitPos odd by moving to the left bitPos += (head ^ head - 1).bit_length() - 1 head = digit >> bitPos if head == 1 and bitPos and digit >> bitPos - 1 == 2: # we can move to the right for free from the square bitPos -= 1 head = 2 else: tableFetch(head) else: # make digit >> bitPos odd bitPos += (head ^ head - 1).bit_length() - 1 head = digit >> bitPos # shift to bitPos, multiply with table entry squares += resultShift - bitPos resultPower <<= resultShift - bitPos tableFetch(head) mults += 1 resultShift = bitPos resultPower += head # remove chunk digit &= (1 << resultShift) - 1 squares += resultShift resultPower <<= resultShift if resultPower != n: raise AssertionError return Mults(squares, mults, 0, chunkSize) def goodChunksize(n): """Determine a very good chunk size for n. Roughly, using chunk size n costs 1<> length - 4 if length > 100: if length > [647, 669, 657, 671, 647, 670, 657, 671][prefix - 8]: return 6 if length > [197, 230, 215, 230, 197, 230, 214, 231][prefix - 8]: return 5 return 4 # now n has between 8 and 100 bits # the optimal chunksize depends heavily on bit pattern subtleties in n # we use the two values below to get an idea on the best approach if length < 8: return [2, 2, 3, 3, 2, 2, 3, 2][prefix - 8] # number of times there are two ones with distance 2 apart in n # note this is at least 1 for prefix 10,11,13,14 and at least two for 15 h2 = hamming(n & n >> 2) # with these two numbers, we can combine a value for chunksize # that is (in average) a fraction of a multiplication from optimal # for lengths up to 17, it is optimal if prefix == 8 or prefix == 12: if length <= 14: return 2 if length > 83: return 4 return 2 if h2 < 4 + 25 // (length - 14) else 3 if prefix == 9: # this is the most complicated one if h2 < (31 - length) // 8: return 2 if length >= 60: return 4 if h2 < 3 + 110 // (64 - length) >> 1: return 4 return 3 if prefix == 10: return 3 if length + h2 < 105 else 4 if prefix == 11: # return 3 if length < 46 else 4 # there's a very complicated region around 45 to be handled here return 3 if length + (h2 - 18) ** 2 // 19 < 50 else 4 # this saves .08 multiplications for numbers of 46 bits if prefix == 13: if length <= 14 or h2 < 9 + 40 // (length - 14) >> 1: return 2 return 3 if length < 28 else 4 if prefix == 14: return 3 if length < 84 else 4 if prefix == 15: # the complicated region around 57 bits costs about .01 multiplication: # we leave that since there's no benefit anymore return 2 if h2 < 4 else (3 if length + h2 < 71 else 4) raise ValueError def fastChunksize(n): """Determine a very good chunk size for n, but don't look further than the first two digits of n. Roughly, using chunk size n costs 1<> length - 4 # very short numbers if length < 8: return [2, 2, 3, 3, 2, 2, 3, 2][prefix - 8] if length > 60: # we don't want to compute h2 for such long numbers # for odd prefix we don't need 3 if length < 84 and ~prefix & 1: return 3 if length < [198, 231, 216, 231, 198, 231, 215, 232][prefix - 8]: return 4 # in the C code we compare length >> 2 if length < [648, 670, 658, 672, 648, 671, 658, 672][prefix - 8]: return 5 return 6 # now n has between 8 and 60 bits # the optimal chunksize depends heavily on bit pattern subtleties in n # we use the h2 below to get an idea on the best approach # number of times there are two ones with distance 2 apart in n # note this is at least 1 for prefix 10,11,13,14 and at least two for 15 h2 = hamming(n & n >> 2) if length <= 60 else length // 4 # with length prefix and h2, we can combine a value for chunksize # that is (in average) a fraction of a multiplication from optimal # for lengths up to 17, it is optimal if prefix == 8 or prefix == 12: if length <= 14: return 2 if length > 83: return 4 return 2 if h2 < 4 + 25 // (length - 14) else 3 if prefix == 9: # this is the most complicated one if h2 < (31 - length) // 8: return 2 if h2 < 3 + 110 // (64 - length) >> 1: return 4 return 3 if prefix == 10 or prefix == 14: return 3 if prefix == 11: # return 3 if length < 46 else 4 # there's a very complicated region around 45 to be handled here return 3 if length + (h2 - 18) ** 2 // 19 < 50 else 4 # this saves .08 multiplications for numbers of 46 bits if prefix == 13: if length <= 14 or h2 < 9 + 40 // (length - 14) >> 1: return 2 return 3 if length < 28 else 4 if prefix == 15: # the complicated region around 57 bits costs about .01 multiplication: # we leave that since there's no benefit anymore return 2 if h2 < 4 else (3 if length + h2 < 71 else 4) raise ValueError #goodChunksize = fastChunksize def otfR_good(n, ws=30): """This optimal choice for chunk size gives pretty nice results. Here's a comparison with respect to optimal parameter choice. Method Up to 10**3 10**4 10**5 10**6 10**7 10**8 Binary 12910 178226 2283954 27836418 327657410 3780229378 Optimal 11032 154298 1970878 23957755 282553573 Good 11039 154553 1974797 24024764 283496827 3254408073 Difference -14.5% -13.3% -13.5% -13.7% -13.5% -13.9% """ return otfR(n, 32, goodChunksize(n)) # analytic stuff ------------------------------------------- def otfR_optimal(n, ws=30): return min((otfR(n, ws, k) for k in range(2, 6)), key=int) def chooseBigChunk(): """Give statistical information for very big numbers Apparently, we get something like this: - 30: what otfR_good says 30-70: 4 appears to work nicely for prefixes 13, 9, 11, 15 too 70-250: most prefer 4, some 5 250-: all numbers use 5 750: all numbers use 66 """ for p in range(100): if p % 10 == 0: print("length 8 9 10 11 12 13 14 15 h2") length = int(20 * 1.05 ** p) n = getrandbits(length - 1) | 1 << length - 1 mults = [int(otfR(n, 30, k)) for k in range(2, 7)] opt = min(mults) opts = [i for i,m in enumerate(mults, start=2) if m==opt] prefix = n >> length - 4 mask = (1 << length) - 1 h2 = hamming(mask & n & n >> 2) print(f"{length:4d}:", " " * (prefix - 8), str(min(opts))+str(max(opts)), " " * (15 - prefix), h2) def showSummary(m=9): """Print the text in the header of otfR_good """ print(end="Method To ") print(*(" " * (d-2) + f"10**{d:d}" for d in range(3, m))) binary = [sum(int(est_mul_bin(n, 30)) for n in range(10 ** d)) for d in range(3, m)] print(end="Binary ") print(*(f"{binary[d-3]:{d+3}d}" for d in range(3, m))) optimal = [sum(int(otfR_optimal(n, 30)) for n in range(10 ** d)) for d in range(3, m-1)] print(end="Optimal ") print(*(f"{optimal[d-3]:{d+3}d}" for d in range(3, m-1))) good = [sum(int(otfR_good(n, 30)) for n in range(10 ** d)) for d in range(3, m)] print(end="Good ") print(*(f"{good[d-3]:{d+3}d}" for d in range(3, m))) print(end="Difference ") print(*(f"{(good[d-3]-binary[d-3])/binary[d-3]:{d+3}.1%}" for d in range(3, m))) # If you look at the output of this routine, you see the patterns # that allow to guess the optimal chunk size # for more, see showOverviewOutput.txt def showOverview(margin=.01, ws=30, lengths=None, prefixes=None, verbose=False): """Shows a very concise overview of te best choices to make for the otfR parameter. Parameters: accuracy margin (take e.g. .001 for high accuracy (slow!) ws: word size of Python's integers lengths: iterable of bit lengths: default 6 to 100 in increasing steps prefixs: iterable of prefixes: default is all Lines are like this: [22,9:2<1;3>4]24 4 4 4 4 34 3 s 3 3 3 3 3 3 3 3 3 | | | | ^ choices for chunkSize for h2=0 and up that give value | | | | that uses almost optimal multiplications | | | | e.g. 34 means that for h2=5, both s and 4 are almost optimal | | | ^ if h2>4, you can safely use s (! means: use always) | | ^ if h2<1, you can safely use 2 | ^ prefix (first four bits of number in hex) ^ length of number in bits If the line is near optimal for 2 (prefix 89cd) or 3 (prefix abef) irrespective of h2, is it omitted. Also notice the wordsize artifacts for numbers just above 30, 60. Lower margin is more accurate, but slower of course """ samplesize = int(1 / margin**2) if prefixes is None: prefixes = range(8, 16) if lengths is None: lengths = chain( range(6,35), range(35, 65, 2), range(65, 100, 5)) for length in lengths: for prefix in prefixes: m2 = defaultdict(int) m3 = defaultdict(int) m4 = defaultdict(int) m5 = defaultdict(int) ns = range(prefix << length-4, prefix+1 << length-4) if samplesize >> length - 4: pass elif length < 30: ns = sample(ns, samplesize) else: ns = islice(iter( lambda:getrandbits(length-4)|prefix<>2) m2[h2] += int(otfR(n,ws,2)) m3[h2] += int(otfR(n,ws,3)) m4[h2] += int(otfR(n,ws,4)) m5[h2] += int(otfR(n,ws,5)) line = [] irrelevant = sum(m2.values()) * margin for k in range(max(m2)+1): m = m2[k],m3[k],m4[k],m5[k] b = min(m) * (margin + 1) line.append((''.join( str(i) for i,v in enumerate(m, start=2) if irrelevant <= v <= b)) or '-') if not verbose: prediction = str((prefix >> 1 & 1) + 2) if all(prediction in w or w=='-' for w in line): continue while line[-1] == '-': del line[-1] head = "" try: non2 = min(i for i,w in enumerate(line) if "2" not in w and w != '-') head += f"2<{non2}" except ValueError: head += "2!" try: hi3 = max(i for i,w in enumerate(line) if "3" not in w and w != '-') head += f";3>{hi3}" except ValueError: head += ";3!" try: hi4 = max(i for i,w in enumerate(line) if "4" not in w and w != '-') if hi4 < len(line) - 1: head += f";4>{hi4}" except ValueError: head += ";4!" line = ']' + ' '.join(line) if prefix == 9: line = line.replace("3", "s") print(f"[{length},{prefix:x}:" + head + line) print() #if length>15: input("?") def showOverview2(margin=.01, ws=30, lengths=None, prefixes=None): """Checks out if goodChunksize works. Parameters: accuracy margin (take e.g. .001 for high accuracy (slow!) ws: word size of Python's integers lengths: iterable of bit lengths: default 6 to 100 in increasing steps prefixs: iterable of prefixes: default is all [22,9:0.100]24 4 4 4 4 s4 s s s s s s s s s s s ^ ^ ^ ^ choices for chunkSize for h2=0 and up that give value | | | that uses almost optimal multiplications | | | e.g. 34 means that for h2=5, both s and 4 are almost optimal | | ^ number of multiplications lost per call to goodChunksize | ^ prefix (first four bits of number in hex) ^ length of number in bits """ if prefixes is None: prefixes = range(8, 16) if lengths is None: lengths = chain( range(6,35), range(35, 65, 2), range(65, 100, 5)) totalLoss = 0 samplesize = int(1 / margin**2) for length in lengths: for prefix in prefixes: m2 = defaultdict(int) m3 = defaultdict(int) m4 = defaultdict(int) m5 = defaultdict(int) m6 = defaultdict(int) choice = {} ns = range(prefix << length-4, prefix+1 << length-4) if samplesize >> length - 4: pass elif length < 30: ns = sample(ns, samplesize) else: ns = islice(iter( lambda:getrandbits(length-4)|prefix<>2) m2[h2] += int(otfR(n,ws,2)) m3[h2] += int(otfR(n,ws,3)) m4[h2] += int(otfR(n,ws,4)) m5[h2] += int(otfR(n,ws,5)) m6[h2] += int(otfR(n,ws,6)) if h2 not in choice: choice[h2] = goodChunksize(n) line = [] irrelevant = sum(m2.values()) * margin loss = 0 for k in range(max(m2)+1): m = m2[k],m3[k],m4[k],m5[k],m6[k] b = min(m) * (margin + 1) line.append((''.join( str(i) for i,v in enumerate(m, start=2) if irrelevant <= v <= b)) or '-') if k in choice: loss += m[choice[k] - 2] - min(m) while line and line[-1] == '-': del line[-1] if line[:3] == ['-', '-', '-']: firstNonDash = 0 while line[firstNonDash] == '-': firstNonDash += 1 line[:firstNonDash] = [str(firstNonDash) + '*-'] line = ']' + ' '.join(line) print(f"[{length},{prefix:x}:{loss/samples:.3f}" + line) totalLoss += loss/samples if len(prefixes) != 1: print() return totalLoss # old stuff, no longer used ------------------------------------- def genWithDoubles(length, doubles, includeZero=True): """generate binary tuples up to length m with given number of consecutive ones overlap is allowed set includeZero to False if you want to start with a 1 >>> for t in genWithDoubles(5, 2): ... print(*t) 0 0 1 1 1 0 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 0 1 """ lenMdbl = length - doubles if doubles < 0 or lenMdbl < bool(length): raise ValueError(length, doubles) if length < 3: table = { (0,0): ((),), (1,0): ((0,), (1,)), (2,0): ((0,0), (0,1), (1,0)), (2,1): ((1,1),), } if not includeZero: table[1,0] = () table[2,0] = (1,0), yield from table[length, doubles] return # only one way possible: all ones if lenMdbl == 1: yield (1,) * length return # we know 0 <= doubles and lenMdbl > 1 # put a 0 in front if includeZero: yield from map( partial(add, (0,)), genWithDoubles(length - 1, doubles)) # put 1..1 0 in front if doubles and lenMdbl > 2: for i in range(doubles): yield from map( partial(add, (1,) * (i + 1) + (0,)), genWithDoubles(length - i - 2, doubles - i)) # put, 1..1 0 in front, no doubles left yield from map( partial(add, (1,) * (doubles + 1) + (0,)), genWithDoubles(lenMdbl - 2, 0)) def genH2numbers(length, doubles, prefix): """generate numbers of length bits with given value for hamming(n & n >> 2) and four bit prefix (n >> n.bit_length() - 4) I wrote very complicated and nice code for this, but it is not faster, alas. """ mask = (1 << length - 4) - 1 if length < 30: biglist = range(prefix << length - 4, prefix + 1 << length - 4) if len(biglist) > 1000000: biglist = sample(biglist, 1000000) else: biglist = iter( lambda:getrandbits(length - 4) | prefix << length - 4, None) return islice( (n for n in biglist if hamming(mask & n & n >> 2) == doubles), 10000) def genWithInitial(length, doubles, prefix): """Like genWithDoubles, but given the initial few bits. """ if not prefix: return genWithDoubles(length, doubles) # adjust doubles for the doubles we already have initialPatValue = int(''.join(map(str, prefix)), 2) doubles -= hamming(initialPatValue & initialPatValue >> 1) # compute new length newLength = length - len(prefix) # allow all results for now; see below includeZero = True if prefix[-1]: # to make sure the pairs are counted correctly # we let the generator fix the initial bit to 1 # and clip it from the prefix newLength += 1 includeZero = False prefix = prefix[:-1] # check if there is any result to be expected if not 0 <= doubles < newLength + (newLength==0): # new length is too short; there is no solution return () # now build result return map(partial(add, prefix), genWithDoubles(newLength, doubles, includeZero)) def genH2numbers(length, doubles, prefix): """generate numbers of length bits with given value for hamming(n & n >> 2) and four bit prefix (n >> n.bit_length() - 4) Not the most readable code, but it works. >>> successes = 0 >>> for length in range(4, 12): ... mask = (1 << length - 4) - 1 ... for prefix in range(8, 16): ... for doubles in range(length - 4 >> 1): ... s = set( ... n ... for n in range(prefix << length - 4, prefix + 1 << length - 4) ... if hamming(mask & n & n >> 2) == doubles) ... if s ^ set(genH2numbers(length, doubles, prefix)): ... print(length, doubles, prefix) ... else: successes += 1 >>> successes 96 """ finalPrefix = prefix >> 2 << length - 2 # Generate even and odd bits separately, so that # the "1.1" pattern turns into consecutive bits # the first bit of the pattern is given by prefix. patLen = (length - 3) // 2 + 1 # determine upper bits of patterns, determined by prefix if length & 1: # make sure the even generator produces an extra bit # so we can zip later evenHiBits = (prefix >> 2 & 1, prefix & 1) oddHiBits = (1,) if prefix & 2 else (0,) else: evenHiBits = (1,) if prefix & 2 else (0,) oddHiBits = (1,) if prefix & 1 else (0,) # Distribute the doubles over both parts in all ways maxDoubles = patLen - 1 for evenDoubles in range(max(0, doubles - maxDoubles), min(doubles, maxDoubles) + 1): # there's an extra double in evenHiBits that doesn't count evenGen = genWithInitial(patLen, evenDoubles + (evenHiBits == (1,1)), evenHiBits) oddGen = genWithInitial(patLen, doubles - evenDoubles, oddHiBits) for even, odd in product(evenGen, oddGen): # weave the bits, build a string, convert to binary yield int( ''.join(map( str, chain.from_iterable(zip(even, odd)) )), 2) | finalPrefix def mulJ(n, ws=30, chunkSize=5): """My way of doing chunkSize bits at a time Bit pattern: 00100111 00010000 sqr mult oneSquare table 1 3 0 001 0 1 3 00111 5 1 0 0001 4 1 0 0000 4 0 0 >>> mulJ(10000, 8, 3) Mults(squares=12, mults=6, empty=0, chunkSize=3) """ # make table squares, mults = 0, (1 << chunkSize - 1) - 1 if chunkSize > 1: squares += 1 resultPower = 0 for digit in reversed(digits(n, ws)): resultShift = ws while digit: # compute active position bitPos = max(digit.bit_length(), chunkSize) - chunkSize if not resultPower: # first part of first digit resultPower = digit >> bitPos if resultPower & 1: pass # from table elif resultPower == 2: pass # from variable else: mults += 1 # from previous table entry resultShift = bitPos else: # make digit >> bitPos odd while digit >> bitPos & 1 == 0: bitPos += 1 # shift to bitPos, multiply with table entry squares += resultShift - bitPos resultPower <<= resultShift - bitPos resultShift = bitPos mults += 1 resultPower += digit >> resultShift digit &= (1 << resultShift) - 1 squares += resultShift resultPower <<= resultShift if resultPower != n: raise AssertionError return Mults(squares, mults, 0, chunkSize) def est_mulJ(n, ws=30, chunkSize=5): """Estimate output of mulJ >>> est_mulJ(10000, 8, 3) Mults(squares=12, mults=6, empty=0, chunkSize=3) """ squares, mults = (chunkSize>1), (1 << chunkSize - 1) - 1 if not n: return Mults(squares, mults, 0, chunkSize) dg = digits(n, ws) pat = "0*(.{,%d}1)" % (chunkSize - 1) mults += sum( len(re.findall(pat, format(digit, 'b'))) for digit in dg) squares += ws * (len(dg) - 1) first = format(dg[-1], 'b') m = first[:chunkSize] if m[-1] == '1' or m == '10': mults -= 1 squares += len(first) - len(m) return Mults(squares, mults, 0, chunkSize) # extended tests --------------------------------------------------- __test__ = { 'mul_bin_small': ''' >>> for i in range(256): ... if est_mul_bin(i,4) != mul_bin(i, 4): ... print(i, est_mul_bin(i,4), mul_bin(i, 4)) ''', 'mul_bin_big': ''' >>> for b in range(8, 100): ... r = getrandbits(b) ... if est_mul_bin(r, 15) != mul_bin(r, 15): ... print(i, est_mul_bin(i,15), mul_bin(i, 15)) ... if est_mul_bin(r, 30) != mul_bin(r, 30): ... print(i, est_mul_bin(i,30), mul_bin(i, 30)) ''', 'mulP_small': ''' >>> for i in range(256): ... if est_mulP(i,4,2) != mulP(i, 4, 2): ... print(i, est_mulP(i,4, 2), mulP(i, 4, 2)) ''', 'mulP_big': ''' >>> for b in range(8, 100): ... r = getrandbits(b) ... if est_mulP(r, 15, 3) != mulP(r, 15, 3): ... print(b, est_mulP(r, 15, 3), mulP(r, 15, 3)) ... if est_mulP(r, 30, 5) != mulP(r, 30, 5): ... print(b, est_mulP(r, 30, 5), mulP(r, 30, 5)) ''', 'mulJ_small': ''' >>> mulJ(0, 15, 1) Mults(squares=0, mults=0, empty=0, chunkSize=1) >>> mulJ(0, 15, 3) Mults(squares=1, mults=3, empty=0, chunkSize=3) >>> mulJ(0, 15, 5) Mults(squares=1, mults=15, empty=0, chunkSize=5) >>> for i in range(12): ... mulJ(i, 8, 3) #doctest: +REPORT_NDIFF Mults(squares=1, mults=3, empty=0, chunkSize=3) Mults(squares=1, mults=3, empty=0, chunkSize=3) Mults(squares=1, mults=3, empty=0, chunkSize=3) Mults(squares=1, mults=3, empty=0, chunkSize=3) Mults(squares=1, mults=4, empty=0, chunkSize=3) Mults(squares=1, mults=3, empty=0, chunkSize=3) Mults(squares=1, mults=4, empty=0, chunkSize=3) Mults(squares=1, mults=3, empty=0, chunkSize=3) Mults(squares=2, mults=4, empty=0, chunkSize=3) Mults(squares=2, mults=5, empty=0, chunkSize=3) Mults(squares=2, mults=3, empty=0, chunkSize=3) Mults(squares=2, mults=4, empty=0, chunkSize=3) ''', 'mulJ_big': ''' >>> for b in range(8, 20): ... r = getrandbits(b) ... if est_mulJ(r, 15, 3) != mulJ(r, 15, 3): ... print(b, r, est_mulJ(r, 15, 3), mulJ(r, 15, 3)) ... if est_mulJ(r, 30, 5) != mulJ(r, 30, 5): ... print(b, r, est_mulJ(r, 30, 5), mulJ(r, 30, 5)) ... # doctest: ''', 'otfR_small': ''' >>> otfR(0x1b, 15, 2) Mults(squares=4, mults=2, empty=0, chunkSize=2) >>> otfR(0x27, 15, 3) Mults(squares=3, mults=5, empty=0, chunkSize=3) >>> otfR(0x73, 15, 3) Mults(squares=5, mults=4, empty=0, chunkSize=3) >>> for i in range(10): ... print (i, otfR(i, 8, 2 + i % 2)) #doctest: +REPORT_NDIFF 0 Mults(squares=0, mults=0, empty=0, chunkSize=2) 1 Mults(squares=0, mults=0, empty=0, chunkSize=3) 2 Mults(squares=1, mults=0, empty=0, chunkSize=2) 3 Mults(squares=1, mults=1, empty=0, chunkSize=3) 4 Mults(squares=2, mults=0, empty=0, chunkSize=2) 5 Mults(squares=1, mults=2, empty=0, chunkSize=3) 6 Mults(squares=2, mults=1, empty=0, chunkSize=2) 7 Mults(squares=1, mults=3, empty=0, chunkSize=3) 8 Mults(squares=3, mults=0, empty=0, chunkSize=2) 9 Mults(squares=3, mults=1, empty=0, chunkSize=3) ''', 'quality': ''' >>> sum(int(est_mulP(n, 30, 5)) for n in range(1024)) 38688 >>> sum(int(est_mulJ(n, 30, 5)) for n in range(1024)) 21934 >>> sum(int(est_mulP(n, 30, 3)) for n in range(1024)) 17832 >>> sum(int(est_mulP(n, 30, 2)) for n in range(1024)) 14424 >>> sum(int(est_mul_bin(n, 30)) for n in range(1024)) 13314 >>> sum(int(est_mulJ(n, 30, 3)) for n in range(1024)) 12482 >>> sum(int(est_mulJ(n, 30, 2)) for n in range(1024)) 11724 >>> sum(int(otfR(n, 30, 3)) for n in range(1024)) 11581 >>> sum(int(otfR(n, 30, 2)) for n in range(1024)) 11578 >>> sum(int(otfR(n, 30, 2)) > int(est_mul_bin(n, 30)) for n in range(1024)) 0 ''', } def setupDoctest(): import ast parsed = ast.parse(open(__file__).read(), "doctest") doctypes = ast.Module, ast.FunctionDef, ast.ClassDef for node in ast.walk(parsed): if isinstance(node, doctypes): d = ast.get_docstring(node, True) if d: if getattr(node, "name", "module") in __test__: print("Doubly defined:", node.name) __test__[getattr(node, "name", "module")] = d if __name__ == "__main__": try: assert False setupDoctest() except AssertionError: pass print("Testing...") import doctest print(doctest.testmod()) if False: while True: prefix = 4 l = int(input("length:")) n = getrandbits(l-3)|prefix<