As the pure-Python code below demonstrates, when called as `python3 test.py jitshift` it is much faster than when called as `python3 test.py builtinops` (which takes many seconds/minutes past the 24th iteration). I am using fedora 32 x86-64 on a Cannon lake intel core i5 NUC. Tested with latest cpython3 git master and with /usr/bin/pypy3
The improvement was done in pure-python / userland and may be improved upon further given these or others: https://gmplib.org/ and https://github.com/ridiculousfish/libdivide .
```
#!/usr/bin/env python3
# The Expat License
#
# Copyright (c) 2020, Shlomi Fish
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import sys
OPT = "builtinops"
def mytest(p):
pint2p = p << p
myrange = range(1000)
if OPT == "builtinpow":
ret = pow(2, (1 << p), pint2p)
elif OPT == "builtinops":
ret = 2
for i in myrange:
print('sq', i, flush=True)
ret *= ret
print('mod', i, flush=True)
ret %= pint2p
print('after mod', i, (ret % 10 ** 20), flush=True)
else:
class ShiftMod:
"""docstring for ShiftMod:d"""
def __init__(self, base, shift):
self.base = base
self.shift = shift
self.mask = (1 << shift) - 1
self.n = base << shift
def mod(self, inp):
if inp < self.n:
return inp
return ((((inp >> self.shift) % self.base)
<< self.shift) | (inp & self.mask))
def gen_shift_mod(x):
s = 0
for offset in (200000, 20000, 2000, 200, 20, 1):
bits_mask = (1 << offset) - 1
while x & bits_mask == 0:
s += offset
x >>= offset
return ShiftMod(x, s)
ret = 2
if OPT == "shiftmodpre":
m = ShiftMod(p, p)
for i in myrange:
print('sq', i, flush=True)
ret *= ret
print('mod', i, flush=True)
# m = gen_shift_mod(pint2p)
ret = m.mod(ret)
print('after mod', i, (ret % 10 ** 20), flush=True)
elif OPT == "gen_shift_mod":
m = gen_shift_mod(pint2p)
for i in myrange:
print('sq', i, flush=True)
ret *= ret
print('mod', i, flush=True)
ret = m.mod(ret)
print('after mod', i, (ret % 10 ** 20), flush=True)
elif OPT == "jitshift":
for i in myrange:
print('sq', i, flush=True)
ret *= ret
print('mod', i, flush=True)
ret = gen_shift_mod(pint2p).mod(ret)
print('after mod', i, (ret % 10 ** 20), flush=True)
return ret % (p*p) // p
def main(which):
global OPT
OPT = which
'''
if which == "builtinpow":
OPT = 1
elif which == "builtinops":
OPT = 0
elif which == "shiftmod":
OPT = 2
else:
raise Exception("unknown choice")
'''
x = mytest(9816593)
print(x)
return
if __name__ == "__main__":
main(sys.argv[1])
``` |