#!python """Print or convert numbers in any base. By Jurjen Bos, 2016. Works in Python 2 and Python 3. Subquadratic algorithm that starts to be faster than str(n) when n has more than about 100000 bits. Uses Barret algorithm to use (Karatsuba) multiplications instead of division. Also supports other number bases. Reusing the BaseConverter object saves about half the time, because the first step of the computation can be skipped. Writing to a file like object (anything that supports 'write'): >>> BaseConverter(36).write(1<<200, sys.stdout); print() bnklg118comha6gqury14067gur54n8won6guf4 Producing a string (internally uses write to build the string): >>> BaseConverter(31).str(572482572) 'jurjen' Reusing a BaseConverter object: >>> b = BaseConverter(10) >>> b.str(1<<4321)==str(1<<4321) True >>> #faster than building a new BaseConverter... >>> b.str(1<<1234)==str(1<<1234) True I converted the latest Mersenne number in 18.5 minutes (instead of hours). Warning when timing this routine: note that the timing is dependent on how the "bigBase" blocks fit in the number; numbers that are just below a bigBase**2**i are relatively fast, while numbers that are just over a bigBase**2**i are relatively slow. Oh yes, and running this under PyPy doesn't help; as PyPy doesn't have Karatsuba multiplication there is nothing to win, so it is just slower. """ from __future__ import print_function import sys import string from math import log class StringCollector: """Simple class to collect strings. Like StringIO, without the unicode. Used to convert write to str in BaseConverter """ def __init__(self): self.data = '' def write(self, s): self.data += s def getvalue(self): return self.data class BaseConverter: """Class to convert numbers to strings in any base, faster even than str If you keep a BaseConverter object around, conversion will be much faster, since about half the time is the construction of the power table. base: the number base bigBaseDigits: number of digits converted in one step bigBase: base**bigBaseDigits table: the table used for the conversion; see expand and __len__ >>> BaseConverter(10).str(1234) '1234' """ #apparently, about twice the wordsize is optimal baseSize = 60 #the digits for all number bases DIGITS = string.digits+string.ascii_lowercase def __init__(self, base): self.base = base #determine the number base we are working in: bigBase self.bigBaseDigits = int( log(2)*self.baseSize/log(base) ) self.bigBase = base**self.bigBaseDigits #set up power table x = self.bigBase k = x.bit_length()*2+2 y = (1<>k (and this is much faster for long numbers) """ bigBase = self.bigBase x, y, k = self.table[-1] #write x as xm<>xe while len(self)>k1*2 y = y1*y1 >> 2*k1-k #compute error from 1<> k-xe #adjust error error -= correction*xm #correct one more if needed if error>xm: correction +=1 error -= xm #now y+correction must be correct assert 0 <= error < xm #now compute y exactly y += correction self.table.append((xm<>> int(BaseConverter(17).str(1<<1000), 17)==1<<1000 True """ if self.base in (2, 8, 16): return self.simple_str(n) s = StringCollector() self.write(n, s) return s.getvalue() def write(self, n, out=sys.stdout): """Write digits of n into output stream. Uses a divide and conquer algorithm dividing n by bigBase**2**i >>> BaseConverter(10).write(1<<232, sys.stdout); print() 6901746346790563787434755862277025452451108972170386555162524223799296 """ if self.base in (2, 4, 8, 16, 32): #optimize for base that is a power of two self._write2power(n, out) return outfileWrite = out.write def recurse(n, i, suppress): """Internal function: write n to output stream, assuming n has fewer than bigBaseDigits*2**i digits and suppress leading zeroes if requested """ #tail recursion expansion for lower half of n while i: i -=1 x,y,k = self.table[i] if suppress and n>k #put lower half back in n n -= nHi*x #nHi may be at most one to small if n >= x: nHi, n = nHi+1, n-x assert n>> BaseConverter(32).str(3**214) 'i4qj4ttib12j6mgd6hsq7fmq2mgnoi6o7mnvuupjkgp6ike0mt56rk254pdt29902lop' """ outfileWrite = out.write logBigBase = self.bigBase.bit_length()-1 def recurse(n, i, suppress): """Internal function: write n to output stream, assuming n has fewer than bigBaseDigits*2**i digits and suppress leading zeroes if requested """ #tail recursion expansion for lower half of n while i: i -=1 logx = logBigBase<>logx #put lower half back in n n &= (1<>> BaseConverter(31).simple_str(572482572, 7) '0jurjen' >>> BaseConverter(10).simple_str(1<<10000)==str(1<<10000) True """ base = self.base #speedup cases with internal routines if base==2: return bin(n)[2:].rjust(digits, '0') elif base==8: return '%0*o'%(digits, n) elif base==10: return str(n).rjust(digits, '0') elif base==16: return '%0*x'%(digits, n) #other bases are harder result = '' while n or len(result)>> int(BaseConverter(32)._simpleStr2power(1<<10000), 32)==1<<10000 True """ logbase = self.base.bit_length()-1 mask = self.base-1 #other bases are harder result = '' while n or len(result)>=logbase return result def __len__(self): """Give length of longest number that can be output without having to call expand >>> len(BaseConverter(10)) in (120, 240) True """ #we allow twice the length of x, which is equal to k-2 return self.table[-1][2]-2 __test__ = { 'sweat': """ >>> from random import getrandbits >>> for base in range(2, 37): ... n = getrandbits(1000) ... assert int(BaseConverter(base).str(n), base)==n """, } if __name__=='__main__': import doctest doctest.testmod() #my way of testing: #try: from time import process_time as time #except ImportError: from time import clock as time # #from random import uniform,getrandbits #for base in range(2,37): # b = BaseConverter(base) # if b not in (2, 4, 8, 16, 32): b.expand(1<<21) # baseType = {2:1,8:1,16:1, 4:2,32:2, 10:3}.get(base, 4) # for i in range(5): # n = getrandbits(int(10**uniform(5, 6.3))) # t = time() # a = b.str(n) # t = time()-t # print('%02d %7d %7.4f %d'%(base, n.bit_length(), t, baseType)) # #base = 10 #for i in range(10): # n = getrandbits(int(10**uniform(5, 6.3))) # t = time() # a = str(n) # t = time()-t # print('%02d %7d %7.4f %d'%(base, n.bit_length(), t, baseType)) #