# The standard base64 alphabet is "A-Za-z0-9+/" (uppercase, lowercase,
# digits, then the two symbols), in that order.
_STANDARD_ALPHABET = (
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/"
)
_PADDING = "="
_INVALID_INPUT = "Invalid base64 input string"


def _decode_chunk(chunk, values):
    """Validate one 4-symbol chunk and return the 1 to 3 bytes it encodes.

    ``values`` maps each alphabet symbol to its 6-bit value. Raises
    ``ValueError`` when the padding is misplaced or when bits that must be
    padding-produced zeros are set.
    """
    first, second, third, fourth = chunk

    # A single input byte already spills into the first two symbols, so
    # neither may be padding; and a data symbol can never follow padding.
    if (
        first == _PADDING
        or second == _PADDING
        or (third == _PADDING and fourth != _PADDING)
    ):
        raise ValueError(_INVALID_INPUT)

    v0 = values[first]
    v1 = values[second]
    byte_1 = (v0 << 2) + (v1 >> 4)

    if third == _PADDING:
        # "xx==": exactly one input byte, so every bit after the 8th (the
        # low 4 bits of the second symbol) must be zero.
        if v1 % 16:
            raise ValueError(_INVALID_INPUT)
        return bytes([byte_1])

    v2 = values[third]
    byte_2 = ((v1 % 16) << 4) + (v2 >> 2)

    if fourth == _PADDING:
        # "xxx=": exactly two input bytes, so every bit after the 16th (the
        # low 2 bits of the THIRD symbol) must be zero.
        if v2 % 4:
            raise ValueError(_INVALID_INPUT)
        return bytes([byte_1, byte_2])

    byte_3 = ((v2 % 4) << 6) + values[fourth]
    return bytes([byte_1, byte_2, byte_3])


def b64decode(b64_string, alphabet=None):
    """Decode a padded base64 string into ``bytes``.

    Args:
        b64_string: The encoded text. Its length must be a multiple of 4 and
            every character must be in the alphabet or be the padding
            character ``=``.
        alphabet: Optional sequence (string or list) of exactly 64 distinct
            symbols; index ``i`` is the symbol for the 6-bit value ``i``.
            When omitted or empty, the standard ``A-Za-z0-9+/`` alphabet is
            used.

    Returns:
        The decoded bytes. Padded chunks contribute only the bytes they
        actually encode (1 for ``xx==``, 2 for ``xxx=``).

    Raises:
        ValueError: If the alphabet does not hold 64 distinct symbols, or if
            the input has a bad length, a character outside the alphabet,
            misplaced padding, or non-zero bits where padding requires zeros.

    Note:
        Padding is validated chunk by chunk, so a padded chunk is accepted
        anywhere in the input, not only at the end. The padding character is
        always ``=``; an alphabet that itself contains ``=`` is not rejected,
        but that symbol is then read as padding.
    """
    if not alphabet:
        alphabet = _STANDARD_ALPHABET
    elif len(set(alphabet)) != 64 or len(alphabet) != 64:
        # A custom alphabet must contain exactly 64 distinct values.
        raise ValueError("Invalid alphabet provided")

    # Inverse of the alphabet (symbol -> 6-bit value) for fast lookups.
    values = {symbol: index for index, symbol in enumerate(alphabet)}

    if len(b64_string) % 4 != 0:
        raise ValueError(_INVALID_INPUT)
    if any(ch not in values and ch != _PADDING for ch in b64_string):
        raise ValueError(_INVALID_INPUT)

    decoded = bytearray()
    for start in range(0, len(b64_string), 4):
        decoded.extend(_decode_chunk(b64_string[start:start + 4], values))
    return bytes(decoded)


def standard_b64decode(b64_string):
    """Decode ``b64_string`` with the standard ``A-Za-z0-9+/`` alphabet.

    Calls ``b64decode`` without an alphabet argument, so the default
    alphabet is used.
    """
    return b64decode(b64_string)