"""Fuzz two variants of ``translate_path`` for path-traversal escapes.

``translate_path_old`` strips drive and directory prefixes from each path
component once; ``translate_path_new`` repeats the stripping until the
component stops changing. ``main`` places a file named ``secret.txt`` in the
parent of the current working directory and reports every fuzzed request
path whose translation can open a file.
"""

import itertools
import os
import posixpath
import urllib.parse

# Fragments concatenated in every order to build the fuzzed request paths.
FUZZ_TOKENS = ("asdf", " ", ".", "/", "..", "\\", "c:")


def translate_path_old(path):
    """Translate a /-separated PATH to the local filename syntax.

    Components that mean special things to the local file system
    (e.g. drive or directory names) are ignored.  (XXX They should
    probably be diagnosed.)

    Each component is passed through ``os.path.splitdrive`` and
    ``os.path.split`` exactly once before being joined onto the current
    working directory.

    Args:
        path: Request path; may carry a ``?query`` and/or ``#fragment``.

    Returns:
        A path rooted at ``os.getcwd()``, with a trailing ``/`` appended
        when the request path ended in one.
    """
    # abandon query parameters
    path = path.split('?', 1)[0]
    path = path.split('#', 1)[0]
    # Don't forget explicit trailing slash when normalizing. Issue17324
    trailing_slash = path.rstrip().endswith('/')
    try:
        path = urllib.parse.unquote(path, errors='surrogatepass')
    except UnicodeDecodeError:
        path = urllib.parse.unquote(path)
    path = posixpath.normpath(path)
    words = path.split('/')
    words = filter(None, words)
    path = os.getcwd()
    for word in words:
        # Single pass only: one drive prefix and one directory prefix are
        # removed; anything still nested inside the remainder is kept.
        _, word = os.path.splitdrive(word)
        _, word = os.path.split(word)
        if word in (os.curdir, os.pardir):
            continue
        path = os.path.join(path, word)
    if trailing_slash:
        path += '/'
    return path


def translate_path_new(path):
    """Translate a /-separated PATH to the local filename syntax.

    Components that mean special things to the local file system
    (e.g. drive or directory names) are ignored.  (XXX They should
    probably be diagnosed.)

    Unlike ``translate_path_old``, each component is stripped with
    ``os.path.split`` and ``os.path.splitdrive`` repeatedly until it no
    longer changes, and only then compared against ``.`` and ``..``.

    Args:
        path: Request path; may carry a ``?query`` and/or ``#fragment``.

    Returns:
        A path rooted at ``os.getcwd()``, with a trailing ``/`` appended
        when the request path ended in one.
    """
    # abandon query parameters
    path = path.split('?', 1)[0]
    path = path.split('#', 1)[0]
    # Don't forget explicit trailing slash when normalizing. Issue17324
    trailing_slash = path.rstrip().endswith('/')
    try:
        path = urllib.parse.unquote(path, errors='surrogatepass')
    except UnicodeDecodeError:
        path = urllib.parse.unquote(path)
    # Why is this posixpath.normpath?
    # Really ignore backslashes on windows?
    path = posixpath.normpath(path)
    words = path.split('/')  # <- only '/'?
    words = filter(None, words)
    path = os.getcwd()
    for word in words:
        # Call split and splitdrive multiple times until
        # word does not change anymore.
        has_changed = True
        while has_changed:
            previous_word = word
            _, word = os.path.split(word)
            _, word = os.path.splitdrive(word)
            has_changed = word != previous_word
        # Discard words that are '.' and '..'
        if word in (os.curdir, os.pardir):
            continue
        # WARNING:
        # We do not want any '..' in path.
        # However:
        # os.path.join('c:/secret/public', 'c:..')
        # == 'c:/secret/public\\..'
        path = os.path.join(path, word)
    if trailing_slash:
        path += '/'
    return path


def can_reach_secret_file(path):
    """Return True when *path* can be opened for reading, else False.

    Args:
        path: Filesystem path to probe.

    Returns:
        ``True`` if ``open(path)`` succeeds; ``False`` if it raises
        ``OSError`` (missing file, directory, permission problem, ...) or
        ``ValueError`` (e.g. the path contains an embedded NUL byte).
    """
    try:
        with open(path):
            return True
    except (OSError, ValueError):
        # A path that cannot even be handed to the OS is not reachable.
        return False


def fuzz(translate_path_func, depth):
    """Print every fuzzed request path whose translation opens a file.

    For each length ``0 .. depth - 1`` every ordered combination of
    ``FUZZ_TOKENS`` is concatenated, suffixed with ``secret.txt``, run
    through *translate_path_func* and probed with
    ``can_reach_secret_file``.

    Args:
        translate_path_func: Callable mapping a request path to a local path.
        depth: Exclusive upper bound on the number of tokens per prefix.
    """
    for repeat in range(depth):
        for parts in itertools.product(FUZZ_TOKENS, repeat=repeat):
            path = "".join(parts) + "secret.txt"
            escaped_path = translate_path_func(path)
            if can_reach_secret_file(escaped_path):
                print("possible path traversal with path:", path)


def main():
    """Create the secret file, fuzz both translators, then clean up.

    Raises:
        RuntimeError: If the secret file cannot be opened after creation,
            in which case the fuzz results would be meaningless.
    """
    # make a secret file that should not be reachable
    secret_path = "../secret.txt"
    try:
        # Exclusive creation: never truncate a file that is already there.
        open(secret_path, "x").close()
        created = True
    except FileExistsError:
        created = False

    try:
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not can_reach_secret_file(secret_path):
            raise RuntimeError("cannot open " + secret_path)

        print("translate_path_old:")
        fuzz(translate_path_old, 6)
        print("")
        print("translate_path_new:")
        fuzz(translate_path_new, 7)
        print("ok")
    finally:
        # Only remove what this run created.
        if created:
            try:
                os.remove(secret_path)
            except FileNotFoundError:
                pass


if __name__ == "__main__":
    main()