diff -r d9c98730e2e8 -r f5c57ba1124b Lib/ntpath.py --- a/Lib/ntpath.py Sat Jul 07 13:34:50 2012 +1000 +++ b/Lib/ntpath.py Sat Jul 07 15:32:29 2012 +0200 @@ -35,48 +35,41 @@ altsep = '/' devnull = 'nul' -def _get_empty(path): - if isinstance(path, bytes): - return b'' +# constants to be used internally so that we make the code more maintainable +_empty = '' +_sep = '\\' +_altsep = '/' +_bothsep = '\\/' +_dot = '.' +_colon = ':' +_tilde = '~' + +# quick lookup table to avoid calling isinstance a huge number of times +unicode_bytes_map = { + _empty: b'', + _sep: b'\\', + _altsep: b'/', + _bothsep: b'\\/', + _dot: b'.', + _colon: b':', + _tilde: b'~', +} + + +def _get_literal(constant, is_bytes): + if is_bytes: + return unicode_bytes_map[constant] else: - return '' + return constant -def _get_sep(path): - if isinstance(path, bytes): - return b'\\' - else: - return '\\' -def _get_altsep(path): - if isinstance(path, bytes): - return b'/' - else: - return '/' - -def _get_bothseps(path): - if isinstance(path, bytes): - return b'\\/' - else: - return '\\/' - -def _get_dot(path): - if isinstance(path, bytes): - return b'.' - else: - return '.' - -def _get_colon(path): - if isinstance(path, bytes): - return b':' - else: - return ':' - -def _get_special(path): - if isinstance(path, bytes): +def _get_special(path, is_bytes): + if is_bytes: return (b'\\\\.\\', b'\\\\?\\') else: return ('\\\\.\\', '\\\\?\\') + # Normalize the case of a pathname and map slashes to backslashes. # Other normalizations (such as optimizing '../' away) are not done # (this is done by normpath). @@ -88,7 +81,9 @@ if not isinstance(s, (bytes, str)): raise TypeError("normcase() argument must be str or bytes, " "not '{}'".format(s.__class__.__name__)) - return s.replace(_get_altsep(s), _get_sep(s)).lower() + is_bytes = isinstance(s, bytes) + return s.replace(_get_literal(_altsep, is_bytes), + _get_literal(_sep, is_bytes)).lower() # Return whether a path is absolute. 
@@ -100,7 +95,8 @@ def isabs(s): """Test whether a path is absolute""" s = splitdrive(s)[1] - return len(s) > 0 and s[:1] in _get_bothseps(s) + is_bytes = isinstance(s, bytes) + return len(s) > 0 and s[:1] in _get_literal(_bothsep, is_bytes) # Join two (or more) paths. @@ -109,9 +105,10 @@ """Join two or more pathname components, inserting "\\" as needed. If any component is an absolute path, all previous path components will be discarded.""" - sep = _get_sep(a) - seps = _get_bothseps(a) - colon = _get_colon(a) + is_bytes = isinstance(a, bytes) + sep = _get_literal(_sep, is_bytes) + seps = _get_literal(_bothsep, is_bytes) + colon = _get_literal(_colon, is_bytes) path = a for b in p: b_wins = 0 # set to 1 iff b makes path irrelevant @@ -204,9 +201,10 @@ Paths cannot contain both a drive letter and a UNC path. """ - empty = _get_empty(p) + is_bytes = isinstance(p, bytes) + empty = _get_literal(_empty, is_bytes) if len(p) > 1: - sep = _get_sep(p) + sep = _get_literal(_sep, is_bytes) normp = normcase(p) if (normp[0:2] == sep*2) and (normp[2:3] != sep): # is a UNC path: @@ -224,7 +222,7 @@ if index2 == -1: index2 = len(p) return p[:index2], p[index2:] - if normp[1:2] == _get_colon(p): + if normp[1:2] == _get_literal(_colon, is_bytes): return p[:2], p[2:] return empty, p @@ -244,7 +242,8 @@ import warnings warnings.warn("ntpath.splitunc is deprecated, use ntpath.splitdrive instead", DeprecationWarning) - sep = _get_sep(p) + is_bytes = isinstance(p, bytes) + sep = _get_literal(_sep, is_bytes) if not p[1:2]: return p[:0], p # Drive letter present firstTwo = p[0:2] @@ -276,7 +275,8 @@ Return tuple (head, tail) where tail is everything after the final slash. Either part may be empty.""" - seps = _get_bothseps(p) + is_bytes = isinstance(p, bytes) + seps = _get_literal(_bothsep, is_bytes) d, p = splitdrive(p) # set i to index beyond p's last slash i = len(p) @@ -297,8 +297,12 @@ # It is always true that root + ext == p. 
def splitext(p): - return genericpath._splitext(p, _get_sep(p), _get_altsep(p), - _get_dot(p)) + is_bytes = isinstance(p, bytes) + return genericpath._splitext(p, + _get_literal(_sep, is_bytes), + _get_literal(_altsep, is_bytes), + _get_literal(_dot, is_bytes)) + splitext.__doc__ = genericpath._splitext.__doc__ @@ -344,7 +348,8 @@ def ismount(path): """Test whether a path is a mount point (defined as root of drive)""" - seps = _get_bothseps(path) + is_bytes = isinstance(path, bytes) + seps = _get_literal(_bothsep, is_bytes) root, rest = splitdrive(path) if root and root[0] in seps: return (not rest) or (rest in seps) @@ -364,14 +369,13 @@ """Expand ~ and ~user constructs. If user or $HOME is unknown, do nothing.""" - if isinstance(path, bytes): - tilde = b'~' - else: - tilde = '~' + is_bytes = isinstance(path, bytes) + tilde = _get_literal(_tilde, is_bytes) + if not path.startswith(tilde): return path i, n = 1, len(path) - while i < n and path[i] not in _get_bothseps(path): + while i < n and path[i] not in _get_literal(_bothsep, is_bytes): i += 1 if 'HOME' in os.environ: @@ -387,7 +391,7 @@ drive = '' userhome = join(drive, os.environ['HOMEPATH']) - if isinstance(path, bytes): + if is_bytes: userhome = userhome.encode(sys.getfilesystemencoding()) if i != 1: #~user @@ -413,7 +417,9 @@ """Expand shell variables of the forms $var, ${var} and %var%. 
Unknown variables are left unchanged.""" - if isinstance(path, bytes): + is_bytes = isinstance(path, bytes) + + if is_bytes: if ord('$') not in path and ord('%') not in path: return path import string @@ -459,13 +465,13 @@ index = pathlen - 1 else: var = path[:index] - if isinstance(path, bytes): + if is_bytes: var = var.decode('ascii') if var in os.environ: value = os.environ[var] else: value = '%' + var + '%' - if isinstance(path, bytes): + if is_bytes: value = value.encode('ascii') res += value elif c == dollar: # variable or '$$' @@ -476,22 +482,22 @@ path = path[index+2:] pathlen = len(path) try: - if isinstance(path, bytes): + if is_bytes: index = path.index(b'}') else: index = path.index('}') var = path[:index] - if isinstance(path, bytes): + if is_bytes: var = var.decode('ascii') if var in os.environ: value = os.environ[var] else: value = '${' + var + '}' - if isinstance(path, bytes): + if is_bytes: value = value.encode('ascii') res += value except ValueError: - if isinstance(path, bytes): + if is_bytes: res += b'${' + path else: res += '${' + path @@ -501,7 +507,7 @@ index += 1 c = path[index:index + 1] while c and c in varchars: - if isinstance(path, bytes): + if is_bytes: var += c.decode('ascii') else: var += c @@ -511,7 +517,7 @@ value = os.environ[var] else: value = '$' + var - if isinstance(path, bytes): + if is_bytes: value = value.encode('ascii') res += value if c: @@ -528,16 +534,18 @@ def normpath(path): """Normalize path, eliminating double slashes, etc.""" - sep = _get_sep(path) - dotdot = _get_dot(path) * 2 - special_prefixes = _get_special(path) + is_bytes = isinstance(path, bytes) + sep = _get_literal(_sep, is_bytes) + dot = _get_literal(_dot, is_bytes) + dotdot = dot * 2 + special_prefixes = _get_special(path, is_bytes) if path.startswith(special_prefixes): # in the case of paths with these prefixes: # \\.\ -> device names # \\?\ -> literal paths # do not do any normalization, but return the path unchanged return path - path = 
path.replace(_get_altsep(path), sep) + path = path.replace(_get_literal(_altsep, is_bytes), sep) prefix, path = splitdrive(path) # collapse initial backslashes @@ -548,13 +556,13 @@ comps = path.split(sep) i = 0 while i < len(comps): - if not comps[i] or comps[i] == _get_dot(path): + if not comps[i] or comps[i] == dot: del comps[i] elif comps[i] == dotdot: if i > 0 and comps[i-1] != dotdot: del comps[i-1:i+1] i -= 1 - elif i == 0 and prefix.endswith(_get_sep(path)): + elif i == 0 and prefix.endswith(sep): del comps[i] else: i += 1 @@ -562,7 +570,7 @@ i += 1 # If the path is now empty, substitute '.' if not prefix and not comps: - comps.append(_get_dot(path)) + comps.append(dot) return prefix + sep.join(comps) @@ -604,10 +612,11 @@ def relpath(path, start=curdir): """Return a relative version of a path""" - sep = _get_sep(path) + is_bytes = isinstance(path, bytes) + sep = _get_literal(_sep, is_bytes) if start is curdir: - start = _get_dot(path) + start = _get_literal(_dot, is_bytes) if not path: raise ValueError("no path specified") @@ -630,13 +639,11 @@ break i += 1 - if isinstance(path, bytes): - pardir = b'..' - else: - pardir = '..' + dot = _get_literal(_dot, is_bytes) + pardir = dot * 2 rel_list = [pardir] * (len(start_list)-i) + path_list[i:] if not rel_list: - return _get_dot(path) + return dot return join(*rel_list)