Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(28233)

Delta Between Two Patch Sets: Lib/posixpath.py

Issue 10395: new os.path function to extract common prefix based on path components
Left Patch Set: Created 6 years, 11 months ago
Right Patch Set: Created 5 years, 3 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « Lib/ntpath.py ('k') | Lib/test/test_ntpath.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
LEFT | RIGHT
1 """Common operations on Posix pathnames. 1 """Common operations on Posix pathnames.
2 2
3 Instead of importing this module directly, import os and refer to 3 Instead of importing this module directly, import os and refer to
4 this module as os.path. The "os.path" name is an alias for this 4 this module as os.path. The "os.path" name is an alias for this
5 module on Posix systems; on other systems (e.g. Mac, Windows), 5 module on Posix systems; on other systems (e.g. Mac, Windows),
6 os.path provides the same operations in a manner specific to that 6 os.path provides the same operations in a manner specific to that
7 platform, and is an alias to another module (e.g. macpath, ntpath). 7 platform, and is an alias to another module (e.g. macpath, ntpath).
8 8
9 Some of this can actually be useful on non-Posix systems too, e.g. 9 Some of this can actually be useful on non-Posix systems too, e.g.
10 for manipulation of the pathname component of URLs. 10 for manipulation of the pathname component of URLs.
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
42 else: 42 else:
43 return '/' 43 return '/'
44 44
45 # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac. 45 # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
46 # On MS-DOS this may also turn slashes into backslashes; however, other 46 # On MS-DOS this may also turn slashes into backslashes; however, other
47 # normalizations (such as optimizing '../' away) are not allowed 47 # normalizations (such as optimizing '../' away) are not allowed
48 # (another function should be defined to do that). 48 # (another function should be defined to do that).
49 49
50 def normcase(s): 50 def normcase(s):
51 """Normalize case of pathname. Has no effect under Posix""" 51 """Normalize case of pathname. Has no effect under Posix"""
52 # TODO: on Mac OS X, this should really return s.lower().
53 if not isinstance(s, (bytes, str)): 52 if not isinstance(s, (bytes, str)):
54 raise TypeError("normcase() argument must be str or bytes, " 53 raise TypeError("normcase() argument must be str or bytes, "
55 "not '{}'".format(s.__class__.__name__)) 54 "not '{}'".format(s.__class__.__name__))
56 return s 55 return s
57 56
58 57
59 # Return whether a path is absolute. 58 # Return whether a path is absolute.
60 # Trivial in Posix, harder on the Mac or MS-DOS. 59 # Trivial in Posix, harder on the Mac or MS-DOS.
61 60
62 def isabs(s): 61 def isabs(s):
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
156 return head 155 return head
157 156
158 157
159 # Is a path a symbolic link? 158 # Is a path a symbolic link?
160 # This will always return false on systems where os.lstat doesn't exist. 159 # This will always return false on systems where os.lstat doesn't exist.
161 160
162 def islink(path): 161 def islink(path):
163 """Test whether a path is a symbolic link""" 162 """Test whether a path is a symbolic link"""
164 try: 163 try:
165 st = os.lstat(path) 164 st = os.lstat(path)
166 except (os.error, AttributeError): 165 except (OSError, AttributeError):
167 return False 166 return False
168 return stat.S_ISLNK(st.st_mode) 167 return stat.S_ISLNK(st.st_mode)
169 168
170 # Being true for dangling symbolic links is also useful. 169 # Being true for dangling symbolic links is also useful.
171 170
172 def lexists(path): 171 def lexists(path):
173 """Test whether a path exists. Returns True for broken symbolic links""" 172 """Test whether a path exists. Returns True for broken symbolic links"""
174 try: 173 try:
175 os.lstat(path) 174 os.lstat(path)
176 except os.error: 175 except OSError:
177 return False 176 return False
178 return True 177 return True
179 178
180 179
181 # Are two filenames really pointing to the same file?
182
183 def samefile(f1, f2):
184 """Test whether two pathnames reference the same actual file"""
185 s1 = os.stat(f1)
186 s2 = os.stat(f2)
187 return samestat(s1, s2)
188
189
190 # Are two open files really referencing the same file?
191 # (Not necessarily the same file descriptor!)
192
193 def sameopenfile(fp1, fp2):
194 """Test whether two open file objects reference the same file"""
195 s1 = os.fstat(fp1)
196 s2 = os.fstat(fp2)
197 return samestat(s1, s2)
198
199
200 # Are two stat buffers (obtained from stat, fstat or lstat)
201 # describing the same file?
202
203 def samestat(s1, s2):
204 """Test whether two stat buffers reference the same file"""
205 return s1.st_ino == s2.st_ino and \
206 s1.st_dev == s2.st_dev
207
208
209 # Is a path a mount point? 180 # Is a path a mount point?
210 # (Does this work for all UNIXes? Is it even guaranteed to work by Posix?) 181 # (Does this work for all UNIXes? Is it even guaranteed to work by Posix?)
211 182
212 def ismount(path): 183 def ismount(path):
213 """Test whether a path is a mount point""" 184 """Test whether a path is a mount point"""
214 if islink(path):
215 # A symlink can never be a mount point
216 return False
217 try: 185 try:
218 s1 = os.lstat(path) 186 s1 = os.lstat(path)
219 if isinstance(path, bytes): 187 except OSError:
220 parent = join(path, b'..') 188 # It doesn't exist -- so not a mount point. :-)
221 else: 189 return False
222 parent = join(path, '..') 190 else:
191 # A symlink can never be a mount point
192 if stat.S_ISLNK(s1.st_mode):
193 return False
194
195 if isinstance(path, bytes):
196 parent = join(path, b'..')
197 else:
198 parent = join(path, '..')
199 try:
223 s2 = os.lstat(parent) 200 s2 = os.lstat(parent)
224 except os.error: 201 except OSError:
225 return False # It doesn't exist -- so not a mount point :-) 202 return False
203
226 dev1 = s1.st_dev 204 dev1 = s1.st_dev
227 dev2 = s2.st_dev 205 dev2 = s2.st_dev
228 if dev1 != dev2: 206 if dev1 != dev2:
229 return True # path/.. on a different device as path 207 return True # path/.. on a different device as path
230 ino1 = s1.st_ino 208 ino1 = s1.st_ino
231 ino2 = s2.st_ino 209 ino2 = s2.st_ino
232 if ino1 == ino2: 210 if ino1 == ino2:
233 return True # path/.. is the same i-node as path 211 return True # path/.. is the same i-node as path
234 return False 212 return False
235 213
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
294 global _varprog, _varprogb 272 global _varprog, _varprogb
295 if isinstance(path, bytes): 273 if isinstance(path, bytes):
296 if b'$' not in path: 274 if b'$' not in path:
297 return path 275 return path
298 if not _varprogb: 276 if not _varprogb:
299 import re 277 import re
300 _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII) 278 _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
301 search = _varprogb.search 279 search = _varprogb.search
302 start = b'{' 280 start = b'{'
303 end = b'}' 281 end = b'}'
282 environ = getattr(os, 'environb', None)
304 else: 283 else:
305 if '$' not in path: 284 if '$' not in path:
306 return path 285 return path
307 if not _varprog: 286 if not _varprog:
308 import re 287 import re
309 _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII) 288 _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
310 search = _varprog.search 289 search = _varprog.search
311 start = '{' 290 start = '{'
312 end = '}' 291 end = '}'
292 environ = os.environ
313 i = 0 293 i = 0
314 while True: 294 while True:
315 m = search(path, i) 295 m = search(path, i)
316 if not m: 296 if not m:
317 break 297 break
318 i, j = m.span(0) 298 i, j = m.span(0)
319 name = m.group(1) 299 name = m.group(1)
320 if name.startswith(start) and name.endswith(end): 300 if name.startswith(start) and name.endswith(end):
321 name = name[1:-1] 301 name = name[1:-1]
322 if isinstance(name, bytes): 302 try:
323 name = str(name, 'ASCII') 303 if environ is None:
324 if name in os.environ: 304 value = os.fsencode(os.environ[os.fsdecode(name)])
305 else:
306 value = environ[name]
307 except KeyError:
308 i = j
309 else:
325 tail = path[j:] 310 tail = path[j:]
326 value = os.environ[name]
327 if isinstance(path, bytes):
328 value = value.encode('ASCII')
329 path = path[:i] + value 311 path = path[:i] + value
330 i = len(path) 312 i = len(path)
331 path += tail 313 path += tail
332 else:
333 i = j
334 return path 314 return path
335 315
336 316
337 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B. 317 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
338 # It should be understood that this may change the meaning of the path 318 # It should be understood that this may change the meaning of the path
339 # if it contains symbolic links! 319 # if it contains symbolic links!
340 320
341 def normpath(path): 321 def normpath(path):
342 """Normalize path, eliminating double slashes, etc.""" 322 """Normalize path, eliminating double slashes, etc."""
343 if isinstance(path, bytes): 323 if isinstance(path, bytes):
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
385 path = join(cwd, path) 365 path = join(cwd, path)
386 return normpath(path) 366 return normpath(path)
387 367
388 368
389 # Return a canonical path (i.e. the absolute location of a file on the 369 # Return a canonical path (i.e. the absolute location of a file on the
390 # filesystem). 370 # filesystem).
391 371
392 def realpath(filename): 372 def realpath(filename):
393 """Return the canonical path of the specified filename, eliminating any 373 """Return the canonical path of the specified filename, eliminating any
394 symbolic links encountered in the path.""" 374 symbolic links encountered in the path."""
395 if isinstance(filename, bytes): 375 path, ok = _joinrealpath(filename[:0], filename, {})
376 return abspath(path)
377
378 # Join two paths, normalizing and eliminating any symbolic links
379 # encountered in the second path.
380 def _joinrealpath(path, rest, seen):
381 if isinstance(path, bytes):
396 sep = b'/' 382 sep = b'/'
397 empty = b'' 383 curdir = b'.'
384 pardir = b'..'
398 else: 385 else:
399 sep = '/' 386 sep = '/'
400 empty = '' 387 curdir = '.'
401 if isabs(filename): 388 pardir = '..'
402 bits = [sep] + filename.split(sep)[1:] 389
403 else: 390 if isabs(rest):
404 bits = [empty] + filename.split(sep) 391 rest = rest[1:]
405 392 path = sep
406 for i in range(2, len(bits)+1): 393
407 component = join(*bits[0:i]) 394 while rest:
408 # Resolve symbolic links. 395 name, _, rest = rest.partition(sep)
409 if islink(component): 396 if not name or name == curdir:
410 resolved = _resolve_link(component) 397 # current dir
411 if resolved is None: 398 continue
412 # Infinite loop -- return original component + rest of the path 399 if name == pardir:
413 return abspath(join(*([component] + bits[i:]))) 400 # parent dir
401 if path:
402 path, name = split(path)
403 if name == pardir:
404 path = join(path, pardir, pardir)
414 else: 405 else:
415 newpath = join(*([resolved] + bits[i:])) 406 path = pardir
416 return realpath(newpath) 407 continue
417 408 newpath = join(path, name)
418 return abspath(filename) 409 if not islink(newpath):
419 410 path = newpath
420 411 continue
421 def _resolve_link(path): 412 # Resolve the symbolic link
422 """Internal helper function. Takes a path and follows symlinks 413 if newpath in seen:
423 until we either arrive at something that isn't a symlink, or 414 # Already seen this path
424 encounter a path we've seen before (meaning that there's a loop). 415 path = seen[newpath]
425 """ 416 if path is not None:
426 paths_seen = set() 417 # use cached value
427 while islink(path): 418 continue
428 if path in paths_seen: 419 # The symlink is not resolved, so we must have a symlink loop.
429 # Already seen this path, so we must have a symlink loop 420 # Return already resolved part + rest of the path unchanged.
430 return None 421 return join(newpath, rest), False
431 paths_seen.add(path) 422 seen[newpath] = None # not resolved symlink
432 # Resolve where the link points to 423 path, ok = _joinrealpath(path, os.readlink(newpath), seen)
433 resolved = os.readlink(path) 424 if not ok:
434 if not isabs(resolved): 425 return join(path, rest), False
435 dir = dirname(path) 426 seen[newpath] = path # resolved symlink
436 path = normpath(join(dir, resolved)) 427
437 else: 428 return path, True
438 path = normpath(resolved) 429
439 return path
440 430
441 supports_unicode_filenames = (sys.platform == 'darwin') 431 supports_unicode_filenames = (sys.platform == 'darwin')
442 432
443 def relpath(path, start=None): 433 def relpath(path, start=None):
444 """Return a relative version of a path""" 434 """Return a relative version of a path"""
445 435
446 if not path: 436 if not path:
447 raise ValueError("no path specified") 437 raise ValueError("no path specified")
448 438
449 if isinstance(path, bytes): 439 if isinstance(path, bytes):
(...skipping 22 matching lines...) Expand all
472 462
473 # Return the longest common sub-path of the sequence of paths given as input. 463 # Return the longest common sub-path of the sequence of paths given as input.
474 # The paths are not normalized before comparing them (this is the 464 # The paths are not normalized before comparing them (this is the
475 # responsibility of the caller). Any trailing separator is stripped from the 465 # responsibility of the caller). Any trailing separator is stripped from the
476 # returned path. 466 # returned path.
477 467
478 def commonpath(paths): 468 def commonpath(paths):
479 """Given a sequence of path names, returns the longest common sub-path.""" 469 """Given a sequence of path names, returns the longest common sub-path."""
480 470
481 if not paths: 471 if not paths:
482 return None 472 raise ValueError('commonpath() arg is an empty sequence')
483 473
484 if any(isabs(p) for p in paths) and any(not isabs(p) for p in paths): 474 if isinstance(paths[0], bytes):
485 # There is a mix of absolute and relative pathnames. 475 sep = b'/'
486 return None 476 curdir = b'.'
487 477 else:
488 sep = _get_sep(paths[0]) 478 sep = '/'
489 prefix = sep if isabs(paths[0]) else '' 479 curdir = '.'
490 480
491 try: 481 try:
492 split_paths = [path.split(sep) for path in paths] 482 split_paths = [path.split(sep) for path in paths]
493 except TypeError: 483 except TypeError:
494 valid_types = all(isinstance(p, (str, bytes, bytearray)) 484 valid_types = all(isinstance(p, (str, bytes)) for p in paths)
495 for p in paths)
496 if valid_types: 485 if valid_types:
497 # Must have a mixture of text and binary data 486 # Must have a mixture of text and binary data
498 raise TypeError("Can't mix strings and bytes in path " 487 raise TypeError("Can't mix strings and bytes in paths") from None
499 "components.") from None
500 raise 488 raise
501 489
490 if len(set(p[:1] == sep for p in paths)) != 1:
491 raise ValueError("Can't mix absolute and relative paths")
492
493 split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
502 s1 = min(split_paths) 494 s1 = min(split_paths)
503 s2 = max(split_paths) 495 s2 = max(split_paths)
504 common = s1 496 common = s1
505 for i, c in enumerate(s1): 497 for i, c in enumerate(s1):
506 if c != s2[i]: 498 if c != s2[i]:
507 common = s1[:i] 499 common = s1[:i]
508 break 500 break
509 501
502 prefix = sep if isabs(paths[0]) else sep[:0]
510 if not common: 503 if not common:
511 return prefix 504 return prefix
512 else: 505 else:
513 return prefix + join(*common).rstrip(sep) 506 return prefix + sep.join(common)
LEFT | RIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+