Index: Doc/library/urlparse.rst =================================================================== --- Doc/library/urlparse.rst (revision 71354) +++ Doc/library/urlparse.rst (working copy) @@ -241,7 +241,19 @@ If there is no fragment identifier in *url*, returns *url* unmodified and an empty string. +.. function:: url_collapse_path_split(path) + Given a URL path, remove extra '/'s and '.' path elements and collapse + any '..' references. + + Implements something akin to :rfc:`2396` section 5.2, step 6, to parse relative paths. + + Returns: A tuple of (head, tail) where tail is everything after the final / + and head is everything before it. Head will always start with a '/' and, + if it contains anything else, never have a trailing '/'. + + Raises: IndexError if too many '..' occur within the path. + .. seealso:: :rfc:`1738` - Uniform Resource Locators (URL) Index: Lib/CGIHTTPServer.py =================================================================== --- Lib/CGIHTTPServer.py (revision 71354) +++ Lib/CGIHTTPServer.py (working copy) @@ -26,6 +26,7 @@ import os import sys import urllib +import urlparse import BaseHTTPServer import SimpleHTTPServer import select @@ -80,7 +81,7 @@ path begins with one of the strings in self.cgi_directories (and the next character is a '/' or the end of the string). """ - splitpath = _url_collapse_path_split(self.path) + splitpath = urlparse.url_collapse_path_split(self.path) if splitpath[0] in self.cgi_directories: self.cgi_info = splitpath return True @@ -322,47 +323,6 @@ else: self.log_message("CGI script exited OK") - -# TODO(gregory.p.smith): Move this into an appropriate library. -def _url_collapse_path_split(path): - """ - Given a URL path, remove extra '/'s and '.' path elements and collapse - any '..' references. - - Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. - - Returns: A tuple of (head, tail) where tail is everything after the final / - and head is everything before it. 
Head will always start with a '/' and, - if it contains anything else, never have a trailing '/'. - - Raises: IndexError if too many '..' occur within the path. - """ - # Similar to os.path.split(os.path.normpath(path)) but specific to URL - # path semantics rather than local operating system semantics. - path_parts = [] - for part in path.split('/'): - if part == '.': - path_parts.append('') - else: - path_parts.append(part) - # Filter out blank non trailing parts before consuming the '..'. - path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:] - if path_parts: - tail_part = path_parts.pop() - else: - tail_part = '' - head_parts = [] - for part in path_parts: - if part == '..': - head_parts.pop() - else: - head_parts.append(part) - if tail_part and tail_part == '..': - head_parts.pop() - tail_part = '' - return ('/' + '/'.join(head_parts), tail_part) - - nobody = None def nobody_uid(): Index: Lib/urlparse.py =================================================================== --- Lib/urlparse.py (revision 71354) +++ Lib/urlparse.py (working copy) @@ -5,7 +5,8 @@ """ __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", - "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"] + "urlsplit", "urlunsplit", "parse_qs", "parse_qsl", + "url_collapse_path_split"] # A classification of schemes ('' means apply by default) uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', @@ -113,6 +114,46 @@ params = '' return ParseResult(scheme, netloc, url, params, query, fragment) +def url_collapse_path_split(path): + """ + Given a URL path, remove extra '/'s and '.' path elements and collapse + any '..' references. + + Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. + + Returns: A tuple of (head, tail) where tail is everything after the final / + and head is everything before it. Head will always start with a '/' and, + if it contains anything else, never have a trailing '/'. + + Raises: IndexError if too many '..' 
occur within the path. + """ + # Similar to os.path.split(os.path.normpath(path)) but specific to URL + # path semantics rather than local operating system semantics. + path_parts = [] + for part in path.split('/'): + if part == '.': + path_parts.append('') + else: + path_parts.append(part) + # Filter out blank non trailing parts before consuming the '..'. + path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:] + if path_parts: + tail_part = path_parts.pop() + else: + tail_part = '' + head_parts = [] + for part in path_parts: + if part == '..': + head_parts.pop() + else: + head_parts.append(part) + if tail_part and tail_part == '..': + head_parts.pop() + tail_part = '' + return ('/' + '/'.join(head_parts), tail_part) + + + def _splitparams(url): if '/' in url: i = url.find(';', url.rfind('/'))