# HG changeset patch # Parent 56f71f02206ebe3413caa53b614403617ac544ed Issue #18828: Allow urljoin() to work with any scheme not in non_hierarchical diff -r 56f71f02206e Doc/library/urllib.parse.rst --- a/Doc/library/urllib.parse.rst Tue Dec 16 18:17:18 2014 -0800 +++ b/Doc/library/urllib.parse.rst Thu Mar 26 10:25:26 2015 +0000 @@ -270,7 +270,9 @@ .. versionchanged:: 3.5 - Behaviour updated to match the semantics defined in :rfc:`3986`. + Behaviour updated to match the semantics defined in :rfc:`3986`. Now + also supports arbitrary schemes other than ``hdl``, ``mailto``, + ``news``, ``sip``, ``sips``, ``snews``, ``tel``, and ``telnet``. .. function:: urldefrag(url) diff -r 56f71f02206e Lib/test/test_urlparse.py --- a/Lib/test/test_urlparse.py Tue Dec 16 18:17:18 2014 -0800 +++ b/Lib/test/test_urlparse.py Thu Mar 26 10:25:26 2015 +0000 @@ -375,6 +375,10 @@ self.checkJoin('', 'http://a/./g', 'http://a/./g') self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2') self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2') + self.checkJoin('x-arbitrary-scheme://netloc/a', '/b', + 'x-arbitrary-scheme://netloc/b') + self.checkJoin('x-arbitrary-scheme://netloc/path', '?query', + 'x-arbitrary-scheme://netloc/path?query') # XXX: The following tests are no longer compatible with RFC3986 # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g') @@ -392,6 +396,35 @@ self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g') self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/') + # Schemes previously listed as non-hierarchical, but joined anyway: + self.checkJoin('gopher://example/0path/old-file', 'new-file', + 'gopher://example/0path/new-file') + self.checkJoin('wais://host/database/wtype/old-path', 'new-path', + 'wais://host/database/wtype/new-path') + self.checkJoin('imap://joe@example.com/INBOX/;uid=20', ';UID=21', + 'imap://joe@example.com/INBOX/;UID=21') + + def test_urljoin_non_hier(self): + # Relative URL returned verbatim (without a scheme) for base schemes + # in non_hierarchical + tests = ( + ('hdl:cnri.dlib/december95', 'cnri.dlib/november95'), + ('mailto:infobot@example.com', '?subject=current-issue'), + ('news:comp.lang.python', 'message@id'), + ('telnet://host/', '//other-host/'), + ('snews://server/comp.lang.python', 'message@id'), + ('sip:user@host', '?Subject=foo'), + ('sips:user@host', '?Subject=foo'), + ('tel:+1-212-555-0101', '+1-800-555-0191'), + ) + schemes = set() + for base, relurl in tests: + schemes.add(urllib.parse.urlsplit(base).scheme) + with self.subTest((base, relurl)): + self.checkJoin(base, relurl, relurl) + # Ensure we tested each relevant scheme + self.assertCountEqual(urllib.parse.non_hierarchical, schemes) + def test_RFC2732(self): str_cases = [ ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), diff -r 56f71f02206e Lib/urllib/parse.py --- a/Lib/urllib/parse.py Tue Dec 16 18:17:18 2014 -0800 +++ b/Lib/urllib/parse.py Thu Mar 26 10:25:26 2015 +0000 @@ -37,10 +37,6 @@ "unquote", "unquote_plus", "unquote_to_bytes"] # A classification of schemes ('' means apply by default) -uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', - 'wais', 'file', 'https', 'shttp', 'mms', - 'prospero', 'rtsp', 'rtspu', '', 'sftp', - 'svn', 'svn+ssh'] uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais', 'file', 'mms', 'https', 'shttp', 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', @@ -48,11 +44,15 @@ uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', 'mms', '', 'sftp', 'tel'] +non_hierarchical = ['hdl', 'mailto', 'news', + 'tel', 'telnet', 'snews', 'sip', 'sips'] # These are not actually used anymore, but should stay for backwards # compatibility. (They are undocumented, but have a public-looking name.) -non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] +uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', + 'wais', 'file', 'https', 'shttp', 'mms', + 'prospero', 'rtsp', 'rtspu', '', 'sftp', + 'svn', 'svn+ssh'] uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', @@ -416,13 +416,12 @@ scheme, netloc, path, params, query, fragment = \ urlparse(url, bscheme, allow_fragments) - if scheme != bscheme or scheme not in uses_relative: + if scheme != bscheme or scheme in non_hierarchical: return _coerce_result(url) - if scheme in uses_netloc: - if netloc: - return _coerce_result(urlunparse((scheme, netloc, path, - params, query, fragment))) - netloc = bnetloc + if netloc: + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + netloc = bnetloc if not path and not params: path = bpath