Index: Misc/NEWS =================================================================== --- Misc/NEWS (revision 86291) +++ Misc/NEWS (working copy) @@ -65,6 +65,11 @@ Library ------- +- Issue #2001: Add a new page header to pydoc html pages with Python version, + get, search, module index, topics, and keywords choices. + Removed tkinter search panel and -g option. + Add -b option to open a browser session. + - Issue #10126: Fix distutils' test_build when Python was built with --enable-shared. Index: Misc/ACKS =================================================================== --- Misc/ACKS (revision 86291) +++ Misc/ACKS (working copy) @@ -12,6 +12,7 @@ and the list is in rough alphabetical order by last names. David Abrahams +Ron Adam Jim Ahlstrom Farhan Ahmad Matthew Ahrens Index: Doc/whatsnew/3.2.rst =================================================================== --- Doc/whatsnew/3.2.rst (revision 86291) +++ Doc/whatsnew/3.2.rst (working copy) @@ -540,7 +540,14 @@ (Contributed by Neil Schemenauer and Nick Coghlan; :issue:`5178`.) +* The :mod:`pydoc` module now provides a much improved web server interface, + as well as a new command line option to automatically open a browser + window to display that server. The old TKinter GUI is no longer + available - the pydoc "GUI" is now the client web browser. + (Contributed by Ron Adam; :issue:`2001`.) + + Multi-threading =============== Index: Doc/library/pydoc.rst =================================================================== --- Doc/library/pydoc.rst (revision 86291) +++ Doc/library/pydoc.rst (working copy) @@ -51,12 +51,16 @@ You can also use :program:`pydoc` to start an HTTP server on the local machine that will serve documentation to visiting Web browsers. :program:`pydoc` -:option:`-p 1234` will start a HTTP server on port 1234, allowing you to browse +``-p 1234`` will start a HTTP server on port 1234, allowing you to browse the documentation at ``http://localhost:1234/`` in your preferred Web browser. 
-:program:`pydoc` :option:`-g` will start the server and additionally bring up a -small :mod:`tkinter`\ -based graphical interface to help you search for -documentation pages. +Specifying ``0`` as the port number will select an arbitrary unused port. +:program:`pydoc` ``-b`` will start the server and additionally open a web +browser to a module index page. Each served page has a navigation bar at the +top where you can 'get' help on an individual item, 'search' all modules with a +keyword in their synopsis line, and go to indexes for 'modules', 'topics' and +'keywords'. + When :program:`pydoc` generates documentation, it uses the current environment and path to locate modules. Thus, invoking :program:`pydoc` :option:`spam` documents precisely the version of the module you would get if you started the @@ -67,3 +71,7 @@ :envvar:`PYTHONDOCS` environment variable to a different URL or to a local directory containing the Library Reference Manual pages. +.. versionchanged:: 3.2 + + The ``-g`` Tkinter GUI option was removed in favour of the enhanced web + server and the ``-b`` option to automatically open a client web browser. Index: Lib/urllib/parse.py =================================================================== --- Lib/urllib/parse.py (revision 86291) +++ Lib/urllib/parse.py (working copy) @@ -60,6 +60,7 @@ '0123456789' '+-.') +# XXX: Consider replacing with functools.lru_cache MAX_CACHE_SIZE = 20 _parse_cache = {} @@ -69,80 +70,202 @@ _safe_quoters.clear() -class ResultMixin(object): +# Helpers for bytes handling +# For 3.2, we deliberately require applications that +# handle improperly quoted URLs to do their own +# decoding and encoding. 
If valid use cases are +# presented, we may relax this by using latin-1 +# decoding internally for 3.3 +_implicit_encoding = 'ascii' +_implicit_errors = 'strict' + +def _noop(obj): + return obj + +def _encode_result(obj, encoding=_implicit_encoding, + errors=_implicit_errors): + return obj.encode(encoding, errors) + +def _decode_args(args, encoding=_implicit_encoding, + errors=_implicit_errors): + return tuple(x.decode(encoding, errors) if x else '' for x in args) + +def _coerce_args(*args): + # Invokes decode if necessary to create str args + # and returns the coerced inputs along with + # an appropriate result coercion function + # - noop for str inputs + # - encoding function otherwise + str_input = isinstance(args[0], str) + for arg in args[1:]: + # We special-case the empty string to support the + # "scheme=''" default argument to some functions + if arg and isinstance(arg, str) != str_input: + raise TypeError("Cannot mix str and non-str arguments") + if str_input: + return args + (_noop,) + return _decode_args(args) + (_encode_result,) + +# Result objects are more helpful than simple tuples +class _ResultMixinBase(object): """Shared methods for the parsed result objects.""" + __slots__ = () @property def username(self): - netloc = self.netloc - if "@" in netloc: - userinfo = netloc.rsplit("@", 1)[0] - if ":" in userinfo: - userinfo = userinfo.split(":", 1)[0] - return userinfo - return None + return self._userinfo[0] @property def password(self): - netloc = self.netloc - if "@" in netloc: - userinfo = netloc.rsplit("@", 1)[0] - if ":" in userinfo: - return userinfo.split(":", 1)[1] - return None + return self._userinfo[1] @property def hostname(self): - netloc = self.netloc.split('@')[-1] - if '[' in netloc and ']' in netloc: - return netloc.split(']')[0][1:].lower() - elif ':' in netloc: - return netloc.split(':')[0].lower() - elif netloc == '': - return None - else: - return netloc.lower() + hostname = self._hostinfo[0] + if not hostname: + hostname = None 
+ elif hostname is not None: + hostname = hostname.lower() + return hostname @property def port(self): - netloc = self.netloc.split('@')[-1].split(']')[-1] - if ':' in netloc: - port = netloc.split(':')[1] - return int(port, 10) + port = self._hostinfo[1] + if port is not None: + port = int(port, 10) + return port + + +class ResultMixinStr(_ResultMixinBase): + """Shared methods for the parsed result objects.""" + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition('@') + if have_info: + username, have_password, password = userinfo.partition(':') + if not have_password: + password = None else: - return None + username = password = None + return username, password + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition('@') + _, have_open_br, bracketed = hostinfo.partition('[') + if have_open_br: + hostname, _, port = bracketed.partition(']') + _, have_port, port = port.partition(':') + else: + hostname, have_port, port = hostinfo.partition(':') + if not have_port: + port = None + return hostname, port + + def encode(self, encoding, errors): + return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) + + +class ResultMixinBytes(_ResultMixinBase): + """Shared methods for the parsed result objects with bytes data.""" + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition(b'@') + if have_info: + username, have_password, password = userinfo.partition(b':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition(b'@') + _, have_open_br, bracketed = hostinfo.partition(b'[') + if have_open_br: + hostname, _, port = bracketed.partition(b']') + _, have_port, port = port.partition(b':') + else: + hostname, have_port, port = 
hostinfo.partition(b':') + if not have_port: + port = None + return hostname, port + + def decode(self, encoding, errors): + return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) + + from collections import namedtuple -class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin): +class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixinStr): __slots__ = () + def geturl(self): return urlunsplit(self) +class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixinStr): -class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin): + __slots__ = () + def geturl(self): + return urlunparse(self) + + +class SplitResultBytes(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixinBytes): + __slots__ = () + def geturl(self): + return urlunsplit(self) + + +class ParseResultBytes(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixinBytes): + + __slots__ = () + + def geturl(self): return urlunparse(self) +# Set up the encode/decode result pairs +def _fix_result_transcoding(): + _result_pairs = ( + (SplitResult, SplitResultBytes), + (ParseResult, ParseResultBytes), + ) + for _decoded, _encoded in _result_pairs: + _decoded._encoded_counterpart = _encoded + _encoded._decoded_counterpart = _decoded +_fix_result_transcoding() +del _fix_result_transcoding + def urlparse(url, scheme='', allow_fragments=True): """Parse a URL into 6 components: :///;?# Return a 6-tuple: (scheme, netloc, path, params, query, fragment). Note that we don't break the components up in smaller bits (e.g. 
netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) tuple = urlsplit(url, scheme, allow_fragments) scheme, netloc, url, query, fragment = tuple if scheme in uses_params and ';' in url: url, params = _splitparams(url) else: params = '' - return ParseResult(scheme, netloc, url, params, query, fragment) + result = ParseResult(scheme, netloc, url, params, query, fragment) + return _coerce_result(result) def _splitparams(url): if '/' in url: @@ -167,11 +290,12 @@ Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments, type(url), type(scheme) cached = _parse_cache.get(key, None) if cached: - return cached + return _coerce_result(cached) if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth clear_cache() netloc = query = fragment = '' @@ -191,7 +315,7 @@ url, query = url.split('?', 1) v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v - return v + return _coerce_result(v) if url.endswith(':') or not url[i+1].isdigit(): for c in url[:i]: if c not in scheme_chars: @@ -209,17 +333,18 @@ url, query = url.split('?', 1) v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v - return v + return _coerce_result(v) def urlunparse(components): """Put a parsed URL back together again. This may result in a slightly different, but equivalent URL, if the URL that was parsed originally had redundant delimiters, e.g. a ? 
with an empty query (the draft states that these are equivalent).""" - scheme, netloc, url, params, query, fragment = components + scheme, netloc, url, params, query, fragment, _coerce_result = ( + _coerce_args(*components)) if params: url = "%s;%s" % (url, params) - return urlunsplit((scheme, netloc, url, query, fragment)) + return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) def urlunsplit(components): """Combine the elements of a tuple as returned by urlsplit() into a @@ -227,7 +352,8 @@ This may result in a slightly different, but equivalent URL, if the URL that was parsed originally had unnecessary delimiters (for example, a ? with an empty query; the RFC states that these are equivalent).""" - scheme, netloc, url, query, fragment = components + scheme, netloc, url, query, fragment, _coerce_result = ( + _coerce_args(*components)) if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): if url and url[:1] != '/': url = '/' + url url = '//' + (netloc or '') + url @@ -237,7 +363,7 @@ url = url + '?' 
+ query if fragment: url = url + '#' + fragment - return url + return _coerce_result(url) def urljoin(base, url, allow_fragments=True): """Join a base URL and a possibly relative URL to form an absolute @@ -246,32 +372,33 @@ return url if not url: return base + base, url, _coerce_result = _coerce_args(base, url) bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ urlparse(base, '', allow_fragments) scheme, netloc, path, params, query, fragment = \ urlparse(url, bscheme, allow_fragments) if scheme != bscheme or scheme not in uses_relative: - return url + return _coerce_result(url) if scheme in uses_netloc: if netloc: - return urlunparse((scheme, netloc, path, - params, query, fragment)) + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) netloc = bnetloc if path[:1] == '/': - return urlunparse((scheme, netloc, path, - params, query, fragment)) + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) if not path: path = bpath if not params: params = bparams else: path = path[:-1] - return urlunparse((scheme, netloc, path, - params, query, fragment)) + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) if not query: query = bquery - return urlunparse((scheme, netloc, path, - params, query, fragment)) + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) segments = bpath.split('/')[:-1] + path.split('/') # XXX The stuff below is bogus in various ways... if segments[-1] == '.': @@ -293,8 +420,8 @@ segments[-1] = '' elif len(segments) >= 2 and segments[-1] == '..': segments[-2:] = [''] - return urlunparse((scheme, netloc, '/'.join(segments), - params, query, fragment)) + return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments), + params, query, fragment))) def urldefrag(url): """Removes any existing fragment from URL. @@ -303,12 +430,13 @@ the URL contained no fragments, the second element is the empty string. 
""" + url, _coerce_result = _coerce_args(url) if '#' in url: s, n, p, a, q, frag = urlparse(url) defrag = urlunparse((s, n, p, a, q, '')) - return defrag, frag + return _coerce_result(defrag), _coerce_result(frag) else: - return url, '' + return _coerce_result(url), _coerce_result('') def unquote_to_bytes(string): """unquote_to_bytes('abc%20def') -> b'abc def'.""" @@ -420,6 +548,7 @@ Returns a list, as G-d intended. """ + qs, _coerce_result = _coerce_args(qs) pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] r = [] for name_value in pairs: @@ -435,10 +564,9 @@ else: continue if len(nv[1]) or keep_blank_values: - name = unquote(nv[0].replace('+', ' ')) - value = unquote(nv[1].replace('+', ' ')) + name = _coerce_result(unquote(nv[0].replace('+', ' '))) + value = _coerce_result(unquote(nv[1].replace('+', ' '))) r.append((name, value)) - return r def unquote_plus(string, encoding='utf-8', errors='replace'): Index: Lib/pydoc.py =================================================================== --- Lib/pydoc.py (revision 86291) +++ Lib/pydoc.py (working copy) @@ -15,11 +15,13 @@ Run "pydoc -k " to search for a keyword in the synopsis lines of all available modules. -Run "pydoc -p " to start an HTTP server on a given port on the -local machine to generate documentation web pages. +Run "pydoc -p " to start an HTTP server on the given port on the +local machine to generate documentation web pages. Port number 0 can +be used to get an arbitrary unused port. -For platforms without a command line, "pydoc -g" starts the HTTP server -and also pops up a little window for controlling it. +Run "pydoc -b" to start an HTTP server on an arbitrary unused port and open +a web browser to interactively browse documentation. The -p option can +be used with the -b option to explicitly specify the server port. Run "pydoc -w " to write out the HTML documentation for a module to a file named ".html". 
@@ -592,7 +594,7 @@ if sys.platform == 'win32': import nturl2path url = nturl2path.pathname2url(path) - filelink = '%s' % (url, path) + filelink = '%s' % (url, path) except TypeError: filelink = '(built-in)' info = [] @@ -612,7 +614,7 @@ docloc = '' result = self.heading( head, '#ffffff', '#7799ee', - 'index
' + filelink + docloc) + filelink + docloc) modules = inspect.getmembers(object, inspect.ismodule) @@ -1844,6 +1846,35 @@ 'Related help topics: ' + ', '.join(xrefs.split()) + '\n') self.output.write('\n%s\n' % buffer.getvalue()) + def _gettopic(self, topic, more_xrefs=''): + """ Returns unbuffered tuple of (topic, xrefs). + + If an error occurs, topic is the error message, and xrefs is ''. + This function duplicates the showtopic method but returns its + result directly so it can be formatted for display in an html page. + """ + try: + import pydoc_data.topics + except ImportError: + return(''' +Sorry, topic and keyword documentation is not available because the +module "pydoc_data.topics" could not be found. +''' , '') + return + target = self.topics.get(topic, self.keywords.get(topic)) + if not target: + return 'no documentation found for %s' % repr(topic), '' + if type(target) is type(''): + return self._gettopic(target, more_xrefs) + label, xrefs = target + try: + doc = pydoc_data.topics.topics[label] + except KeyError: + return 'no documentation found for %s' % repr(topic), '' + if more_xrefs: + xrefs = (xrefs or '') + ' ' + more_xrefs + return doc, xrefs + def showsymbol(self, symbol): target = self.symbols[symbol] topic, _, xrefs = target.partition(' ') @@ -1925,6 +1956,14 @@ for importer, modname, ispkg in pkgutil.walk_packages(onerror=onerror): if self.quit: break + + # XXX Skipping this file is a get-around for bug that causes python + # to crash with a segfault. http://bugs.python.org/issue9319 + # + # TODO: Remove this once the bug is fixed. 
+ if modname == "test.badsyntax_pep3120": + continue + if key is None: callback(None, modname, '') else: @@ -1975,269 +2014,391 @@ else: warnings.filterwarnings('ignore') # ignore problems during import ModuleScanner().run(callback, key, onerror=onerror) -# --------------------------------------------------- web browser interface -def serve(port, callback=None, completer=None): - import http.server, email.message, select +# ------------------------------------------------ Local text/html server - class DocHandler(http.server.BaseHTTPRequestHandler): - def send_document(self, title, contents): - try: - self.send_response(200) - self.send_header('Content-Type', 'text/html; charset=UTF-8') - self.end_headers() - self.wfile.write(html.page(title, contents).encode('utf-8')) - except IOError: pass +""" +A simple local HTML/text server thread. - def do_GET(self): - path = self.path - if path[-5:] == '.html': path = path[:-5] - if path[:1] == '/': path = path[1:] - if path and path != '.': - try: - obj = locate(path, forceload=1) - except ErrorDuringImport as value: - self.send_document(path, html.escape(str(value))) - return - if obj: - self.send_document(describe(obj), html.document(obj, path)) - else: - self.send_document(path, -'no Python documentation found for %s' % repr(path)) - else: - heading = html.heading( -'Python: Index of Modules', -'#ffffff', '#7799ee') - def bltinlink(name): - return '%s' % (name, name) - names = [x for x in sys.builtin_module_names if x != '__main__'] - contents = html.multicolumn(names, bltinlink) - indices = ['

' + html.bigsection( - 'Built-in Modules', '#ffffff', '#ee77aa', contents)] +Start an HTML/text server so HTML or text documents can be browsed +dynamically and interactively with a web browser. - seen = {} - for dir in sys.path: - indices.append(html.index(dir, seen)) - contents = heading + ' '.join(indices) + '''

- -pydoc by Ka-Ping Yee <ping@lfw.org>''' - self.send_document('Index of Modules', contents) +Any changes made to this file should be tested with pydoc. - def log_message(self, *args): pass + Example use + =========== - class DocServer(http.server.HTTPServer): - def __init__(self, port, callback): - host = 'localhost' - self.address = (host, port) - self.url = 'http://%s:%d/' % (host, port) - self.callback = callback - self.base.__init__(self, self.address, self.handler) + >>> import time + >>> import pydoc - def serve_until_quit(self): - import select - self.quit = False - while not self.quit: - rd, wr, ex = select.select([self.socket.fileno()], [], [], 1) - if rd: self.handle_request() + Define a URL handler. To determine what the client is asking + for check the URL and content_type. - def server_activate(self): - self.base.server_activate(self) - if self.callback: self.callback(self) + Then get or generate some text or HTML code and return it. - DocServer.base = http.server.HTTPServer - DocServer.handler = DocHandler - DocHandler.MessageClass = email.message.Message - try: - try: - DocServer(port, callback).serve_until_quit() - except (KeyboardInterrupt, select.error): - pass - finally: - if completer: completer() + >>> def my_url_handler(url, content_type): + ... text = 'the URL sent was: (%s, %s)' % (url, content_type) + ... return text -# ----------------------------------------------------- graphical interface + Start server thread on port 0. + If you use port 0, the server will pick a random port number. + You can then use serverthread.port to get the port number. 
-def gui(): - """Graphical interface (starts web server and pops up a control window).""" - class GUI: - def __init__(self, window, port=7464): - self.window = window - self.server = None - self.scanner = None + >>> port = 0 + >>> serverthread = pydoc._startserver(my_url_handler, port) - import tkinter - self.server_frm = tkinter.Frame(window) - self.title_lbl = tkinter.Label(self.server_frm, - text='Starting server...\n ') - self.open_btn = tkinter.Button(self.server_frm, - text='open browser', command=self.open, state='disabled') - self.quit_btn = tkinter.Button(self.server_frm, - text='quit serving', command=self.quit, state='disabled') + Check that the server is really started. If it is, open browser + and get first page. Use serverthread.url as the starting page. - self.search_frm = tkinter.Frame(window) - self.search_lbl = tkinter.Label(self.search_frm, text='Search for') - self.search_ent = tkinter.Entry(self.search_frm) - self.search_ent.bind('', self.search) - self.stop_btn = tkinter.Button(self.search_frm, - text='stop', pady=0, command=self.stop, state='disabled') - if sys.platform == 'win32': - # Trying to hide and show this button crashes under Windows. - self.stop_btn.pack(side='right') + >>> if serverthread.serving: + ... import webbrowser - self.window.title('pydoc') - self.window.protocol('WM_DELETE_WINDOW', self.quit) - self.title_lbl.pack(side='top', fill='x') - self.open_btn.pack(side='left', fill='x', expand=1) - self.quit_btn.pack(side='right', fill='x', expand=1) - self.server_frm.pack(side='top', fill='x') + #... webbrowser.open(serverthread.url) + #True - self.search_lbl.pack(side='left') - self.search_ent.pack(side='right', fill='x', expand=1) - self.search_frm.pack(side='top', fill='x') - self.search_ent.focus_set() + Let the server do its thing. We just need to monitor its status. + Use time.sleep so the loop doesn't hog the CPU. 
- font = ('helvetica', sys.platform == 'win32' and 8 or 10) - self.result_lst = tkinter.Listbox(window, font=font, height=6) - self.result_lst.bind('', self.select) - self.result_lst.bind('', self.goto) - self.result_scr = tkinter.Scrollbar(window, - orient='vertical', command=self.result_lst.yview) - self.result_lst.config(yscrollcommand=self.result_scr.set) + >>> starttime = time.time() + >>> timeout = 1 #seconds - self.result_frm = tkinter.Frame(window) - self.goto_btn = tkinter.Button(self.result_frm, - text='go to selected', command=self.goto) - self.hide_btn = tkinter.Button(self.result_frm, - text='hide results', command=self.hide) - self.goto_btn.pack(side='left', fill='x', expand=1) - self.hide_btn.pack(side='right', fill='x', expand=1) + This is a short timeout for testing purposes. - self.window.update() - self.minwidth = self.window.winfo_width() - self.minheight = self.window.winfo_height() - self.bigminheight = (self.server_frm.winfo_reqheight() + - self.search_frm.winfo_reqheight() + - self.result_lst.winfo_reqheight() + - self.result_frm.winfo_reqheight()) - self.bigwidth, self.bigheight = self.minwidth, self.bigminheight - self.expanded = 0 - self.window.wm_geometry('%dx%d' % (self.minwidth, self.minheight)) - self.window.wm_minsize(self.minwidth, self.minheight) - self.window.tk.willdispatch() + >>> while serverthread.serving: + ... time.sleep(.01) + ... if serverthread.serving and time.time() - starttime > timeout: + ... serverthread.stop() + ... break - import threading - threading.Thread( - target=serve, args=(port, self.ready, self.quit)).start() + Print any errors that may have occurred. 
- def ready(self, server): - self.server = server - self.title_lbl.config( - text='Python documentation server at\n' + server.url) - self.open_btn.config(state='normal') - self.quit_btn.config(state='normal') + >>> print(serverthread.error) + None - def open(self, event=None, url=None): - url = url or self.server.url - try: - import webbrowser - webbrowser.open(url) - except ImportError: # pre-webbrowser.py compatibility - if sys.platform == 'win32': - os.system('start "%s"' % url) - else: - rc = os.system('netscape -remote "openURL(%s)" &' % url) - if rc: os.system('netscape "%s" &' % url) +""" +import sys +import http.server +import email.message +import select +import threading +import time +import socket - def quit(self, event=None): - if self.server: - self.server.quit = 1 - self.window.quit() +class _DocHandler(http.server.BaseHTTPRequestHandler): + """ Handle server requests from browser. """ + def do_GET(self): + """ Process a request from a HTML browser. + The URL received is in self.path. + Get an HTML page from self.urlhandler and send it. + """ + if self.path.endswith('.css'): + content_type = 'text/css' + else: + content_type = 'text/html' + self.send_response(200) + self.send_header('Content-Type', content_type) + self.end_headers() + self.wfile.write(bytes(self.urlhandler(self.path, content_type), + 'UTF-8')) - def search(self, event=None): - key = self.search_ent.get() - self.stop_btn.pack(side='right') - self.stop_btn.config(state='normal') - self.search_lbl.config(text='Searching for "%s"...' % key) - self.search_ent.forget() - self.search_lbl.pack(side='left') - self.result_lst.delete(0, 'end') - self.goto_btn.config(state='disabled') - self.expand() + def log_message(self, *args): + # Don't log messages. 
+ pass - import threading - if self.scanner: - self.scanner.quit = 1 - self.scanner = ModuleScanner() - threading.Thread(target=self.scanner.run, - args=(self.update, key, self.done)).start() +class _DocServer(http.server.HTTPServer): + def __init__(self, port, callback): + self.host = (sys.platform == 'mac') and '127.0.0.1' or 'localhost' + self.address = ('', port) + self.callback = callback + self.base.__init__(self, self.address, self.handler) + self.quit = False - def update(self, path, modname, desc): - if modname[-9:] == '.__init__': - modname = modname[:-9] + ' (package)' - self.result_lst.insert('end', - modname + ' - ' + (desc or '(no description)')) + def serve_until_quit(self): + while not self.quit: + rd, wr, ex = select.select([self.socket.fileno()], [], [], 1) + if rd: + self.handle_request() - def stop(self, event=None): - if self.scanner: - self.scanner.quit = 1 - self.scanner = None + def server_activate(self): + self.base.server_activate(self) + if self.callback: + self.callback(self) - def done(self): - self.scanner = None - self.search_lbl.config(text='Search for') - self.search_lbl.pack(side='left') - self.search_ent.pack(side='right', fill='x', expand=1) - if sys.platform != 'win32': self.stop_btn.forget() - self.stop_btn.config(state='disabled') +class _ServerThread(threading.Thread): + """ Use to start the server as a thread in an application. """ + def __init__(self, urlhandler, port): + self.urlhandler = urlhandler + self.port = int(port) + threading.Thread.__init__(self) + self.serving = False + self.error = None - def select(self, event=None): - self.goto_btn.config(state='normal') + def run(self): + """ Start the server. 
""" + try: + _DocServer.base = http.server.HTTPServer + _DocServer.handler = _DocHandler + _DocHandler.MessageClass = email.message.Message + _DocHandler.urlhandler = staticmethod(self.urlhandler) + docsvr = _DocServer(self.port, self.ready) + self.docserver = docsvr + docsvr.serve_until_quit() + except Exception as e: + self.error = e - def goto(self, event=None): - selection = self.result_lst.curselection() - if selection: - modname = self.result_lst.get(selection[0]).split()[0] - self.open(url=self.server.url + modname + '.html') + def ready(self, server): + self.serving = True + self.host = server.host + self.port = server.server_port + self.url = 'http://%s:%d/' % (self.host, self.port) - def collapse(self): - if not self.expanded: return - self.result_frm.forget() - self.result_scr.forget() - self.result_lst.forget() - self.bigwidth = self.window.winfo_width() - self.bigheight = self.window.winfo_height() - self.window.wm_geometry('%dx%d' % (self.minwidth, self.minheight)) - self.window.wm_minsize(self.minwidth, self.minheight) - self.expanded = 0 + def stop(self): + """ Stop the server and this thread nicely """ + self.docserver.quit = True + self.serving = False + self.url = None - def expand(self): - if self.expanded: return - self.result_frm.pack(side='bottom', fill='x') - self.result_scr.pack(side='right', fill='y') - self.result_lst.pack(side='top', fill='both', expand=1) - self.window.wm_geometry('%dx%d' % (self.bigwidth, self.bigheight)) - self.window.wm_minsize(self.minwidth, self.bigminheight) - self.expanded = 1 +def _startserver(urlhandler, port): + """ Start a HTTP server thread on a specific port. + Use address http://localhost:/ + """ + thread = _ServerThread(urlhandler, port) + thread.start() + # Wait until thread.serving is True to make sure we are + # really up before returning. 
+ while not thread.error and not thread.serving: + time.sleep(.01) + return thread - def hide(self, event=None): - self.stop() - self.collapse() - import tkinter +def _test(): + import doctest + doctest.testmod() + + +# --------------------------------------------------- web browser interface + +def _html_navbar(): return \ +""" + + + + + +
Python %s
+ + +
+ + +
+ Index of Modules + : Topics + : Keywords +
+""" % sys.version + +def _html_index(): + """ Index of modules web page. """ + def bltinlink(name): + return '%s' % (name, name) + heading = html.heading( + 'Index of Modules', + '#ffffff', '#7799ee') + names = list(filter(lambda x: x != '__main__', + sys.builtin_module_names)) + contents = html.multicolumn(names, bltinlink) + indices = ['

' + html.bigsection( + 'Built-in Modules', '#ffffff', '#ee77aa', contents)] + seen = {} + for dir in sys.path: + indices.append(html.index(dir, seen)) + contents = heading + ''.join(indices) + \ +'''

+ pydoc by Ka-Ping Yee <ping@lfw.org>''' + return html.page('Index of Modules' ,contents) + +def _html_search(key): + """ Search results page. """ + # scan for modules + search_result = [] + def callback(path, modname, desc): + if modname[-9:] == '.__init__': + modname = modname[:-9] + ' (package)' + search_result.append((modname, desc and '- ' + desc)) + try: import warnings + except ImportError: pass + else: warnings.filterwarnings('ignore') # ignore problems during import + ModuleScanner().run(callback, key) + # format page + def bltinlink(name): + return '%s' % (name, name) + results = [] + heading = html.heading( + 'Search Results', + '#ffffff', '#7799ee') + for name, desc in search_result: + results.append(bltinlink(name) + desc) + contents = heading + html.bigsection( + 'key = %s' % key, '#ffffff', '#ee77aa', '
'.join(results)) + return html.page('Search Results', contents) + +def _html_getfile(path): + """ Get and display a source file listing safely. """ + path = os.sep + path.replace('%20', ' ') try: - root = tkinter.Tk() - # Tk will crash if pythonw.exe has an XP .manifest - # file and the root has is not destroyed explicitly. - # If the problem is ever fixed in Tk, the explicit - # destroy can go. + f = open(path, 'r') + lines = html.escape(f.read()) + finally: + f.close() + body = '

%s
' % lines + heading = html.heading( + 'File Listing', + '#ffffff', '#7799ee') + contents = heading + html.bigsection( + 'File: %s' % path, '#ffffff', '#ee77aa', body) + return html.page('getfile: %s' % path, contents) + +def _html_topics(): + """ Index of topic texts available. """ + def bltinlink(name): + return '%s' % (name, name) + heading = html.heading( + 'INDEX', + '#ffffff', '#7799ee') + names = sorted(Helper.topics.keys()) + def bltinlink(name): + return '%s' % (name, name) + contents = html.multicolumn(names, bltinlink) + contents = heading + html.bigsection( + 'Topics', '#ffffff', '#ee77aa', contents) + return html.page('Topics', contents) + +def _html_keywords(): + """ Index of keywords. """ + heading = html.heading( + 'INDEX', + '#ffffff', '#7799ee') + names = sorted(Helper.keywords.keys()) + def bltinlink(name): + return '%s' % (name, name) + contents = html.multicolumn(names, bltinlink) + contents = heading + html.bigsection( + 'Keywords', '#ffffff', '#ee77aa', contents) + return html.page('Keywords', contents) + +def _html_topicpage(topic): + """ Topic or keyword help page. """ + import io + buf = io.StringIO() + htmlhelp = Helper(buf, buf) + contents, xrefs = htmlhelp._gettopic(topic) + if topic in htmlhelp.keywords: + title = 'KEYWORD' + else: + title = 'TOPIC' + heading = html.heading( + '%s' % title, + '#ffffff', '#7799ee') + contents = '
%s
' % contents + contents = html.bigsection(topic , '#ffffff','#ee77aa', contents) + xrefs = sorted(xrefs.split()) + def bltinlink(name): + return '%s' % (name, name) + xrefs = html.multicolumn(xrefs, bltinlink) + xrefs = html.section('Related help topics: ', '#ffffff', '#ee77aa', xrefs) + return html.page('%s: %s' % (title, topic), heading + contents + xrefs) + +def _html_error(url): + heading = html.heading( + 'Error', + '#ffffff', '#ee0000') + return heading + url + +def _get_html_page(url): + """ Function url handler uses to get the html page to get + depending on the url. + """ + if url[-5:] == '.html': url = url[:-5] + if url[:1] == '/': url = url[1:] + if url.startswith("get?key="): + url = url[8:] + title = url + contents = '' + if url in ("", ".", "index"): + contents = _html_index() + elif url == "topics": + contents = _html_topics() + elif url == "keywords": + contents = _html_keywords() + elif url.startswith("search?key="): + contents = _html_search(url[11:]) + elif url.startswith("getfile?key="): + url = url[12:] try: - gui = GUI(root) - root.mainloop() + contents = _html_getfile(url) + except IOError as value: + contents = _html_error('could read file %s' % repr(url)) + else: + obj = None + try: + obj = locate(url, forceload=1) + except ErrorDuringImport as value: + contents = html.escape(str(value)) + if obj: + title = describe(obj) + contents = html.document(obj, url) + elif url in Helper.keywords or url in Helper.topics: + contents = _html_topicpage(url) + else: + contents = _html_error('no Python documentation found for %s' + % repr(url)) + return html.page(title, _html_navbar() + contents) + + +def gui(port=7464, browse=True): + """ Start pydoc server and web browser. """ + + import webbrowser + import time + + def url_handler(url, content_type): + """ html server html and style sheet requests. 
""" + if url.startswith('/'): + url = url[1:] + if content_type == 'text/css': + fp = open(os.path.join(path_here, url)) + css = ''.join(fp.readlines()) + fp.close() + return css + elif content_type == 'text/html': + return _get_html_page(url) + return 'Error: unknown content type ' + content_type + + serverthread = _startserver(url_handler, port) + if serverthread.error: + print(serverthread.error) + return + if serverthread.serving: + print('Python Version:', sys.version) + print('Server ready at:', serverthread.url) + print('Server commands: [b]rowser, [q]uit') + if browse: + webbrowser.open(serverthread.url) + try: + while serverthread.serving: + cmd = input('server>') + cmd = cmd.lower() + if cmd == 'q': + break + if cmd == 'b': + webbrowser.open(serverthread.url) finally: - root.destroy() - except KeyboardInterrupt: - pass + if serverthread.serving: + serverthread.stop() + print('Server stopped') + # -------------------------------------------------- command-line interface def ispath(x): @@ -2257,30 +2418,31 @@ sys.path.insert(0, '.') try: - opts, args = getopt.getopt(sys.argv[1:], 'gk:p:w') - writing = 0 + opts, args = getopt.getopt(sys.argv[1:], 'bk:p:w') + writing = False + startserver = False + browse = False + port = None for opt, val in opts: - if opt == '-g': - gui() - return + if opt == '-b': + startserver = True + browse = True if opt == '-k': apropos(val) return if opt == '-p': - try: - port = int(val) - except ValueError: - raise BadUsage - def ready(server): - print('pydoc server ready at %s' % server.url) - def stopped(): - print('pydoc server stopped') - serve(port, ready, stopped) - return + startserver = True + port = val if opt == '-w': - writing = 1 + writing = True + if startserver == True: + if port == None: + port = 0 + gui(port, browse) + return + if not args: raise BadUsage for arg in args: if ispath(arg) and not os.path.exists(arg): @@ -2315,10 +2477,13 @@ Search for a keyword in the synopsis lines of all available modules. 
%s -p - Start an HTTP server on the given port on the local machine. + Start an HTTP server on the given port on the local machine. Port + number 0 can be used to get an arbitrary unused port. -%s -g - Pop up a graphical interface for finding and serving documentation. +%s -b + Start an HTTP server on an arbitrary unused port and open a web browser + to interactively browse documentation. The -p option can be used with + the -b option to explicitly specify the server port. %s -w ... Write out the HTML documentation for a module to a file in the current @@ -2326,4 +2491,5 @@ it names a directory, documentation is written for all the contents. """ % (cmd, os.sep, cmd, cmd, cmd, cmd, os.sep)) -if __name__ == '__main__': cli() +if __name__ == '__main__': + cli() Index: Lib/test/test_pyclbr.py =================================================================== --- Lib/test/test_pyclbr.py (revision 86291) +++ Lib/test/test_pyclbr.py (working copy) @@ -161,8 +161,15 @@ cm('aifc', ignore=('openfp',)) # set with = in module cm('sre_parse', ignore=('dump',)) # from sre_constants import * cm('pdb') - cm('pydoc') + # XXX (ncoghlan): pyclbr uses pure text scans to find base + #classes but uses base.__name__ to find values to test + # against. This fails if the class definition uses a variable + # name or a fully qualified name (i.e. including module name) + # rather than something that exactly matches the __name__ + # attribute of the base class + # cm('pydoc') + # Tests for modules inside packages cm('email.parser') cm('test.test_pyclbr') Index: Lib/test/test_pydoc.py =================================================================== --- Lib/test/test_pydoc.py (revision 86291) +++ Lib/test/test_pydoc.py (working copy) @@ -92,7 +92,7 @@  
 
test.pydoc_mod (version 1.2.3.4)
index
%s%s
+>%s

This is a test module for test_pydoc

@@ -249,7 +249,7 @@ mod_url = nturl2path.pathname2url(mod_file) else: mod_url = mod_file - expected_html = expected_html_pattern % (mod_url, mod_file, doc_loc) + expected_html = expected_html_pattern % (mod_url, mod_file) if result != expected_html: print_diffs(expected_html, result) self.fail("outputs are not equal, see diff above") Index: Lib/test/test_urlparse.py =================================================================== --- Lib/test/test_urlparse.py (revision 86291) +++ Lib/test/test_urlparse.py (working copy) @@ -24,6 +24,17 @@ ("&a=b", [('a', 'b')]), ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), ("a=1&a=2", [('a', '1'), ('a', '2')]), + (b"", []), + (b"&", []), + (b"&&", []), + (b"=", [(b'', b'')]), + (b"=a", [(b'', b'a')]), + (b"a", [(b'a', b'')]), + (b"a=", [(b'a', b'')]), + (b"a=", [(b'a', b'')]), + (b"&a=b", [(b'a', b'b')]), + (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), + (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]), ] class UrlParseTestCase(unittest.TestCase): @@ -86,7 +97,7 @@ def test_roundtrips(self): - testcases = [ + str_cases = [ ('file:///tmp/junk.txt', ('file', '', '/tmp/junk.txt', '', '', ''), ('file', '', '/tmp/junk.txt', '', '')), @@ -110,16 +121,21 @@ ('git+ssh', 'git@github.com','/user/project.git', '','',''), ('git+ssh', 'git@github.com','/user/project.git', - '', '')) + '', '')), ] - for url, parsed, split in testcases: + def _encode(t): + return (t[0].encode('ascii'), + tuple(x.encode('ascii') for x in t[1]), + tuple(x.encode('ascii') for x in t[2])) + bytes_cases = [_encode(x) for x in str_cases] + for url, parsed, split in str_cases + bytes_cases: self.checkRoundtrips(url, parsed, split) def test_http_roundtrips(self): # urllib.parse.urlsplit treats 'http:' as an optimized special case, # so we test both 'http:' and 'https:' in all the following. # Three cheers for white box knowledge! 
- testcases = [ + str_cases = [ ('://www.python.org', ('www.python.org', '', '', '', ''), ('www.python.org', '', '', '')), @@ -136,19 +152,34 @@ ('a', '/b/c/d', 'p', 'q', 'f'), ('a', '/b/c/d;p', 'q', 'f')), ] - for scheme in ('http', 'https'): - for url, parsed, split in testcases: - url = scheme + url - parsed = (scheme,) + parsed - split = (scheme,) + split - self.checkRoundtrips(url, parsed, split) + def _encode(t): + return (t[0].encode('ascii'), + tuple(x.encode('ascii') for x in t[1]), + tuple(x.encode('ascii') for x in t[2])) + bytes_cases = [_encode(x) for x in str_cases] + str_schemes = ('http', 'https') + bytes_schemes = (b'http', b'https') + str_tests = str_schemes, str_cases + bytes_tests = bytes_schemes, bytes_cases + for schemes, test_cases in (str_tests, bytes_tests): + for scheme in schemes: + for url, parsed, split in test_cases: + url = scheme + url + parsed = (scheme,) + parsed + split = (scheme,) + split + self.checkRoundtrips(url, parsed, split) def checkJoin(self, base, relurl, expected): - self.assertEqual(urllib.parse.urljoin(base, relurl), expected, - (base, relurl, expected)) + str_components = (base, relurl, expected) + self.assertEqual(urllib.parse.urljoin(base, relurl), expected) + bytes_components = baseb, relurlb, expectedb = [ + x.encode('ascii') for x in str_components] + self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) def test_unparse_parse(self): - for u in ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]: + str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',] + bytes_cases = [x.encode('ascii') for x in str_cases] + for u in str_cases + bytes_cases: self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) @@ -328,7 +359,7 @@ self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x') def test_RFC2732(self): - for url, hostname, port in [ + str_cases = [ 
('http://Test.python.org:5432/foo/', 'test.python.org', 5432), ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432), ('http://[::1]:5432/foo/', '::1', 5432), @@ -349,20 +380,26 @@ ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None), ('http://[::ffff:12.34.56.78]/foo/', '::ffff:12.34.56.78', None), - ]: + ] + def _encode(t): + return t[0].encode('ascii'), t[1].encode('ascii'), t[2] + bytes_cases = [_encode(x) for x in str_cases] + for url, hostname, port in str_cases + bytes_cases: urlparsed = urllib.parse.urlparse(url) self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port)) - for invalid_url in [ + str_cases = [ 'http://::12.34.56.78]/', 'http://[::1/foo/', 'ftp://[::1/foo/bad]/bad', 'http://[::1/foo/bad]/bad', - 'http://[::ffff:12.34.56.78']: + 'http://[::ffff:12.34.56.78'] + bytes_cases = [x.encode('ascii') for x in str_cases] + for invalid_url in str_cases + bytes_cases: self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) def test_urldefrag(self): - for url, defrag, frag in [ + str_cases = [ ('http://python.org#frag', 'http://python.org', 'frag'), ('http://python.org', 'http://python.org', ''), ('http://python.org/#frag', 'http://python.org/', 'frag'), @@ -373,7 +410,11 @@ ('http://python.org/p?q', 'http://python.org/p?q', ''), (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'), (RFC2396_BASE, 'http://a/b/c/d;p?q', ''), - ]: + ] + def _encode(t): + return type(t)(x.encode('ascii') for x in t) + bytes_cases = [_encode(x) for x in str_cases] + for url, defrag, frag in str_cases + bytes_cases: self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag)) def test_urlsplit_attributes(self): @@ -390,7 +431,8 @@ self.assertEqual(p.port, None) # geturl() won't return exactly the original URL in this case # since the scheme is always case-normalized - #self.assertEqual(p.geturl(), url) + # We handle this by ignoring the first 4 characters of the URL + self.assertEqual(p.geturl()[4:], url[4:]) url = 
"http://User:Pass@www.python.org:080/doc/?query=yes#frag" p = urllib.parse.urlsplit(url) @@ -422,7 +464,46 @@ self.assertEqual(p.port, 80) self.assertEqual(p.geturl(), url) + # And check them all again, only with bytes this time + url = b"HTTP://WWW.PYTHON.ORG/doc/#frag" + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"WWW.PYTHON.ORG") + self.assertEqual(p.path, b"/doc/") + self.assertEqual(p.query, b"") + self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, None) + self.assertEqual(p.geturl()[4:], url[4:]) + url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag" + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"User:Pass@www.python.org:080") + self.assertEqual(p.path, b"/doc/") + self.assertEqual(p.query, b"query=yes") + self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.username, b"User") + self.assertEqual(p.password, b"Pass") + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, 80) + self.assertEqual(p.geturl(), url) + + url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080") + self.assertEqual(p.path, b"/doc/") + self.assertEqual(p.query, b"query=yes") + self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.username, b"User@example.com") + self.assertEqual(p.password, b"Pass") + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, 80) + self.assertEqual(p.geturl(), url) + def test_attributes_bad_port(self): """Check handling of non-integer ports.""" p = urllib.parse.urlsplit("http://www.example.net:foo") @@ -433,6 +514,15 @@ self.assertEqual(p.netloc, "www.example.net:foo") 
self.assertRaises(ValueError, lambda: p.port) + # Once again, repeat ourselves to test bytes + p = urllib.parse.urlsplit(b"http://www.example.net:foo") + self.assertEqual(p.netloc, b"www.example.net:foo") + self.assertRaises(ValueError, lambda: p.port) + + p = urllib.parse.urlparse(b"http://www.example.net:foo") + self.assertEqual(p.netloc, b"www.example.net:foo") + self.assertRaises(ValueError, lambda: p.port) + def test_attributes_without_netloc(self): # This example is straight from RFC 3261. It looks like it # should allow the username, hostname, and port to be filled @@ -456,10 +546,30 @@ self.assertEqual(p.port, None) self.assertEqual(p.geturl(), uri) + # You guessed it, repeating the test with bytes input + uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" + p = urllib.parse.urlsplit(uri) + self.assertEqual(p.netloc, b"") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, None) + self.assertEqual(p.port, None) + self.assertEqual(p.geturl(), uri) + + p = urllib.parse.urlparse(uri) + self.assertEqual(p.netloc, b"") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, None) + self.assertEqual(p.port, None) + self.assertEqual(p.geturl(), uri) + def test_noslash(self): # Issue 1637: http://foo.com?query is legal self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"), ('http', 'example.com', '', '', 'blahblah=/foo', '')) + self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"), + (b'http', b'example.com', b'', b'', b'blahblah=/foo', b'')) def test_withoutscheme(self): # Test urlparse without scheme @@ -472,6 +582,13 @@ ('','www.python.org:80','','','','')) self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), ('http','www.python.org:80','','','','')) + # Repeat for bytes input + self.assertEqual(urllib.parse.urlparse(b"path"), + (b'',b'',b'path',b'',b'',b'')) + 
self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"), + (b'',b'www.python.org:80',b'',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), + (b'http',b'www.python.org:80',b'',b'',b'',b'')) def test_portseparator(self): # Issue 754016 makes changes for port separator ':' from scheme separator @@ -481,6 +598,13 @@ self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','','')) self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), ('http','www.python.org:80','','','','')) + # As usual, need to check bytes input as well + self.assertEqual(urllib.parse.urlparse(b"path:80"), + (b'',b'',b'path:80',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), + (b'http',b'www.python.org:80',b'',b'',b'',b'')) def test_usingsys(self): # Issue 3314: sys module is used in the error @@ -492,7 +616,36 @@ ('s3', 'foo.com', '/stuff', '', '', '')) self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"), ('x-newscheme', 'foo.com', '/stuff', '', '', '')) + # And for bytes... + self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"), + (b's3', b'foo.com', b'/stuff', b'', b'', b'')) + self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"), + (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b'')) + def test_mixed_types_rejected(self): + # Several functions that process either strings or ASCII encoded bytes + # accept multiple arguments. 
Check they reject mixed type input + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlparse("www.python.org", b"http") + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlparse(b"www.python.org", "http") + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlsplit("www.python.org", b"http") + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlsplit(b"www.python.org", "http") + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlunparse(( b"http", "www.python.org","","","","")) + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlunparse(("http", b"www.python.org","","","","")) + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlunsplit((b"http", "www.python.org","","","")) + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlunsplit(("http", b"www.python.org","","","")) + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urljoin("http://python.org", b"http://python.org") + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urljoin(b"http://python.org", "http://python.org") + def test_main(): support.run_unittest(UrlParseTestCase)