NOTE(review): this patch was recovered from a copy in which line breaks and
indentation had been collapsed.  Indentation and blank context lines below
are reconstructed so that every hunk matches the line counts in its header;
exact context whitespace is an assumption -- verify against revision 2692,
or apply with "patch -l" (ignore whitespace).

Index: wsgiref/wsgiref/handlers.py
===================================================================
--- wsgiref/wsgiref/handlers.py	(revision 2692)
+++ wsgiref/wsgiref/handlers.py	(working copy)
@@ -6,7 +6,7 @@
 
 import sys, os, time
 
-__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler']
+__all__ = ['BaseHandler', 'SimpleHandler', 'BaseCGIHandler', 'CGIHandler', 'IISCGIHandler']
 
 try:
     dict
@@ -38,7 +38,83 @@
     )
 
+# On Windows, use ctypes to read environment, if available
+getenvw = None
+if sys.platform == 'win32':
+    try:
+        import ctypes
+    except ImportError:
+        pass
+    else:
+        getenvw = ctypes.windll.kernel32.GetEnvironmentVariableW
 
+def read_environ():
+    """Read environment, fixing HTTP variables on Windows"""
+
+    # Use native bytes environ where available
+    if sys.platform != 'win32':
+        return dict(os.environ.items())
+
+    enc = sys.getfilesystemencoding()
+    software = os.environ.get('SERVER_SOFTWARE', '').lower()
+
+    # Take the basic environment from os.environ bytes. Attempt to fix up the
+    # variables that come from the HTTP request.
+    environ= {}
+    for k, v in os.environ.items():
+        if k in ('SCRIPT_NAME', 'PATH_INFO') or k.startswith('HTTP_'):
+
+            # Initially, re-decode the bytes Python fetched from the
+            # Unicode environment variables using the stdlib, which will have
+            # encoded using the system codepage (mbcs). Characters not in the
+            # system codepage will already have been irretrievably mangled.
+            v = v.decode(enc, 'replace')
+
+            # If we have ctypes, we can do better than that by fetching the
+            # Unicode value directly from the win32 environment.
+            if getenvw is not None:
+                kb= k.decode(enc, 'replace')
+                nbuf = getenvw(kb, None, 0)
+                if nbuf != 0:
+                    buf = ctypes.create_unicode_buffer(u'\0' * nbuf)
+                    getenvw(kb, buf, nbuf)
+                    v = buf.value
+
+            # Now re-encode the Unicode value using whichever encoding we
+            # think the server will have chosen to decode the original bytes
+            # it received from the request. This varies between servers.
+
+            # On IIS, the HTTP request will be decoded as UTF-8 as long
+            # as the input is a valid UTF-8 sequence. Otherwise it is
+            # decoded using the system code page (mbcs), with no way to
+            # detect this has happened. Because UTF-8 is the more likely
+            # encoding, and mbcs is inherently unreliable (an mbcs string
+            # that happens to be valid UTF-8 will not be decoded as mbcs)
+            # always recreate the original bytes as UTF-8.
+            if software.startswith('microsoft-iis/'):
+                v = v.encode('utf-8')
+
+            # Apache mod_cgi writes bytes-as-unicode (as if ISO-8859-1) direct
+            # to the Unicode environ.
+            elif software.startswith('apache/'):
+                v = v.encode('iso-8859-1')
+
+            # Python 3's http.server.CGIHTTPRequestHandler decodes
+            # using the urllib.unquote default of UTF-8, amongst other issues.
+            elif software.startswith('simplehttp/') and 'python/3' in software:
+                v = v.encode('utf-8')
+
+            # For any other servers, guess that they have written bytes to
+            # the environ using stdio byte-oriented interfaces, ending up
+            # with the system code page.
+            else:
+                v = v.encode(enc, 'replace')
+
+        environ[k] = v
+    return environ
+
+
+
 
 class BaseHandler:
     """Manage the invocation of a WSGI application"""
 
@@ -55,7 +131,7 @@
     # os_environ is used to supply configuration from the OS environment:
     # by default it's a copy of 'os.environ' as of import time, but you can
     # override this in e.g. your __init__ method.
-    os_environ = dict(os.environ.items())
+    os_environ = read_environ()
 
     # Collaborator classes
     wsgi_file_wrapper = FileWrapper     # set to None to disable
@@ -471,15 +547,45 @@
 
     def __init__(self):
         BaseCGIHandler.__init__(
-            self, sys.stdin, sys.stdout, sys.stderr, dict(os.environ.items()),
+            self, sys.stdin, sys.stdout, sys.stderr, read_environ(),
             multithread=False, multiprocess=True
         )
 
 
+class IISCGIHandler(BaseCGIHandler):
+    """CGI-based invocation with workaround for IIS path bug
 
+    This handler should be used in preference to CGIHandler when deploying on
+    Microsoft IIS without having set the config allowPathInfo option (IIS>=7)
+    or metabase allowPathInfoForScriptMappings (IIS<7).
+    """
+    wsgi_run_once = True
+    os_environ = {}
 
+    # By default, IIS gives a PATH_INFO that duplicates the SCRIPT_NAME at
+    # the front, causing problems for WSGI applications that wish to implement
+    # routing. This handler strips any such duplicated path.
 
+    # IIS can be configured to pass the correct PATH_INFO, but this causes
+    # another bug where PATH_TRANSLATED is wrong. Luckily this variable is
+    # rarely used and is not guaranteed by WSGI. On IIS<7, though, the
+    # setting can only be made on a vhost level, affecting all other script
+    # mappings, many of which break when exposed to the PATH_TRANSLATED bug.
+    # For this reason IIS<7 is almost never deployed with the fix. (Even IIS7
+    # rarely uses it because there is still no UI for it.)
 
+    # There is no way for CGI code to tell whether the option was set, so a
+    # separate handler class is provided.
+    def __init__(self):
+        environ= read_environ()
+        path = environ.get('PATH_INFO', '')
+        script = environ.get('SCRIPT_NAME', '')
+        if (path+'/').startswith(script+'/'):
+            environ['PATH_INFO'] = path[len(script):]
+        BaseCGIHandler.__init__(
+            self, sys.stdin, sys.stdout, sys.stderr, environ,
+            multithread=False, multiprocess=True
+        )
 
 
 
@@ -489,4 +595,8 @@
 
 
 
+
+
+
+
 #