Index: Doc/library/urllib.rst =================================================================== --- Doc/library/urllib.rst (revision 63861) +++ Doc/library/urllib.rst (working copy) @@ -231,7 +231,7 @@ of the sequence. When a sequence of two-element tuples is used as the *query* argument, the first element of each tuple is a key and the second is a value. The order of parameters in the encoded string will match the order of parameter - tuples in the sequence. The :mod:`cgi` module provides the functions + tuples in the sequence. The :mod:`urlparse` module provides the functions :func:`parse_qs` and :func:`parse_qsl` which are used to parse query strings into Python data structures. Index: Lib/cgi.py =================================================================== --- Lib/cgi.py (revision 63861) +++ Lib/cgi.py (working copy) @@ -41,6 +41,8 @@ import mimetools import rfc822 import UserDict +import urlparse + try: from cStringIO import StringIO except ImportError: @@ -166,72 +168,17 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0): - """Parse a query given as a string argument. - - Arguments: - - qs: URL-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - URL encoded queries should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. + """Parse a query given as a string argument.This method is moved to + urlparse and is called directly from there. """ - dict = {} - for name, value in parse_qsl(qs, keep_blank_values, strict_parsing): - if name in dict: - dict[name].append(value) - else: - dict[name] = [value] - return dict + return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): - """Parse a query given as a string argument. - - Arguments: - - qs: URL-encoded query string to be parsed - - keep_blank_values: flag indicating whether blank values in - URL encoded queries should be treated as blank strings. A - true value indicates that blanks should be retained as blank - strings. The default false value indicates that blank values - are to be ignored and treated as if they were not included. - - strict_parsing: flag indicating what to do with parsing errors. If - false (the default), errors are silently ignored. If true, - errors raise a ValueError exception. - - Returns a list, as G-d intended. + """Parse a query given as a string argument.This method is moved to + urlparse and is called directly from there. """ - pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] - r = [] - for name_value in pairs: - if not name_value and not strict_parsing: - continue - nv = name_value.split('=', 1) - if len(nv) != 2: - if strict_parsing: - raise ValueError, "bad query field: %r" % (name_value,) - # Handle case of a control-name with no equal sign - if keep_blank_values: - nv.append('') - else: - continue - if len(nv[1]) or keep_blank_values: - name = urllib.unquote(nv[0].replace('+', ' ')) - value = urllib.unquote(nv[1].replace('+', ' ')) - r.append((name, value)) + return urlparse.parse_qsl(qs, keep_blank_values, strict_parsing) - return r - - def parse_multipart(fp, pdict): """Parse multipart input. Index: Lib/urlparse.py =================================================================== --- Lib/urlparse.py (revision 63861) +++ Lib/urlparse.py (working copy) @@ -3,9 +3,8 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June 1995. """ - __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", - "urlsplit", "urlunsplit"] + "urlsplit", "urlunsplit","parse_qs","parse_qsl"] # A classification of schemes ('' means apply by default) uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', @@ -256,7 +255,93 @@ else: return url, '' +# unquote method for parse_qs and parse_qsl +_hextochr = dict(('%02x' % i, chr(i)) for i in range(256)) +_hextochr.update(('%02X' % i, chr(i)) for i in range(256)) + +def unquote(s): + """unquote('abc%20def') -> 'abc def'.""" + res = s.split('%') + for i in xrange(1, len(res)): + item = res[i] + try: + res[i] = _hextochr[item[:2]] + item[2:] + except KeyError: + res[i] = '%' + item + except UnicodeDecodeError: + res[i] = unichr(int(item[:2], 16)) + item[2:] + return "".join(res) + +def parse_qs(qs, keep_blank_values=0, strict_parsing=0): + """Parse a query given as a string argument. + + Arguments: + + qs: URL-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + URL encoded queries should be treated as blank strings. + A true value indicates that blanks should be retained as + blank strings. The default false value indicates that + blank values are to be ignored and treated as if they were + not included. + + strict_parsing: flag indicating what to do with parsing errors. + If false (the default), errors are silently ignored. + If true, errors raise a ValueError exception. + """ + dict = {} + for name, value in parse_qsl(qs, keep_blank_values, strict_parsing): + if name in dict: + dict[name].append(value) + else: + dict[name] = [value] + return dict + +def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): + """Parse a query given as a string argument. + + Arguments: + + qs: URL-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + URL encoded queries should be treated as blank strings. A + true value indicates that blanks should be retained as blank + strings. The default false value indicates that blank values + are to be ignored and treated as if they were not included. + + strict_parsing: flag indicating what to do with parsing errors. If + false (the default), errors are silently ignored. If true, + errors raise a ValueError exception. + + Returns a list, as G-d intended. + """ + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + r = [] + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError, "bad query field: %r" % (name_value,) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = unquote(nv[0].replace('+', ' ')) + value = unquote(nv[1].replace('+', ' ')) + r.append((name, value)) + + return r + + + + test_input = """ http://a/b/c/d