diff -r 43ae2a243eca Doc/library/urllib.parse.rst --- a/Doc/library/urllib.parse.rst Thu Jul 26 00:47:15 2012 +0200 +++ b/Doc/library/urllib.parse.rst Sat Sep 15 15:19:48 2012 -0700 @@ -510,34 +510,42 @@ .. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None) - Convert a mapping object or a sequence of two-element tuples, which may - either be a :class:`str` or a :class:`bytes`, to a "percent-encoded" - string. If the resultant string is to be used as a *data* for POST - operation with :func:`urlopen` function, then it should be properly encoded - to bytes, otherwise it would result in a :exc:`TypeError`. + Convert an iterable of two-element tuples or a dictionary to a + "percent-encoded" query string. The resulting string is a series of ``key=value`` pairs separated by ``'&'`` - characters, where both *key* and *value* are quoted using :func:`quote_plus` - above. When a sequence of two-element tuples is used as the *query* - argument, the first element of each tuple is a key and the second is a - value. The value element in itself can be a sequence and in that case, if - the optional parameter *doseq* is evaluates to *True*, individual - ``key=value`` pairs separated by ``'&'`` are generated for each element of - the value sequence for the key. The order of parameters in the encoded - string will match the order of parameter tuples in the sequence. + characters, where both *key* and *value* are quoted using :func:`quote_plus`. + When a sequence of two-element tuples is used as the *query* argument, the + first element of each tuple is a key and the second is a value. + + If the value element is itself iterable and the optional *doseq* parameter + is *True*, individual ``key=value`` pairs separated by ``'&'`` will be + generated for each element of the value. - When *query* parameter is a :class:`str`, the *safe*, *encoding* and *error* - parameters are passed down to :func:`quote_plus` for encoding. 
+ Elements of the parameter pairs that are not :class:`bytes` values will be + passed to :func:`quote_plus` with the *safe*, *encoding* and *errors* + parameters for encoding (after first being converted to a :class:`str` if + necessary). :class:`bytes` values will be passed to :func:`quote_plus` + without these extra encoding parameters. + + The order of parameters in the encoded string will match the order of + the parameter tuples in the *query* iterable. To reverse this encoding process, :func:`parse_qs` and :func:`parse_qsl` are provided in this module to parse query strings into Python data structures. + If the resultant string is to be used as *data* for a POST operation with the + :func:`urlopen` function, then it should be properly encoded to bytes, + otherwise it will result in a :exc:`TypeError`. + Refer to :ref:`urllib examples <urllib-examples>` to find out how urlencode method can be used for generating query string for a URL or data for POST. .. versionchanged:: 3.2 Query parameter supports bytes and string objects. + .. versionchanged:: 3.4 + The *query* parameter can now be any iterable of zero or more pairs. .. 
seealso:: diff -r 43ae2a243eca Lib/test/test_urllib.py --- a/Lib/test/test_urllib.py Thu Jul 26 00:47:15 2012 +0200 +++ b/Lib/test/test_urllib.py Sat Sep 15 15:19:48 2012 -0700 @@ -1072,6 +1072,24 @@ encoding="latin-1") self.assertEqual(expect, result) + def test_non_empty_strings(self): + self.assertRaises(TypeError, + urllib.parse.urlencode, "a=b") + + def test_bad_pairs(self): + self.assertRaises(TypeError, + urllib.parse.urlencode, [(1,)]) + self.assertRaises(TypeError, + urllib.parse.urlencode, [(1,2,3)]) + + def test_non_iterable(self): + self.assertRaises(TypeError, + urllib.parse.urlencode, 1) + + def test_generator(self): + self.assertEqual("i=0&i=1&i=2&i=3", + urllib.parse.urlencode(('i', i) for i in range(4))) + class Pathname_Tests(unittest.TestCase): """Test pathname2url() and url2pathname()""" diff -r 43ae2a243eca Lib/urllib/parse.py --- a/Lib/urllib/parse.py Thu Jul 26 00:47:15 2012 +0200 +++ b/Lib/urllib/parse.py Sat Sep 15 15:19:48 2012 -0700 @@ -727,82 +727,80 @@ return ''.join([quoter(char) for char in bs]) def urlencode(query, doseq=False, safe='', encoding=None, errors=None): - """Encode a sequence of two-element tuples or dictionary into a URL query string. + """Encode an iterable of two-element tuples or a dictionary into a URL query string. - If any values in the query arg are sequences and doseq is true, each - sequence element is converted to a separate parameter. + The name & value of each parameter in the output string will be encoded + by a call to quote_plus. Names & values that are not strings will be + implicitly converted to strings before being passed to quote_plus + with the given encoding. - If the query arg is a sequence of two-element tuples, the order of the + If any values in the query arg are iterable and doseq is true, each + element in the value is converted to a separate parameter in the + output string. 
+ + If the query arg is an iterable of two-element tuples, the order of the parameters in the output will match the order of parameters in the input. - The query arg may be either a string or a bytes type. When query arg is a - string, the safe, encoding and error parameters are sent the quote_plus for - encoding. + You may also pass a zero-length string to urlencode and it will be + returned unchanged, but non-empty strings will cause a TypeError to + be raised. """ if hasattr(query, "items"): query = query.items() - else: - # It's a bother at times that strings and string-like objects are - # sequences. - try: - # non-sequence items should not work with len() - # non-empty strings will fail this - if len(query) and not isinstance(query[0], tuple): - raise TypeError - # Zero-length sequences of all types will get here and succeed, - # but that's a minor nit. Since the original implementation - # allowed empty dicts that type of behavior probably should be - # preserved for consistency - except TypeError: - ty, va, tb = sys.exc_info() - raise TypeError("not a valid non-string sequence " - "or mapping object").with_traceback(tb) + elif isinstance(query, (str, bytes)): + if len(query): + raise TypeError("only empty strings may be passed to urlencode") + else: + return query - l = [] - if not doseq: - for k, v in query: - if isinstance(k, bytes): - k = quote_plus(k, safe) - else: - k = quote_plus(str(k), safe, encoding, errors) + try: + l = [] + if not doseq: + for k, v in query: + if isinstance(k, bytes): + k = quote_plus(k, safe) + else: + k = quote_plus(str(k), safe, encoding, errors) - if isinstance(v, bytes): - v = quote_plus(v, safe) - else: - v = quote_plus(str(v), safe, encoding, errors) - l.append(k + '=' + v) - else: - for k, v in query: - if isinstance(k, bytes): - k = quote_plus(k, safe) - else: - k = quote_plus(str(k), safe, encoding, errors) + if isinstance(v, bytes): + v = quote_plus(v, safe) + else: + v = quote_plus(str(v), safe, encoding, errors) + 
l.append(k + '=' + v) + else: + for k, v in query: + if isinstance(k, bytes): + k = quote_plus(k, safe) + else: + k = quote_plus(str(k), safe, encoding, errors) - if isinstance(v, bytes): - v = quote_plus(v, safe) - l.append(k + '=' + v) - elif isinstance(v, str): - v = quote_plus(v, safe, encoding, errors) - l.append(k + '=' + v) - else: - try: - # Is this a sufficient test for sequence-ness? - x = len(v) - except TypeError: - # not a sequence - v = quote_plus(str(v), safe, encoding, errors) + if isinstance(v, bytes): + v = quote_plus(v, safe) + l.append(k + '=' + v) + elif isinstance(v, str): + v = quote_plus(v, safe, encoding, errors) l.append(k + '=' + v) else: - # loop over the sequence - for elt in v: - if isinstance(elt, bytes): - elt = quote_plus(elt, safe) - else: - elt = quote_plus(str(elt), safe, encoding, errors) - l.append(k + '=' + elt) - return '&'.join(l) + try: + # Is this a sufficient test for iterable-ness? + iter(v) + except TypeError: + # not iterable + v = quote_plus(str(v), safe, encoding, errors) + l.append(k + '=' + v) + else: + # loop over the sequence + for elt in v: + if isinstance(elt, bytes): + elt = quote_plus(elt, safe) + else: + elt = quote_plus(str(elt), safe, encoding, errors) + l.append(k + '=' + elt) + return '&'.join(l) + except (TypeError, ValueError): + raise TypeError("query arg must be an iterable of pairs") # Utilities to parse URLs (most of these return None for missing parts): # unwrap('') --> 'type://host/path'