diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1132,9 +1132,11 @@ | | quotedprintable | | quoted printable | +--------------------+---------------------------+----------------+---------------------------+ | raw_unicode_escape | | Unicode string | Produce a string that is | -| | | | suitable as raw Unicode | -| | | | literal in Python source | -| | | | code | +| | | | suitable for the contents | +| | | | of a raw Unicode literal | +| | | | in Python source code. | +| | | | All single and double | +| | | | quotes are escaped. | +--------------------+---------------------------+----------------+---------------------------+ | rot_13 | rot13 | Unicode string | Returns the Caesar-cypher | | | | | encryption of the operand | @@ -1153,9 +1155,11 @@ | | | | is desired. | +--------------------+---------------------------+----------------+---------------------------+ | unicode_escape | | Unicode string | Produce a string that is | -| | | | suitable as Unicode | -| | | | literal in Python source | -| | | | code | +| | | | suitable for the contents | +| | | | of a Unicode literal in | +| | | | Python source code. All | +| | | | single and double quotes | +| | | | are escaped. | +--------------------+---------------------------+----------------+---------------------------+ | unicode_internal | | Unicode string | Return the internal | | | | | representation of the | @@ -1171,6 +1175,10 @@ .. versionadded:: 2.3 The ``idna`` and ``punycode`` encodings. +.. versionchanged:: 2.7 + Escaping for single and double quotes added to the ``unicode_escape`` + encoder. Escaping for single quotes, double quotes, and backslashes + added to the ``raw_unicode_escape`` encoder. :mod:`encodings.idna` --- Internationalized Domain Names in Applications ------------------------------------------------------------------------ diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst --- a/Doc/whatsnew/2.7.rst +++ b/Doc/whatsnew/2.7.rst @@ -495,6 +495,10 @@ management protocol, so you can write ``with bz2.BZ2File(...) as f: ...``. (Contributed by Hagen Fuerstenau; :issue:`3860`.) +* :mod:`codecs`: The ``unicode_escape`` encoder now escapes all + single and double quotes. The ``raw_unicode_escape`` encoder now + escapes all single quotes, double quotes, and backslashes. + * New class: the :class:`Counter` class in the :mod:`collections` module is useful for tallying data. :class:`Counter` instances behave mostly like dictionaries but return zero for missing keys instead of @@ -1215,6 +1219,12 @@ nothing when a negative length is requested, as other file-like objects do. (:issue:`7348`). +* The ``unicode_escape`` encoder now escapes all single and double + quotes. The ``raw_unicode_escape`` encoder now escapes all single + quotes, double quotes, and backslashes. Any extra escaping applied + on top of one of these encoders should be reviewed to ensure + that the decoded value remains valid. + For C extensions: * C extensions that use integer format codes with the ``PyArg_Parse*`` diff --git a/Lib/pickle.py b/Lib/pickle.py --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -495,9 +495,9 @@ n = len(encoding) self.write(BINUNICODE + pack(" 0) { Py_UNICODE ch = *s++; /* Escape quotes and backslashes */ - if ((quotes && - ch == (Py_UNICODE) PyString_AS_STRING(repr)[1]) || ch == '\\') { + if ((escape_single_quotes && ch == '\'') || + (escape_double_quotes && ch == '"') || + ch == '\\') { *p++ = '\\'; *p++ = (char) ch; continue; @@ -3121,7 +3130,7 @@ else *p++ = (char) ch; } - if (quotes) + if (enclose_in_quotes) *p++ = PyString_AS_STRING(repr)[1]; *p = '\0'; @@ -3332,8 +3341,8 @@ size++; } #endif - /* Map 16-bit characters to '\uxxxx' */ - if (ch >= 256) { + /* Map 16-bit characters, backslashes, and quotes to '\uxxxx' */ + if (ch >= 256 || ch == '\\' || ch == '\'' || ch == '"') { *p++ = '\\'; *p++ = 'u'; *p++ = hexdigit[(ch >> 12) & 0xf];