diff --git a/Lib/pickle.py b/Lib/pickle.py --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -948,18 +948,14 @@ dispatch[BINFLOAT[0]] = load_binfloat def load_string(self): - orig = self.readline() - rep = orig[:-1] - for q in (b'"', b"'"): # double or single quote - if rep.startswith(q): - if not rep.endswith(q): - raise ValueError("insecure string pickle") - rep = rep[len(q):-len(q)] + s = self.readline()[:-1] + for quote in (b'"', b"'"): + if len(s) >= 2 and s.startswith(quote) and s.endswith(quote): + self.append(codecs.escape_decode(s[1:-1])[0] + .decode(self.encoding, self.errors)) break else: - raise ValueError("insecure string pickle: %r" % orig) - self.append(codecs.escape_decode(rep)[0] - .decode(self.encoding, self.errors)) + raise UnpicklingError("the STRING opcode argument must be quoted") dispatch[STRING[0]] = load_string def load_binstring(self): diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -601,22 +601,6 @@ self.assertRaises(KeyError, self.loads, b'g0\np0') self.assertEqual(self.loads(b'((Kdtp0\nh\x00l.))'), [(100,), (100,)]) - def test_insecure_strings(self): - # XXX Some of these tests are temporarily disabled - insecure = [b"abc", b"2 + 2", # not quoted - ## b"'abc' + 'def'", # not a single quoted string - b"'abc", # quote is not closed - b"'abc\"", # open quote and close quote don't match - b"'abc' ?", # junk after close quote - b"'\\'", # trailing backslash - # some tests of the quoting rules - ## b"'abc\"\''", - ## b"'\\\\a\'\'\'\\\'\\\\\''", - ] - for b in insecure: - buf = b"S" + b + b"\012p0\012." - self.assertRaises(ValueError, self.loads, buf) - def test_unicode(self): endcases = ['', '<\\u>', '<\\\u1234>', '<\n>', '<\\>', '<\\\U00012345>', @@ -1206,6 +1190,32 @@ dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.' self.assertRaises(ValueError, self.loads, dumped) + def test_badly_quoted_string(self): + # Issue #17710 + badpickles = [b"S'\n.", + b'S"\n.', + b'S\' \n.', + b'S" \n.', + b'S\'"\n.', + b'S"\'\n.', + b"S' ' \n.", + b'S" " \n.', + b"S ''\n.", + b'S ""\n.', + b'S \n.', + b'S\n.', + b'S.'] + for p in badpickles: + self.assertRaises(pickle.UnpicklingError, self.loads, p) + + def test_correctly_quoted_string(self): + goodpickles = [(b"S''\n.", ''), + (b'S""\n.', ''), + (b'S"\\n"\n.', '\n'), + (b"S'\\n'\n.", '\n')] + for p, expected in goodpickles: + self.assertEqual(self.loads(p), expected) + class BigmemPickleTests(unittest.TestCase): diff --git a/Modules/_pickle.c b/Modules/_pickle.c --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -4171,16 +4171,15 @@ if ((len = _Unpickler_Readline(self, &s)) < 0) return -1; - if (len < 3) + if (len < 3 || s[len - 1] != '\n') return bad_readline(); if ((s = strdup(s)) == NULL) { PyErr_NoMemory(); return -1; } - + /* Strip the newline */ + len--; /* Strip outermost quotes */ - while (s[len - 1] <= ' ') - len--; if (s[0] == '"' && s[len - 1] == '"') { s[len - 1] = '\0'; p = s + 1; @@ -4193,9 +4192,11 @@ } else { free(s); - PyErr_SetString(PyExc_ValueError, "insecure string pickle"); + PyErr_SetString(UnpicklingError, + "the STRING opcode argument must be quoted"); return -1; } + assert(len >= 0); /* Use the PyBytes API to decode the string, since that is what is used to encode, and then coerce the result to Unicode. */