diff -r f4981d8eb401 Doc/library/xmlrpclib.rst --- a/Doc/library/xmlrpclib.rst Fri May 24 13:45:27 2013 +0200 +++ b/Doc/library/xmlrpclib.rst Fri May 24 19:28:44 2013 +0300 @@ -108,13 +108,14 @@ built-in types, the xmlrpclib module currently does not marshal instances of such subclasses. - When passing strings, characters special to XML such as ``<``, ``>``, and ``&`` - will be automatically escaped. However, it's the caller's responsibility to - ensure that the string is free of characters that aren't allowed in XML, such as - the control characters with ASCII values between 0 and 31 (except, of course, - tab, newline and carriage return); failing to do this will result in an XML-RPC - request that isn't well-formed XML. If you have to pass arbitrary strings via - XML-RPC, use the :class:`Binary` wrapper class described below. + Passed strings and Unicode strings shouldn't contain characters forbidden in + XML, such as the control characters with ASCII values between 0 and 31 + (except, of course, tab, newline and carriage return), unpaired surrogates, + U+FFFE and U+FFFF. Strings should be decodable with the specified encoding. If + you have to pass arbitrary strings via XML-RPC, use the :class:`Binary` + wrapper class described below. + + Passed integers should be in the range from -2147483648 to 2147483647. :class:`Server` is retained as an alias for :class:`ServerProxy` for backwards compatibility. New code should use :class:`ServerProxy`. @@ -127,6 +128,9 @@ *__dict__* attribute and don't have a base class that is marshalled in a special way. + .. versionchanged:: 2.7.6 + Sending strings and Unicode strings containing characters that are not + allowed in XML now raises :exc:`ValueError`. ..
seealso:: diff -r f4981d8eb401 Lib/test/test_xmlrpc.py --- a/Lib/test/test_xmlrpc.py Fri May 24 13:45:27 2013 +0200 +++ b/Lib/test/test_xmlrpc.py Fri May 24 19:28:44 2013 +0300 @@ -25,14 +25,14 @@ else: have_unicode = True -alist = [{'astring': 'foo@bar.baz.spam', +alist = [{'astring': 'foo@bar.baz.spam\t\n<&>', 'afloat': 7283.43, 'anint': 2**20, 'ashortlong': 2L, 'anotherlist': ['.zyx.41'], 'abase64': xmlrpclib.Binary("my dog has fleas"), 'boolean': xmlrpclib.False, - 'unicode': u'\u4000\u6000\u8000', + 'unicode': u'\t\n<&>\u4000\u6000\u8000', u'ukey\u4000': 'regular value', 'datetime1': xmlrpclib.DateTime('20050210T11:41:23'), 'datetime2': xmlrpclib.DateTime( @@ -42,11 +42,16 @@ }] class XMLRPCTestCase(unittest.TestCase): + maxDiff = None def test_dump_load(self): self.assertEqual(alist, xmlrpclib.loads(xmlrpclib.dumps((alist,)))[0][0]) + def test_dump_load_encoding(self): + s = xmlrpclib.dumps((alist,), encoding='ascii') + self.assertEqual(alist, xmlrpclib.loads(s)[0][0]) + def test_dump_bare_datetime(self): # This checks that an unwrapped datetime.date object can be handled # by the marshalling code. 
This can't be done via test_dump_load() @@ -187,6 +192,34 @@ self.assertEqual(s, "abc \xc2\x95") self.assertEqual(items, [("def \xc2\x96", "ghi \xc2\x97")]) + def test_dump_invalid_string(self): + # ASCII control characters + for s in set(map(chr, range(32))) - set('\t\n\r'): + self.assertRaises(ValueError, xmlrpclib.dumps, (s,)) + # UTF-8 encoded surrogates + for s in ('\xed\xa0\x80', '\xed\xa0\x80A', + '\xed\xa0\x80\xf0\x90\x80\x80', + '\xed\xb0\x80', 'A\xed\xb0\x80', + '\xf0\x90\x80\x80\xed\xb0\x80'): + self.assertRaises(ValueError, xmlrpclib.dumps, (s,)) + # U+FFFE, and U+FFFF + for s in '\xef\xbf\xbe', '\xef\xbf\xbf': + self.assertRaises(ValueError, xmlrpclib.dumps, (s,)) + # Invalid UTF-8 + for s in ('\x80', '\xc1\xbf', '\xc2', '\xe0\x9f\xbf', '\xe0\xa0', + '\xf0\x8f\xbf\xbf', '\xf0\x90\x80', '\xf4\x90\x80\x80'): + self.assertRaises(ValueError, xmlrpclib.dumps, (s,)) + + @unittest.skipUnless(have_unicode, 'requires unicode support') + def test_dump_invalid_unicode(self): + for s in set(map(unichr, range(32))) - set(u'\t\n\r'): + self.assertRaises(ValueError, xmlrpclib.dumps, (s,)) + for s in (u'\ud800', u'\ud800A', u'\ud800\U00010000', + u'\udc00', u'A\udc00', u'\U00010000\udc00'): + self.assertRaises(ValueError, xmlrpclib.dumps, (s,)) + for s in u'\ufffe', u'\uffff': + self.assertRaises(ValueError, xmlrpclib.dumps, (s,)) + class HelperTestCase(unittest.TestCase): def test_escape(self): diff -r f4981d8eb401 Lib/xmlrpclib.py --- a/Lib/xmlrpclib.py Fri May 24 13:45:27 2013 +0200 +++ b/Lib/xmlrpclib.py Fri May 24 19:28:44 2013 +0300 @@ -142,6 +142,7 @@ import socket import errno import httplib +import sys try: import gzip except ImportError: @@ -171,6 +172,27 @@ data = unicode(data, encoding) return data +if unicode: + if sys.maxunicode >= 0x10000: + _invalid_chars_re = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1f' + u'\ud800-\udfff\ufffe\uffff]', re.S) + else: + _invalid_chars_re = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1f' + u'\ufffe\uffff]|' +
u'[\ud800-\udbff](?![\udc00-\udfff])|' + u'(?") write(escape(value)) write("\n") @@ -697,9 +720,10 @@ if unicode: def dump_unicode(self, value, write, escape=escape): - value = value.encode(self.encoding) + _check_unicode(value) + value = escape(value).encode(self.encoding, 'xmlcharrefreplace') write("") - write(escape(value)) + write(value) write("\n") dispatch[UnicodeType] = dump_unicode @@ -726,12 +750,15 @@ write("\n") for k, v in value.items(): write("\n") - if type(k) is not StringType: - if unicode and type(k) is UnicodeType: - k = k.encode(self.encoding) - else: - raise TypeError, "dictionary key must be string" - write("%s\n" % escape(k)) + if type(k) is StringType: + _check_str(k, self.encoding) + k = escape(k) + elif unicode and type(k) is UnicodeType: + _check_unicode(k) + k = escape(k).encode(self.encoding, 'xmlcharrefreplace') + else: + raise TypeError, "dictionary key must be string" + write("%s\n" % k) dump(v, write) write("\n") write("\n")