diff -r f4981d8eb401 Doc/library/xmlrpclib.rst
--- a/Doc/library/xmlrpclib.rst Fri May 24 13:45:27 2013 +0200
+++ b/Doc/library/xmlrpclib.rst Fri May 24 19:28:44 2013 +0300
@@ -108,13 +108,14 @@
built-in types, the xmlrpclib module currently does not marshal instances of such
subclasses.
- When passing strings, characters special to XML such as ``<``, ``>``, and ``&``
- will be automatically escaped. However, it's the caller's responsibility to
- ensure that the string is free of characters that aren't allowed in XML, such as
- the control characters with ASCII values between 0 and 31 (except, of course,
- tab, newline and carriage return); failing to do this will result in an XML-RPC
- request that isn't well-formed XML. If you have to pass arbitrary strings via
- XML-RPC, use the :class:`Binary` wrapper class described below.
+ Passed strings and Unicode strings shouldn't contain characters forbidden in
+ XML, such as the control characters with ASCII values between 0 and 31
+ (except, of course, tab, newline and carriage return), unpaired surrogates,
+ U+FFFE and U+FFFF. Strings should be decodable with the specified encoding. If
+ you have to pass arbitrary strings via XML-RPC, use the :class:`Binary`
+ wrapper class described below.
+
+ Passed integers should be in the range from -2147483648 to 2147483647.
:class:`Server` is retained as an alias for :class:`ServerProxy` for backwards
compatibility. New code should use :class:`ServerProxy`.
@@ -127,6 +128,9 @@
*__dict__* attribute and don't have a base class that is marshalled in a
special way.
+ .. versionchanged:: 2.7.6
+ Sending strings or Unicode strings that contain characters not allowed
+ in XML now raises :exc:`ValueError`.
.. seealso::
diff -r f4981d8eb401 Lib/test/test_xmlrpc.py
--- a/Lib/test/test_xmlrpc.py Fri May 24 13:45:27 2013 +0200
+++ b/Lib/test/test_xmlrpc.py Fri May 24 19:28:44 2013 +0300
@@ -25,14 +25,14 @@
else:
have_unicode = True
-alist = [{'astring': 'foo@bar.baz.spam',
+alist = [{'astring': 'foo@bar.baz.spam\t\n<&>',
'afloat': 7283.43,
'anint': 2**20,
'ashortlong': 2L,
'anotherlist': ['.zyx.41'],
'abase64': xmlrpclib.Binary("my dog has fleas"),
'boolean': xmlrpclib.False,
- 'unicode': u'\u4000\u6000\u8000',
+ 'unicode': u'\t\n<&>\u4000\u6000\u8000',
u'ukey\u4000': 'regular value',
'datetime1': xmlrpclib.DateTime('20050210T11:41:23'),
'datetime2': xmlrpclib.DateTime(
@@ -42,11 +42,16 @@
}]
class XMLRPCTestCase(unittest.TestCase):
+ maxDiff = None
def test_dump_load(self):
self.assertEqual(alist,
xmlrpclib.loads(xmlrpclib.dumps((alist,)))[0][0])
+ def test_dump_load_encoding(self):
+ s = xmlrpclib.dumps((alist,), encoding='ascii')
+ self.assertEqual(alist, xmlrpclib.loads(s)[0][0])
+
def test_dump_bare_datetime(self):
# This checks that an unwrapped datetime.date object can be handled
# by the marshalling code. This can't be done via test_dump_load()
@@ -187,6 +192,34 @@
self.assertEqual(s, "abc \xc2\x95")
self.assertEqual(items, [("def \xc2\x96", "ghi \xc2\x97")])
+ def test_dump_invalid_string(self):
+ # ASCII control characters
+ for s in set(map(chr, range(32))) - set('\t\n\r'):
+ self.assertRaises(ValueError, xmlrpclib.dumps, (s,))
+ # UTF-8 encoded surrogates
+ for s in ('\xed\xa0\x80', '\xed\xa0\x80A',
+ '\xed\xa0\x80\xf0\x90\x80\x80',
+ '\xed\xb0\x80', 'A\xed\xb0\x80',
+ '\xf0\x90\x80\x80\xed\xb0\x80'):
+ self.assertRaises(ValueError, xmlrpclib.dumps, (s,))
+ # U+FFFE, and U+FFFF
+ for s in '\xef\xbf\xbe', '\xef\xbf\xbf':
+ self.assertRaises(ValueError, xmlrpclib.dumps, (s,))
+ # Invalid UTF-8
+ for s in ('\x80', '\xc1\xbf', '\xc2', '\xe0\x9f\xbf', '\xe0\xa0',
+ '\xf0\x8f\xbf\xbf', '\xf0\x90\x80', '\xf4\x90\x80\x80'):
+ self.assertRaises(ValueError, xmlrpclib.dumps, (s,))
+
+ @unittest.skipUnless(have_unicode, 'requires unicode support')
+ def test_dump_invalid_unicode(self):
+ for s in set(map(unichr, range(32))) - set(u'\t\n\r'):  # ASCII control characters
+ self.assertRaises(ValueError, xmlrpclib.dumps, (s,))
+ for s in (u'\ud800', u'\ud800A', u'\ud800\U00010000',  # unpaired surrogates
+ u'\udc00', u'A\udc00', u'\U00010000\udc00'):
+ self.assertRaises(ValueError, xmlrpclib.dumps, (s,))
+ for s in u'\ufffe', u'\uffff':  # loop variable must be s, the name asserted on below
+ self.assertRaises(ValueError, xmlrpclib.dumps, (s,))
+
class HelperTestCase(unittest.TestCase):
def test_escape(self):
diff -r f4981d8eb401 Lib/xmlrpclib.py
--- a/Lib/xmlrpclib.py Fri May 24 13:45:27 2013 +0200
+++ b/Lib/xmlrpclib.py Fri May 24 19:28:44 2013 +0300
@@ -142,6 +142,7 @@
import socket
import errno
import httplib
+import sys
try:
import gzip
except ImportError:
@@ -171,6 +172,27 @@
data = unicode(data, encoding)
return data
+if unicode:
+ if sys.maxunicode >= 0x10000:
+ _invalid_chars_re = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1f'
+ u'\ud800-\udfff\ufffe\uffff]', re.S)  # no '|' inside [...]: there it is a literal pipe
+ else:
+ _invalid_chars_re = re.compile(u'[\x00-\x08\x0b\x0c\x0e-\x1f'
+ u'\ufffe\uffff]|'  # '|' only outside the class, where it is alternation
+ u'[\ud800-\udbff](?![\udc00-\udfff])|'
+ u'(?")
write(escape(value))
write("\n")
@@ -697,9 +720,10 @@
if unicode:
def dump_unicode(self, value, write, escape=escape):
- value = value.encode(self.encoding)
+ _check_unicode(value)
+ value = escape(value).encode(self.encoding, 'xmlcharrefreplace')
write("")
- write(escape(value))
+ write(value)
write("\n")
dispatch[UnicodeType] = dump_unicode
@@ -726,12 +750,15 @@
write("\n")
for k, v in value.items():
write("\n")
- if type(k) is not StringType:
- if unicode and type(k) is UnicodeType:
- k = k.encode(self.encoding)
- else:
- raise TypeError, "dictionary key must be string"
- write("%s\n" % escape(k))
+ if type(k) is StringType:
+ _check_str(k, self.encoding)
+ k = escape(k)
+ elif unicode and type(k) is UnicodeType:
+ _check_unicode(k)
+ k = escape(k).encode(self.encoding, 'xmlcharrefreplace')
+ else:
+ raise TypeError, "dictionary key must be string"
+ write("%s\n" % k)
dump(v, write)
write("\n")
write("\n")