diff -r 5b33829badcc Lib/json/__init__.py --- a/Lib/json/__init__.py Mon Oct 24 23:49:51 2016 +0300 +++ b/Lib/json/__init__.py Thu Oct 27 21:07:28 2016 -0500 @@ -256,8 +256,9 @@ # 00 XX -- -- - utf-16-be return 'utf-16-be' if b[1] else 'utf-32-be' if not b[1]: + # XX 00 XX -- - utf-16-le + # XX 00 00 XX - utf-16-le # XX 00 00 00 - utf-32-le - # XX 00 XX XX - utf-16-le return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' elif len(b) == 2: if not b[0]: diff -r 5b33829badcc Lib/test/test_json/test_unicode.py --- a/Lib/test/test_json/test_unicode.py Mon Oct 24 23:49:51 2016 +0300 +++ b/Lib/test/test_json/test_unicode.py Thu Oct 27 21:07:28 2016 -0500 @@ -65,6 +65,19 @@ self.assertEqual(self.loads(bom + encoded), data) self.assertEqual(self.loads(encoded), data) self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]') + # RFC 7159 and ECMA-404 extend JSON to allow documents that + # consist of only a string, which can present a special case + # not covered by the encoding detection patterns specified in + # RFC 4627 for utf-16-le (XX 00 00 XX). + self.assertEqual(self.loads('"\u2600"'.encode('utf-16-le')), + '\u2600') + # Encoding detection for bytes objects shorter than 4 bytes + # is implemented as a special case. RFC 7159 and ECMA-404 + # allow single-code-point JSON documents, which are only two + # bytes long in the utf-16 encodings without a BOM. + self.assertEqual(self.loads('5'.encode('utf-16-le')), 5) + self.assertEqual(self.loads('5'.encode('utf-16-be')), 5) + self.assertEqual(self.loads('25'.encode('utf-8')), 25) def test_object_pairs_hook_with_unicode(self): s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'