diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -1131,6 +1131,8 @@ particular, the following variants typic
 +-----------------+--------------------------------+--------------------------------+
 | utf_8           | U8, UTF, utf8                  | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
+| utf_8_java      |                                | all languages                  |
++-----------------+--------------------------------+--------------------------------+
 | utf_8_sig       |                                | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
 
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -181,6 +181,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
+# define PyUnicode_DecodeUTF8JavaStateful PyUnicodeUCS2_DecodeUTF8JavaStateful
 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
 # define PyUnicode_Encode PyUnicodeUCS2_Encode
 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
@@ -191,6 +192,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
+# define PyUnicode_EncodeUTF8Java PyUnicodeUCS2_EncodeUTF8Java
 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
 # define PyUnicode_Find PyUnicodeUCS2_Find
 # define PyUnicode_Format PyUnicodeUCS2_Format
@@ -265,6 +267,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
+# define PyUnicode_DecodeUTF8JavaStateful PyUnicodeUCS4_DecodeUTF8JavaStateful
 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
 # define PyUnicode_Encode PyUnicodeUCS4_Encode
 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
@@ -275,6 +278,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
+# define PyUnicode_EncodeUTF8Java PyUnicodeUCS4_EncodeUTF8Java
 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
 # define PyUnicode_Find PyUnicodeUCS4_Find
 # define PyUnicode_Format PyUnicodeUCS4_Format
@@ -828,6 +832,13 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUT
     Py_ssize_t *consumed        /* bytes consumed */
     );
 
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8JavaStateful(
+    const char *string,         /* utf-8-java encoded string */
+    Py_ssize_t length,          /* size of string in bytes */
+    const char *errors,         /* error handling */
+    Py_ssize_t *consumed        /* bytes consumed; NULL if input is final */
+    );
+
 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
     PyObject *unicode           /* Unicode object */
     );
@@ -838,6 +849,11 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUT
     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
     const char *errors          /* error handling */
     );
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8Java(
+    const Py_UNICODE *data,     /* Unicode char buffer */
+    Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
+    const char *errors          /* error handling */
+    );                          /* as EncodeUTF8, but U+0000 -> C0 80 */
 #endif
 
 /* --- UTF-32 Codecs ------------------------------------------------------ */
diff --git a/Lib/encodings/utf_8_java.py b/Lib/encodings/utf_8_java.py
new file mode 100644
--- /dev/null
+++ b/Lib/encodings/utf_8_java.py
@@ -0,0 +1,36 @@
+""" Python 'utf-8-java' Codec
+"""
+import codecs
+
+### Codec APIs
+
+encode = codecs.utf_8_java_encode
+
+def decode(input, errors='strict'):
+    return codecs.utf_8_java_decode(input, errors, True)  # True = final
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+    def encode(self, input, final=False):  # 'final' is not used here
+        return codecs.utf_8_java_encode(input, self.errors)[0]  # data only
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+    _buffer_decode = codecs.utf_8_java_decode  # takes (input, errors, final)
+
+class StreamWriter(codecs.StreamWriter):
+    encode = codecs.utf_8_java_encode  # takes (input, errors)
+
+class StreamReader(codecs.StreamReader):
+    decode = codecs.utf_8_java_decode  # 'final' defaults to false
+
+### encodings module API
+
+def getregentry():  # returns the CodecInfo describing this codec
+    return codecs.CodecInfo(
+        name='utf-8-java',  # same spelling the C codec reports in errors
+        encode=encode,
+        decode=decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -578,7 +578,45 @@ class UTF16BETest(ReadTest):
         self.assertEqual(b'\xd8\x00\xde\x03'.decode(self.encoding),
                          "\U00010203")
 
-class UTF8Test(ReadTest):
+
+class BaseUTF8Test(ReadTest):  # shared by UTF8Test and UTF8JavaTest
+    def test_decoder_state(self):
+        u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
+        self.check_state_handling_decode(self.encoding,
+                                         u, u.encode(self.encoding))
+
+    def test_lone_surrogates(self):
+        self.assertRaises(UnicodeEncodeError, "\ud800".encode, self.encoding)
+        self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, self.encoding)
+        self.assertEqual("[\uDC80]".encode(self.encoding, "backslashreplace"),
+                         b'[\\udc80]')
+        self.assertEqual("[\uDC80]".encode(self.encoding, "xmlcharrefreplace"),
+                         b'[&#56448;]')
+        self.assertEqual("[\uDC80]".encode(self.encoding, "surrogateescape"),
+                         b'[\x80]')
+        self.assertEqual("[\uDC80]".encode(self.encoding, "ignore"),
+                         b'[]')
+        self.assertEqual("[\uDC80]".encode(self.encoding, "replace"),
+                         b'[?]')
+
+    def test_surrogatepass_handler(self):
+        self.assertEqual("abc\ud800def".encode(self.encoding, "surrogatepass"),
+                         b"abc\xed\xa0\x80def")
+        self.assertEqual(b"abc\xed\xa0\x80def".decode(self.encoding, "surrogatepass"),
+                         "abc\ud800def")
+        self.assertTrue(codecs.lookup_error("surrogatepass"))
+
+    def test_invalid(self):
+        for invalid in (
+            b'\xC0\x81',  # C0 lead, but only C0 80 is special in utf-8-java
+            b'\xC0\xFF',  # C0 followed by a byte that is not 10xxxxxx
+            b'\xC1\x10',  # C1 has code length 0 in the java table as well
+            b'\xC1\x80',
+        ):  # NOTE(review): no case pairs a valid lead (C2-DF) with a bad trail
+            with self.assertRaises(UnicodeDecodeError):
+                invalid.decode(self.encoding)
+
+class UTF8Test(BaseUTF8Test):
     encoding = "utf-8"
 
     def test_partial(self):
@@ -599,31 +637,35 @@ class UTF8Test(ReadTest):
             ]
         )
 
-    def test_decoder_state(self):
-        u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
-        self.check_state_handling_decode(self.encoding,
-                                         u, u.encode(self.encoding))
+    def test_null_byte(self):
+        self.assertEqual('a\x00b'.encode(self.encoding), b'a\x00b')
+        self.assertEqual(b'a\x00b'.decode(self.encoding), 'a\x00b')
 
-    def test_lone_surrogates(self):
-        self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
-        self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
-        self.assertEqual("[\uDC80]".encode("utf-8", "backslashreplace"),
-                         b'[\\udc80]')
-        self.assertEqual("[\uDC80]".encode("utf-8", "xmlcharrefreplace"),
-                         b'[&#56448;]')
-        self.assertEqual("[\uDC80]".encode("utf-8", "surrogateescape"),
-                         b'[\x80]')
-        self.assertEqual("[\uDC80]".encode("utf-8", "ignore"),
-                         b'[]')
-        self.assertEqual("[\uDC80]".encode("utf-8", "replace"),
-                         b'[?]')
+class UTF8JavaTest(BaseUTF8Test):
+    encoding = "utf-8-java"
 
-    def test_surrogatepass_handler(self):
-        self.assertEqual("abc\ud800def".encode("utf-8", "surrogatepass"),
-                         b"abc\xed\xa0\x80def")
-        self.assertEqual(b"abc\xed\xa0\x80def".decode("utf-8", "surrogatepass"),
-                         "abc\ud800def")
-        self.assertTrue(codecs.lookup_error("surrogatepass"))
+    def test_partial(self):
+        self.check_partial(
+            "\x00\xff\u07ff\u0800\uffff",  # 2+2+2+3+3 = 12 encoded bytes
+            [  # 12 entries; presumably one per encoded byte - see check_partial
+                "",  # U+0000 is two bytes here, so nothing after the first
+                "\x00",
+                "\x00",
+                "\x00\xff",
+                "\x00\xff",
+                "\x00\xff\u07ff",
+                "\x00\xff\u07ff",
+                "\x00\xff\u07ff",
+                "\x00\xff\u07ff\u0800",
+                "\x00\xff\u07ff\u0800",
+                "\x00\xff\u07ff\u0800",
+                "\x00\xff\u07ff\u0800\uffff",
+            ]
+        )
+
+    def test_null_byte(self):  # U+0000 <-> C0 80 round trip
+        self.assertEqual('a\x00b'.encode(self.encoding), b'a\xc0\x80b')
+        self.assertEqual(b'a\xc0\x80b'.decode(self.encoding), 'a\x00b')
 
 class UTF7Test(ReadTest):
     encoding = "utf-7"
@@ -1728,6 +1770,7 @@ def test_main():
         UTF16LETest,
         UTF16BETest,
         UTF8Test,
+        UTF8JavaTest,
         UTF8SigTest,
         UTF7Test,
         UTF16ExTest,
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -295,6 +295,29 @@ utf_8_decode(PyObject *self,
 }
 
 static PyObject *
+utf_8_java_decode(PyObject *self,
+                  PyObject *args)
+{
+    Py_buffer pbuf;
+    const char *errors = NULL;
+    int final = 0;              /* default when the caller omits it */
+    Py_ssize_t consumed;
+    PyObject *decoded = NULL;
+
+    if (!PyArg_ParseTuple(args, "y*|zi:utf_8_java_decode",
+                          &pbuf, &errors, &final))
+        return NULL;
+    consumed = pbuf.len;        /* kept as-is when final (NULL passed below) */
+
+    decoded = PyUnicode_DecodeUTF8JavaStateful(pbuf.buf, pbuf.len, errors,
+                                               final ? NULL : &consumed);
+    PyBuffer_Release(&pbuf);    /* done before the NULL check: runs on both paths */
+    if (decoded == NULL)
+        return NULL;
+    return codec_tuple(decoded, consumed);
+}
+
+static PyObject *
 utf_16_decode(PyObject *self,
             PyObject *args)
 {
@@ -710,6 +733,28 @@ utf_8_encode(PyObject *self,
     return v;
 }
 
+static PyObject *
+utf_8_java_encode(PyObject *self,
+                  PyObject *args)
+{
+    PyObject *str, *v;
+    const char *errors = NULL;
+    /* The text after ':' names this function in argument errors. */
+    if (!PyArg_ParseTuple(args, "O|z:utf_8_java_encode",
+                          &str, &errors))
+        return NULL;
+
+    str = PyUnicode_FromObject(str);
+    if (str == NULL)
+        return NULL;
+    v = codec_tuple(PyUnicode_EncodeUTF8Java(PyUnicode_AS_UNICODE(str),
+                                             PyUnicode_GET_SIZE(str),
+                                             errors),
+                    PyUnicode_GET_SIZE(str));
+    Py_DECREF(str);             /* drop the reference taken above */
+    return v;
+}
+
 /* This version provides access to the byteorder parameter of the
    builtin UTF-16 codecs as optional third argument. It defaults to 0
    which means: use the native byte order and prepend the data with a
@@ -1071,6 +1116,8 @@ static PyMethodDef _codecs_functions[] =
     {"escape_decode",           escape_decode,                  METH_VARARGS},
     {"utf_8_encode",            utf_8_encode,                   METH_VARARGS},
     {"utf_8_decode",            utf_8_decode,                   METH_VARARGS},
+    {"utf_8_java_encode",       utf_8_java_encode,              METH_VARARGS},
+    {"utf_8_java_decode",       utf_8_java_decode,              METH_VARARGS},
     {"utf_7_encode",            utf_7_encode,                   METH_VARARGS},
     {"utf_7_decode",            utf_7_decode,                   METH_VARARGS},
     {"utf_16_encode",           utf_16_encode,                  METH_VARARGS},
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2567,6 +2567,27 @@ char utf8_code_length[256] = {
     4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /* F0-F4 + F5-FF */
 };
 
+static char utf8java_code_length[256] = {
+    /* similar to utf8_code_length except that utf8java_code_length[0xC0] is 2
+       instead of 0 to decode {0xC0, 0x80} as U+0000 (file-local table) */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00-0F */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 70-7F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8F */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0-BF */
+    2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* C0-C1 + C2-CF */
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* D0-DF */
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* E0-EF */
+    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /* F0-F4 + F5-FF */
+};
+
 PyObject *
 PyUnicode_DecodeUTF8(const char *s,
 		     Py_ssize_t size,
@@ -2588,11 +2609,12 @@ PyUnicode_DecodeUTF8(const char *s,
 # error C 'long' size should be either 4 or 8!
 #endif
 
-PyObject *
-PyUnicode_DecodeUTF8Stateful(const char *s,
-			     Py_ssize_t size,
-			     const char *errors,
-			     Py_ssize_t *consumed)
+static PyObject *
+decode_utf8_stateful(const char *s,
+                     Py_ssize_t size,
+                     const char *errors,
+                     Py_ssize_t *consumed,
+                     int java)
 {
     const char *starts = s;
     int n;
@@ -2606,6 +2628,7 @@ PyUnicode_DecodeUTF8Stateful(const char 
     const char *errmsg = "";
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
+    char *code_length;
 
     /* Note: size will always be longer than the resulting Unicode
        character count */
@@ -2623,6 +2646,11 @@ PyUnicode_DecodeUTF8Stateful(const char 
     e = s + size;
     aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK);
 
+    if (java)
+        code_length = utf8java_code_length;
+    else
+        code_length = utf8_code_length;
+
     while (s < e) {
         Py_UCS4 ch = (unsigned char)*s;
 
@@ -2672,7 +2700,7 @@ PyUnicode_DecodeUTF8Stateful(const char 
             continue;
         }
 
-        n = utf8_code_length[ch];
+        n = code_length[ch];
 
         if (s + n > e) {
             if (consumed)
@@ -2702,14 +2730,13 @@ PyUnicode_DecodeUTF8Stateful(const char 
             goto utf8Error;
 
         case 2:
-            if ((s[1] & 0xc0) != 0x80) {
+            ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f); /* < 0x80: overlong */
+            if ((s[1] & 0xc0) != 0x80 || (ch < 0x80 && (ch != 0 || !java))) {
                 errmsg = "invalid continuation byte";
                 startinpos = s-starts;
                 endinpos = startinpos + 1;
                 goto utf8Error;
             }
-            ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
-            assert ((ch > 0x007F) && (ch <= 0x07FF));
             *p++ = (Py_UNICODE)ch;
             break;
 
@@ -2787,7 +2814,8 @@ PyUnicode_DecodeUTF8Stateful(const char 
         outpos = p-PyUnicode_AS_UNICODE(unicode);
         if (unicode_decode_call_errorhandler(
                 errors, &errorHandler,
-                "utf8", errmsg,
+                java ? "utf-8-java" : "utf-8",
+                errmsg,
                 &starts, &e, &startinpos, &endinpos, &exc, &s,
                 &unicode, &outpos, &p))
             goto onError;
@@ -2811,6 +2839,25 @@ PyUnicode_DecodeUTF8Stateful(const char 
     return NULL;
 }
 
+PyObject *
+PyUnicode_DecodeUTF8Stateful(const char *s,
+                             Py_ssize_t size,
+                             const char *errors,
+                             Py_ssize_t *consumed)
+{   /* java = 0: decode with the utf8_code_length table */
+    return decode_utf8_stateful(s, size, errors, consumed, 0);
+}
+
+PyObject *
+PyUnicode_DecodeUTF8JavaStateful(const char *s,
+                                 Py_ssize_t size,
+                                 const char *errors,
+                                 Py_ssize_t *consumed)
+{   /* java = 1: utf8java_code_length table, C0 80 decodes to U+0000 */
+    return decode_utf8_stateful(s, size, errors, consumed, 1);
+}
+
+
 #undef ASCII_CHAR_MASK
 
 #ifdef __APPLE__
@@ -2933,10 +2980,11 @@ _Py_DecodeUTF8_surrogateescape(const cha
    maximum possible needed (4 result bytes per Unicode character), and return
    the excess memory at the end.
 */
-PyObject *
-PyUnicode_EncodeUTF8(const Py_UNICODE *s,
+static PyObject *
+encode_utf8(const Py_UNICODE *s,
                      Py_ssize_t size,
-                     const char *errors)
+                     const char *errors,
+                     int java)
 {
 #define MAX_SHORT_UNICHARS 300  /* largest size we'll do on the stack */
 
@@ -2976,8 +3024,15 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s
         Py_UCS4 ch = s[i++];
 
         if (ch < 0x80)
-            /* Encode ASCII */
-            *p++ = (char) ch;
+            if (ch == 0x00 && java) {
+                /* Encode U+0000 as 0xC0 0x80 */
+                *p++ = 0xC0;
+                *p++ = 0x80;
+            }
+            else {
+                /* Encode ASCII */
+                *p++ = (char) ch;
+            }
 
         else if (ch < 0x0800) {
             /* Encode Latin-1 */
@@ -3003,7 +3058,9 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s
                 PyObject *rep;
                 Py_ssize_t repsize, k;
                 rep = unicode_encode_call_errorhandler
-                    (errors, &errorHandler, "utf-8", "surrogates not allowed",
+                    (errors, &errorHandler,
+                    java ? "utf-8-java" : "utf-8",
+                    "surrogates not allowed",
                      s, size, &exc, i-1, i, &newpos);
                 if (!rep)
                     goto error;
@@ -3050,7 +3107,9 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s
                     for(k=0; k<repsize; k++) {
                         c = prep[k];
                         if (0x80 <= c) {
-                            raise_encode_exception(&exc, "utf-8", s, size,
+                            raise_encode_exception(&exc,
+                                                   java?"utf-8-java" : "utf-8",
+                                                   s, size,
                                                    i-1, i, "surrogates not allowed");
                             goto error;
                         }
@@ -3099,6 +3158,24 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s
 }
 
 PyObject *
+PyUnicode_EncodeUTF8(const Py_UNICODE *s,
+                     Py_ssize_t size,
+                     const char *errors)
+{
+    /* java = 0: U+0000 is emitted as a single 0x00 byte */
+    return encode_utf8(s, size, errors, 0);
+}
+
+PyObject *
+PyUnicode_EncodeUTF8Java(const Py_UNICODE *s,
+                         Py_ssize_t size,
+                         const char *errors)
+{
+    /* java = 1: U+0000 is emitted as the two bytes 0xC0 0x80 */
+    return encode_utf8(s, size, errors, 1);
+}
+
+PyObject *
 PyUnicode_AsUTF8String(PyObject *unicode)
 {
     PyObject *utf8;