diff -r 2b7b203e3909 Doc/c-api/sys.rst
--- a/Doc/c-api/sys.rst	Wed Jan 11 20:18:03 2017 +0200
+++ b/Doc/c-api/sys.rst	Wed Jan 11 23:11:17 2017 +0100
@@ -87,6 +87,9 @@ Operating System Utilities
 
    .. versionadded:: 3.5
 
+   .. versionchanged:: 3.7
+      The function now supports the UTF-8 mode.
+
 
 .. c:function:: char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
 
@@ -98,12 +101,15 @@ Operating System Utilities
    to free the memory. Return ``NULL`` on encoding error or memory allocation
    error
 
-   If error_pos is not ``NULL``, ``*error_pos`` is set to the index of the
-   invalid character on encoding error, or set to ``(size_t)-1`` otherwise.
+   If error_pos is not ``NULL``, ``*error_pos`` is set to ``(size_t)-1`` on
+   success, or set to the index of the invalid character on encoding error.
 
    Use the :c:func:`Py_DecodeLocale` function to decode the bytes string back
    to a wide character string.
 
+   .. versionchanged:: 3.7
+      The function now supports the UTF-8 mode.
+
    .. seealso::
 
       The :c:func:`PyUnicode_EncodeFSDefault` and
@@ -111,6 +117,9 @@ Operating System Utilities
 
    .. versionadded:: 3.5
 
+   .. versionchanged:: 3.7
+      The function now supports the UTF-8 mode.
+
 
 .. _systemfunctions:
 
diff -r 2b7b203e3909 Doc/library/os.rst
--- a/Doc/library/os.rst	Wed Jan 11 20:18:03 2017 +0200
+++ b/Doc/library/os.rst	Wed Jan 11 23:11:17 2017 +0100
@@ -183,6 +183,9 @@ process and user.
       Support added to accept objects implementing the :class:`os.PathLike`
       interface.
 
+   .. versionchanged:: 3.7
+      The UTF-8 mode can now change the encoding.
+
 
 .. function:: fsdecode(filename)
 
@@ -198,6 +201,9 @@ process and user.
       Support added to accept objects implementing the :class:`os.PathLike`
       interface.
 
+   .. versionchanged:: 3.7
+      The UTF-8 mode can now change the encoding.
+
 
 .. function:: fspath(path)
 
diff -r 2b7b203e3909 Doc/library/sys.rst
--- a/Doc/library/sys.rst	Wed Jan 11 20:18:03 2017 +0200
+++ b/Doc/library/sys.rst	Wed Jan 11 23:11:17 2017 +0100
@@ -295,6 +295,7 @@ always available.
    :const:`bytes_warning`        :option:`-b`
    :const:`quiet`                :option:`-q`
    :const:`hash_randomization`   :option:`-R`
+   :const:`utf8mode`             :option:`-X utf8`
    ============================= =============================
 
    .. versionchanged:: 3.2
@@ -306,6 +307,9 @@ always available.
    .. versionchanged:: 3.3
       Removed obsolete ``division_warning`` attribute.
 
+   .. versionchanged:: 3.7
+      Added ``utf8mode`` attribute for the new :option:`-X utf8` flag.
+
 
 .. data:: float_info
 
@@ -451,7 +455,8 @@ always available.
 
    * On Mac OS X, the encoding is ``'utf-8'``.
 
-   * On Unix, the encoding is the locale encoding.
+   * On Unix, the encoding is ``'utf-8'`` in the UTF-8 mode, or the locale
+     encoding otherwise.
 
    * On Windows, the encoding may be ``'utf-8'`` or ``'mbcs'``, depending
      on user configuration.
@@ -463,6 +468,10 @@ always available.
       Windows is no longer guaranteed to return ``'mbcs'``. See :pep:`529`
       and :func:`_enablelegacywindowsfsencoding` for more information.
 
+   .. versionchanged:: 3.7
+      The UTF-8 mode can now change the encoding.
+
+
 .. function:: getfilesystemencodeerrors()
 
    Return the name of the error mode used to convert between Unicode filenames
diff -r 2b7b203e3909 Doc/using/cmdline.rst
--- a/Doc/using/cmdline.rst	Wed Jan 11 20:18:03 2017 +0200
+++ b/Doc/using/cmdline.rst	Wed Jan 11 23:11:17 2017 +0100
@@ -405,6 +405,7 @@ Miscellaneous options
      :func:`tracemalloc.start` for more information.
    * ``-X showalloccount`` to enable the output of the total count of allocated
      objects for each type (only works when built with ``COUNT_ALLOCS`` defined);
+   * ``-X utf8`` to enable the UTF-8 mode.
 
    It also allows passing arbitrary values and retrieving them through the
    :data:`sys._xoptions` dictionary.
@@ -421,6 +422,9 @@ Miscellaneous options
    .. versionadded:: 3.6
       The ``-X showalloccount`` option.
 
+   .. versionadded:: 3.7
+      The ``-X utf8`` option.
+
 
 Options you shouldn't use
 ~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -711,6 +715,17 @@ conflict.
 
    .. versionadded:: 3.6
 
+.. envvar:: PYTHONUTF8
+
+   If set to ``1``, enable the UTF-8 mode.
+
+   If set to ``strict``, enable the UTF-8 mode in strict mode.
+
+   Any other value causes an error.
+
+   .. versionadded:: 3.7
+
+
 Debug-mode variables
 ~~~~~~~~~~~~~~~~~~~~
 
diff -r 2b7b203e3909 Include/fileobject.h
--- a/Include/fileobject.h	Wed Jan 11 20:18:03 2017 +0200
+++ b/Include/fileobject.h	Wed Jan 11 23:11:17 2017 +0100
@@ -28,6 +28,10 @@ PyAPI_DATA(const char *) Py_FileSystemDe
 #endif
 PyAPI_DATA(int) Py_HasFileSystemDefaultEncoding;
 
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03070000
+PyAPI_DATA(int) Py_UTF8Mode;
+#endif
+
 /* Internal API
 
    The std printer acts as a preliminary sys.stderr until the new io
diff -r 2b7b203e3909 Lib/_pyio.py
--- a/Lib/_pyio.py	Wed Jan 11 20:18:03 2017 +0200
+++ b/Lib/_pyio.py	Wed Jan 11 23:11:17 2017 +0100
@@ -1914,19 +1914,34 @@ class TextIOWrapper(TextIOBase):
             raise TypeError("illegal newline type: %r" % (type(newline),))
         if newline not in (None, "", "\n", "\r", "\r\n"):
             raise ValueError("illegal newline value: %r" % (newline,))
+
+        if encoding and not errors:
+            errors = "strict"
+
         if encoding is None:
             try:
-                encoding = os.device_encoding(buffer.fileno())
-            except (AttributeError, UnsupportedOperation):
-                pass
-            if encoding is None:
+                utf8mode = sys.flags.utf8mode
+            except AttributeError:
+                # TextIOWrapper created during Python shutdown, sys.flags
+                # was already set to None. Consider that the UTF-8 mode is
+                # disabled.
+                utf8mode = False
+
+            if utf8mode:
+                encoding = "utf-8"
+            else:
                 try:
-                    import locale
-                except ImportError:
-                    # Importing locale may fail if Python is being built
-                    encoding = "ascii"
-                else:
-                    encoding = locale.getpreferredencoding(False)
+                    encoding = os.device_encoding(buffer.fileno())
+                except (AttributeError, UnsupportedOperation):
+                    pass
+                if encoding is None:
+                    try:
+                        import locale
+                    except ImportError:
+                        # Importing locale may fail if Python is being built
+                        encoding = "ascii"
+                    else:
+                        encoding = locale.getpreferredencoding(False)
 
         if not isinstance(encoding, str):
             raise ValueError("invalid encoding: %r" % encoding)
@@ -1937,7 +1952,12 @@ class TextIOWrapper(TextIOBase):
             raise LookupError(msg % encoding)
 
         if errors is None:
-            errors = "strict"
+            # sys.flags may be None at shutdown: then UTF-8 mode is disabled
+            utf8mode = getattr(sys.flags, "utf8mode", 0)
+            if utf8mode and utf8mode != 2:
+                errors = "surrogateescape"
+            else:
+                errors = "strict"
         else:
             if not isinstance(errors, str):
                 raise ValueError("invalid errors: %r" % errors)
diff -r 2b7b203e3909 Lib/subprocess.py
--- a/Lib/subprocess.py	Wed Jan 11 20:18:03 2017 +0200
+++ b/Lib/subprocess.py	Wed Jan 11 23:11:17 2017 +0100
@@ -251,6 +251,10 @@ def _args_from_interpreter_flags():
         v = getattr(sys.flags, flag)
         if v > 0:
             args.append('-' + opt * v)
+    if sys.flags.utf8mode == 2:
+        args.extend(('-X', 'utf8=strict'))
+    elif sys.flags.utf8mode:
+        args.extend(('-X', 'utf8'))
     for opt in sys.warnoptions:
         args.append('-W' + opt)
     return args
diff -r 2b7b203e3909 Lib/test/support/__init__.py
--- a/Lib/test/support/__init__.py	Wed Jan 11 20:18:03 2017 +0200
+++ b/Lib/test/support/__init__.py	Wed Jan 11 23:11:17 2017 +0100
@@ -919,6 +919,7 @@ for name in (
         TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name
         break
 
+
 if FS_NONASCII:
     TESTFN_NONASCII = TESTFN + '-' + FS_NONASCII
 else:
diff -r 2b7b203e3909 Lib/test/test_builtin.py
--- a/Lib/test/test_builtin.py	Wed Jan 11 20:18:03 2017 +0200
+++ b/Lib/test/test_builtin.py	Wed Jan 11 23:11:17 2017 +0100
@@ -1002,6 +1002,7 @@ class BuiltinTest(unittest.TestCase):
             self.assertEqual(fp.read(300), 'XXX'*100)
             self.assertEqual(fp.read(1000), 'YYY'*100)
 
+    @unittest.skipIf(sys.flags.utf8mode, "utf-8 mode is enabled")
     def test_open_default_encoding(self):
         old_environ = dict(os.environ)
         try:
diff -r 2b7b203e3909 Lib/test/test_capi.py
--- a/Lib/test/test_capi.py	Wed Jan 11 20:18:03 2017 +0200
+++ b/Lib/test/test_capi.py	Wed Jan 11 23:11:17 2017 +0100
@@ -401,6 +401,7 @@ class EmbeddingTests(unittest.TestCase):
             os.close(rp)
         return default_pipe_encoding
 
+    @unittest.skipIf(sys.flags.utf8mode, "utf-8 mode is enabled")
     def test_forced_io_encoding(self):
         # Checks forced configuration of embedded interpreter IO streams
         out, err = self.run_embedded_interpreter("forced_io_encoding")
diff -r 2b7b203e3909 Lib/test/test_io.py
--- a/Lib/test/test_io.py	Wed Jan 11 20:18:03 2017 +0200
+++ b/Lib/test/test_io.py	Wed Jan 11 23:11:17 2017 +0100
@@ -2446,6 +2446,7 @@ class TextIOWrapperTest(unittest.TestCas
         t.write("A\rB")
         self.assertEqual(r.getvalue(), b"XY\nZA\rB")
 
+    @unittest.skipIf(sys.flags.utf8mode, "utf-8 mode is enabled")
     def test_default_encoding(self):
         old_environ = dict(os.environ)
         try:
@@ -2465,6 +2466,7 @@ class TextIOWrapperTest(unittest.TestCas
             os.environ.update(old_environ)
 
     @support.cpython_only
+    @unittest.skipIf(sys.flags.utf8mode, "utf-8 mode is enabled")
     def test_device_encoding(self):
         # Issue 15989
         import _testcapi
@@ -2982,6 +2984,7 @@ class TextIOWrapperTest(unittest.TestCas
             with self.open(filename, 'rb') as f:
                 self.assertEqual(f.read(), 'aaaxxx'.encode(charset))
 
+    @unittest.skipIf(sys.flags.utf8mode, "utf-8 mode is enabled")
     def test_errors_property(self):
         with self.open(support.TESTFN, "w") as f:
             self.assertEqual(f.errors, "strict")
diff -r 2b7b203e3909 Lib/test/test_sys.py
--- a/Lib/test/test_sys.py	Wed Jan 11 20:18:03 2017 +0200
+++ b/Lib/test/test_sys.py	Wed Jan 11 23:11:17 2017 +0100
@@ -559,6 +559,9 @@ class SysModuleTest(unittest.TestCase):
         self.assertTrue(repr(sys.flags))
         self.assertEqual(len(sys.flags), len(attrs))
 
+    def test_sys_flags_utf8mode(self):
+        self.assertIn(sys.flags.utf8mode, {0, 1, 2})
+
     def assert_raise_on_new_sys_type(self, sys_attr):
         # Users are intentionally prevented from creating new instances of
         # sys.flags, sys.version_info, and sys.getwindowsversion.
diff -r 2b7b203e3909 Lib/test/test_utf8mode.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Lib/test/test_utf8mode.py	Wed Jan 11 23:11:17 2017 +0100
@@ -0,0 +1,147 @@
+import os
+import textwrap
+import unittest
+from test.support.script_helper import assert_python_ok, assert_python_failure
+
+
+class UTF8ModeTests(unittest.TestCase):
+    def test_xoption(self):
+        code = 'import sys; print(sys.flags.utf8mode)'
+
+        # Use __cleanenv to ignore PYTHONUTF8
+        out = assert_python_ok('-X', 'utf8', '-c', code, __cleanenv=True)
+        self.assertEqual(out[1].rstrip(), b'1')
+
+        out = assert_python_ok('-X', 'utf8=strict', '-c', code, __cleanenv=True)
+        self.assertEqual(out[1].rstrip(), b'2')
+
+        out = assert_python_ok('-X', 'utf8=0', '-c', code, __cleanenv=True)
+        self.assertEqual(out[1].rstrip(), b'0')
+
+    def test_env_var(self):
+        code = 'import sys; print(sys.flags.utf8mode)'
+
+        out = assert_python_ok('-c', code, PYTHONUTF8='1')
+        self.assertEqual(out[1].rstrip(), b'1')
+
+        out = assert_python_ok('-c', code, PYTHONUTF8='strict')
+        self.assertEqual(out[1].rstrip(), b'2')
+
+        out = assert_python_ok('-c', code, PYTHONUTF8='0')
+        self.assertEqual(out[1].rstrip(), b'0')
+
+        # -X utf8 overrides env var
+        out = assert_python_ok('-X', 'utf8=strict', '-c', code, PYTHONUTF8='1')
+        self.assertEqual(out[1].rstrip(), b'2')
+
+        # invalid mode
+        out = assert_python_failure('-c', code, PYTHONUTF8='xxx')
+        self.assertIn(b'Error in PYTHONUTF8: invalid UTF-8 mode "xxx"!',
+                      out[2].rstrip())
+
+    def test_filesystemencoding(self):
+        code = 'import sys; print(sys.getfilesystemencoding(), sys.getfilesystemencodeerrors())'
+
+        out = assert_python_ok('-X', 'utf8', '-c', code)
+        self.assertEqual(out[1].rstrip(), b'utf-8 surrogateescape')
+
+        out = assert_python_ok('-X', 'utf8=strict', '-c', code)
+        self.assertEqual(out[1].rstrip(), b'utf-8 surrogateescape')
+
+    def test_stdio(self):
+        code = textwrap.dedent('''
+            import sys
+            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
+            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
+            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
+        ''')
+
+        # Use __cleanenv to ignore PYTHONIOENCODING
+        out = assert_python_ok('-c', code,
+                               PYTHONUTF8='1', __cleanenv=True)
+        self.assertEqual(out[1].splitlines(),
+                         [b'stdin: utf-8/surrogateescape',
+                          b'stdout: utf-8/surrogateescape',
+                          b'stderr: utf-8/backslashreplace'])
+
+        out = assert_python_ok('-c', code,
+                               PYTHONUTF8='strict', __cleanenv=True)
+        self.assertEqual(out[1].splitlines(),
+                         [b'stdin: utf-8/strict',
+                          b'stdout: utf-8/strict',
+                          b'stderr: utf-8/backslashreplace'])
+
+        # PYTHONIOENCODING has the priority over PYTHONUTF8
+        out = assert_python_ok('-c', code,
+                               PYTHONUTF8='1', PYTHONIOENCODING="latin1")
+        self.assertEqual(out[1].splitlines(),
+                         [b'stdin: latin1/strict',
+                          b'stdout: latin1/strict',
+                          b'stderr: latin1/backslashreplace'])
+
+        out = assert_python_ok('-c', code,
+                               PYTHONUTF8='1', PYTHONIOENCODING=":namereplace")
+        self.assertEqual(out[1].splitlines(),
+                         [b'stdin: utf-8/namereplace',
+                          b'stdout: utf-8/namereplace',
+                          b'stderr: utf-8/backslashreplace'])
+
+    def test_io(self):
+        code = textwrap.dedent('''
+            import sys
+            filename = sys.argv[1]
+            with open(filename) as fp:
+                print(f"{fp.encoding}/{fp.errors}")
+        ''')
+        filename = __file__
+
+        out = assert_python_ok('-c', code, filename, PYTHONUTF8='1')
+        self.assertEqual(out[1].rstrip(), b'utf-8/surrogateescape')
+
+        out = assert_python_ok('-c', code, filename, PYTHONUTF8='strict')
+        self.assertEqual(out[1].rstrip(), b'utf-8/strict')
+
+    def _check_io_encoding(self, module, encoding=None, errors=None):
+        """Check open() of *module* in UTF-8 mode with explicit arguments."""
+        filename = __file__
+
+        # Pass only the keyword arguments that were explicitly requested
+        args = []
+        if encoding:
+            args.append(f'encoding={encoding!r}')
+        if errors:
+            args.append(f'errors={errors!r}')
+        code = textwrap.dedent('''
+            import sys
+            from %s import open
+            filename = sys.argv[1]
+            with open(filename, %s) as fp:
+                print(f"{fp.encoding}/{fp.errors}")
+        ''') % (module, ', '.join(args))
+        out = assert_python_ok('-c', code, filename, PYTHONUTF8='1')
+
+        # Compute the expected error handler before defaulting the encoding
+        if not errors:
+            if encoding:
+                errors = 'strict'
+            else:
+                errors = 'surrogateescape'
+        if not encoding:
+            encoding = 'utf-8'
+        self.assertEqual(out[1].rstrip().decode(), f'{encoding}/{errors}')
+
+    def check_io_encoding(self, module):
+        self._check_io_encoding(module, encoding="latin1")
+        self._check_io_encoding(module, errors="namereplace")
+        self._check_io_encoding(module,
+                                encoding="latin1", errors="namereplace")
+
+    def test_io_encoding(self):
+        self.check_io_encoding('io')
+
+    def test_pyio_encoding(self):
+        self.check_io_encoding('_pyio')  # distinct name: keeps the 'io' test
+
+
+if __name__ == "__main__":
+    unittest.main()
diff -r 2b7b203e3909 Modules/_io/textio.c
--- a/Modules/_io/textio.c	Wed Jan 11 20:18:03 2017 +0200
+++ b/Modules/_io/textio.c	Wed Jan 11 23:11:17 2017 +0100
@@ -864,7 +864,18 @@ static int
     self->encodefunc = NULL;
     self->b2cratio = 0.0;
 
-    if (encoding == NULL) {
+    if (encoding && !errors) {
+        errors = "strict";
+    }
+
+    if (encoding == NULL && Py_UTF8Mode) {
+        self->encoding = PyUnicode_FromString("utf-8");
+        if (self->encoding == NULL) {
+            goto error;
+        }
+    }
+
+    if (encoding == NULL && self->encoding == NULL) {
         /* Try os.device_encoding(fileno) */
         PyObject *fileno;
         state = IO_STATE();
@@ -895,6 +906,7 @@ static int
                 Py_CLEAR(self->encoding);
         }
     }
+
     if (encoding == NULL && self->encoding == NULL) {
         PyObject *locale_module = _PyIO_get_locale_module(state);
         if (locale_module == NULL)
@@ -946,8 +958,14 @@ static int
      * of the partially constructed object (like self->encoding)
      */
 
-    if (errors == NULL)
-        errors = "strict";
+    if (errors == NULL) {
+        if (Py_UTF8Mode == 2)
+            errors = "strict";
+        else if (Py_UTF8Mode)
+            errors = "surrogateescape";
+        else
+            errors = "strict";
+    }
     self->errors = PyBytes_FromString(errors);
     if (self->errors == NULL)
         goto error;
diff -r 2b7b203e3909 Modules/main.c
--- a/Modules/main.c	Wed Jan 11 20:18:03 2017 +0200
+++ b/Modules/main.c	Wed Jan 11 23:11:17 2017 +0100
@@ -350,14 +350,14 @@ Py_Main(int argc, wchar_t **argv)
     PyCompilerFlags cf;
     PyObject *warning_option = NULL;
     PyObject *warning_options = NULL;
+    int utf8mode = -1;
 
     cf.cf_flags = 0;
 
     orig_argc = argc;           /* For Py_GetArgcArgv() */
     orig_argv = argv;
 
-    /* Hash randomization needed early for all string operations
-       (including -W and -X options). */
+    /* Hash randomization and -X utf8 needed early */
     _PyOS_opterr = 0;  /* prevent printing the error in 1st pass */
     while ((c = _PyOS_GetOpt(argc, argv, PROGRAM_OPTS)) != EOF) {
         if (c == 'm' || c == 'c') {
@@ -367,7 +367,19 @@ Py_Main(int argc, wchar_t **argv)
         }
         if (c == 'E') {
             Py_IgnoreEnvironmentFlag++;
-            break;
+        }
+        else if (c == 'X') {
+            if (wcscmp(_PyOS_optarg, L"utf8") == 0) {
+                utf8mode = 1;
+            }
+            else if (wcscmp(_PyOS_optarg, L"utf8=strict") == 0) {
+                utf8mode = 2;
+            }
+            else if (wcscmp(_PyOS_optarg, L"utf8=0") == 0) {
+                utf8mode = 0;
+            }
+            /* other invalid values of "-X utf8" are rejected
+               in _PyUTF8Mode_Init() */
         }
     }
 
@@ -378,6 +390,35 @@ Py_Main(int argc, wchar_t **argv)
         exit(1);
     }
 
+    opt = Py_GETENV("PYTHONUTF8");
+    if (opt) {
+        int env_utf8mode = -1;
+
+        if (strcmp(opt, "1") == 0) {
+            env_utf8mode = 1;
+        }
+        else if (strcmp(opt, "strict") == 0) {
+            env_utf8mode = 2;
+        }
+        else if (strcmp(opt, "0") == 0) {
+            env_utf8mode = 0;
+        }
+        else {
+            fprintf(stderr,
+                    "Error in PYTHONUTF8: invalid UTF-8 mode \"%s\"!\n", opt);
+            exit(1);
+        }
+
+        /* -X utf8 has the priority over the PYTHONUTF8 environment variable */
+        if (utf8mode == -1) {
+            utf8mode = env_utf8mode;
+        }
+    }
+
+    if (utf8mode != -1) {
+        Py_UTF8Mode = utf8mode;
+    }
+
     Py_HashRandomizationFlag = 1;
     _PyRandom_Init();
 
diff -r 2b7b203e3909 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Wed Jan 11 20:18:03 2017 +0200
+++ b/Objects/unicodeobject.c	Wed Jan 11 23:11:17 2017 +0100
@@ -5067,16 +5067,17 @@ onError:
     return NULL;
 }
 
-#if defined(__APPLE__) || defined(__ANDROID__)
-
-/* Simplified UTF-8 decoder using surrogateescape error handler,
-   used to decode the command line arguments on Mac OS X and Android.
-
-   Return a pointer to a newly allocated wide character string (use
-   PyMem_RawFree() to free the memory), or NULL on memory allocation error. */
-
+
+/* UTF-8 decoder using the surrogateescape error handler.
+
+   On success, return a pointer to a newly allocated wide character string (use
+   PyMem_RawFree() to free the memory) and write the output length (in number
+   of wchar_t units) into *p_wlen (if p_wlen is set).
+
+   On memory allocation failure, return NULL and write (size_t)-1 into *p_wlen
+   (if p_wlen is set). */
 wchar_t*
-_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
+_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
 {
     const char *e;
     wchar_t *unicode;
@@ -5084,11 +5085,20 @@ wchar_t*
 
     /* Note: size will always be longer than the resulting Unicode
        character count */
-    if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < (size + 1))
-        return NULL;
+    if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < (size + 1)) {
+        if (p_wlen) {
+            *p_wlen = (size_t)-1;
+        }
+        return NULL;
+    }
+
     unicode = PyMem_RawMalloc((size + 1) * sizeof(wchar_t));
-    if (!unicode)
-        return NULL;
+    if (!unicode) {
+        if (p_wlen) {
+            *p_wlen = (size_t)-1;
+        }
+        return NULL;
+    }
 
     /* Unpack UTF-8 encoded data */
     e = s + size;
@@ -5118,10 +5128,12 @@ wchar_t*
         }
     }
     unicode[outpos] = L'\0';
+    if (p_wlen) {
+        *p_wlen = outpos;
+    }
     return unicode;
 }
 
-#endif /* __APPLE__ or __ANDROID__ */
 
 /* Primary internal function which creates utf8 encoded bytes objects.
 
diff -r 2b7b203e3909 Programs/python.c
--- a/Programs/python.c	Wed Jan 11 20:18:03 2017 +0200
+++ b/Programs/python.c	Wed Jan 11 23:11:17 2017 +0100
@@ -15,14 +15,22 @@ wmain(int argc, wchar_t **argv)
 }
 #else
 
+static void _Py_NO_RETURN
+fatal_error(const char *msg)
+{
+    fprintf(stderr, "Fatal Python error: %s\n", msg);
+    exit(1);
+}
+
 int
 main(int argc, char **argv)
 {
     wchar_t **argv_copy;
-    /* We need a second copy, as Python might modify the first one. */
+    /* We need a second copy to release the memory:
+       Py_Main() modifies argv_copy */
     wchar_t **argv_copy2;
     int i, res;
-    char *oldloc;
+    char *loc, *oldloc;
 
     /* Force malloc() allocator to bootstrap Python */
     (void)_PyMem_SetupAllocators("malloc");
@@ -30,7 +38,7 @@ main(int argc, char **argv)
     argv_copy = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
     argv_copy2 = (wchar_t **)PyMem_RawMalloc(sizeof(wchar_t*) * (argc+1));
     if (!argv_copy || !argv_copy2) {
-        fprintf(stderr, "out of memory\n");
+        fatal_error("out of memory");
         return 1;
     }
 
@@ -45,19 +53,25 @@ main(int argc, char **argv)
 
     oldloc = _PyMem_RawStrdup(setlocale(LC_ALL, NULL));
     if (!oldloc) {
-        fprintf(stderr, "out of memory\n");
-        return 1;
+        fatal_error("out of memory (failed to copy the LC_ALL locale)");
     }
 
     setlocale(LC_ALL, "");
+
+    loc = setlocale(LC_CTYPE, NULL);
+    if (!loc) {  /* strcmp() below requires a non-NULL loc */
+        fatal_error("failed to get the LC_CTYPE locale");
+    }
+    if (strcmp(loc, "C") == 0) {
+        /* The POSIX locale enables the UTF-8 mode */
+        Py_UTF8Mode = 1;
+    }
+
     for (i = 0; i < argc; i++) {
         argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
         if (!argv_copy[i]) {
             PyMem_RawFree(oldloc);
-            fprintf(stderr, "Fatal Python error: "
-                            "unable to decode the command line argument #%i\n",
-                            i + 1);
-            return 1;
+            fatal_error("unable to decode the command line arguments");
         }
         argv_copy2[i] = argv_copy[i];
     }
diff -r 2b7b203e3909 Python/bltinmodule.c
--- a/Python/bltinmodule.c	Wed Jan 11 20:18:03 2017 +0200
+++ b/Python/bltinmodule.c	Wed Jan 11 23:11:17 2017 +0100
@@ -33,6 +33,14 @@ const char *Py_FileSystemDefaultEncoding
 int Py_HasFileSystemDefaultEncoding = 0;
 #endif
 const char *Py_FileSystemDefaultEncodeErrors = "surrogateescape";
+/* UTF-8 mode (PEP 540):
+   0: UTF-8 mode disabled, use the locale encoding with strict or
+      surrogateescape error handler depending on the case
+   1: UTF-8 mode enabled, use UTF-8 with surrogateescape error handler by
+      default and ignore the locale
+   2: UTF-8 mode in strict mode, use UTF-8 with strict error handler by default
+      and ignore the locale */
+int Py_UTF8Mode = 0;
 
 _Py_IDENTIFIER(__builtins__);
 _Py_IDENTIFIER(__dict__);
diff -r 2b7b203e3909 Python/fileutils.c
--- a/Python/fileutils.c	Wed Jan 11 20:18:03 2017 +0200
+++ b/Python/fileutils.c	Wed Jan 11 23:11:17 2017 +0100
@@ -20,9 +20,8 @@ extern int winerror_to_errno(int);
 #include <fcntl.h>
 #endif /* HAVE_FCNTL_H */
 
-#if defined(__APPLE__) || defined(__ANDROID__)
-extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
-#endif
+extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size,
+                                               size_t *p_wlen);
 
 #ifdef O_CLOEXEC
 /* Does open() support the O_CLOEXEC flag? Possible values:
@@ -250,40 +249,9 @@ decode_ascii_surrogateescape(const char 
 }
 #endif
 
-
-/* Decode a byte string from the locale encoding with the
-   surrogateescape error handler: undecodable bytes are decoded as characters
-   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
-   character, escape the bytes using the surrogateescape error handler instead
-   of decoding them.
-
-   Return a pointer to a newly allocated wide character string, use
-   PyMem_RawFree() to free the memory. If size is not NULL, write the number of
-   wide characters excluding the null character into *size
-
-   Return NULL on decoding error or memory allocation error. If *size* is not
-   NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
-   decoding error.
-
-   Decoding errors should never happen, unless there is a bug in the C
-   library.
-
-   Use the Py_EncodeLocale() function to encode the character string back to a
-   byte string. */
-wchar_t*
-Py_DecodeLocale(const char* arg, size_t *size)
+static wchar_t*
+decode_locale(const char* arg, size_t *size)
 {
-#if defined(__APPLE__) || defined(__ANDROID__)
-    wchar_t *wstr;
-    wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
-    if (size != NULL) {
-        if (wstr != NULL)
-            *size = wcslen(wstr);
-        else
-            *size = (size_t)-1;
-    }
-    return wstr;
-#else
     wchar_t *res;
     size_t argsize;
     size_t count;
@@ -293,19 +261,6 @@ Py_DecodeLocale(const char* arg, size_t 
     mbstate_t mbs;
 #endif
 
-#ifndef MS_WINDOWS
-    if (force_ascii == -1)
-        force_ascii = check_force_ascii();
-
-    if (force_ascii) {
-        /* force ASCII encoding to workaround mbstowcs() issue */
-        res = decode_ascii_surrogateescape(arg, size);
-        if (res == NULL)
-            goto oom;
-        return res;
-    }
-#endif
-
 #ifdef HAVE_BROKEN_MBSTOWCS
     /* Some platforms have a broken implementation of
      * mbstowcs which does not count the characters that
@@ -402,43 +357,84 @@ Py_DecodeLocale(const char* arg, size_t 
         goto oom;
 #endif   /* HAVE_MBRTOWC */
     return res;
+
 oom:
-    if (size != NULL)
+    if (size != NULL) {
         *size = (size_t)-1;
+    }
     return NULL;
+}
+
+
+/* Decode a byte string from the locale encoding with the
+   surrogateescape error handler: undecodable bytes are decoded as characters
+   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
+   character, escape the bytes using the surrogateescape error handler instead
+   of decoding them.
+
+   Return a pointer to a newly allocated wide character string, use
+   PyMem_RawFree() to free the memory. If size is not NULL, write the number of
+   wide characters excluding the null character into *size
+
+   Return NULL on decoding error or memory allocation error. If *size* is not
+   NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
+   decoding error.
+
+   Decoding errors should never happen, unless there is a bug in the C
+   library.
+
+   Use the Py_EncodeLocale() function to encode the character string back to a
+   byte string. */
+wchar_t*
+Py_DecodeLocale(const char* arg, size_t *size)
+{
+#if defined(__APPLE__) || defined(__ANDROID__)
+    return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
+#else
+    if (Py_UTF8Mode) {
+        return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
+    }
+
+#ifndef MS_WINDOWS
+    if (force_ascii == -1)
+        force_ascii = check_force_ascii();
+
+    if (force_ascii) {
+        /* force ASCII encoding to workaround mbstowcs() issue */
+        wchar_t *wstr = decode_ascii_surrogateescape(arg, size);
+        if (wstr == NULL) {
+            if (size != NULL) {
+                *size = (size_t)-1;
+            }
+            return NULL;
+        }
+        return wstr;
+    }
+#endif
+
+    return decode_locale(arg, size);
 #endif   /* __APPLE__ or __ANDROID__ */
 }
 
-/* Encode a wide character string to the locale encoding with the
-   surrogateescape error handler: surrogate characters in the range
-   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
-
-   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
-   the memory. Return NULL on encoding or memory allocation error.
-
-   If error_pos is not NULL, *error_pos is set to the index of the invalid
-   character on encoding error, or set to (size_t)-1 otherwise.
-
-   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
-   character string. */
-char*
-Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
+static char*
+_Py_EncodeLocaleUTF8(const wchar_t *text, size_t *error_pos)
 {
-#if defined(__APPLE__) || defined(__ANDROID__)
     Py_ssize_t len;
     PyObject *unicode, *bytes = NULL;
     char *cpath;
 
     unicode = PyUnicode_FromWideChar(text, wcslen(text));
-    if (unicode == NULL)
+    if (unicode == NULL) {
         return NULL;
+    }
 
     bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
     Py_DECREF(unicode);
     if (bytes == NULL) {
         PyErr_Clear();
-        if (error_pos != NULL)
+        if (error_pos != NULL) {
             *error_pos = (size_t)-1;
+        }
         return NULL;
     }
 
@@ -447,27 +443,24 @@ Py_EncodeLocale(const wchar_t *text, siz
     if (cpath == NULL) {
         PyErr_Clear();
         Py_DECREF(bytes);
-        if (error_pos != NULL)
+        if (error_pos != NULL) {
             *error_pos = (size_t)-1;
+        }
         return NULL;
     }
     memcpy(cpath, PyBytes_AsString(bytes), len + 1);
     Py_DECREF(bytes);
     return cpath;
-#else   /* __APPLE__ */
+}
+
+static char*
+encode_locale(const wchar_t *text, size_t *error_pos)
+{
     const size_t len = wcslen(text);
     char *result = NULL, *bytes = NULL;
     size_t i, size, converted;
     wchar_t c, buf[2];
 
-#ifndef MS_WINDOWS
-    if (force_ascii == -1)
-        force_ascii = check_force_ascii();
-
-    if (force_ascii)
-        return encode_ascii_surrogateescape(text, error_pos);
-#endif
-
     /* The function works in two steps:
        1. compute the length of the output buffer in bytes (size)
        2. outputs the bytes */
@@ -522,6 +515,51 @@ Py_EncodeLocale(const wchar_t *text, siz
         bytes = result;
     }
     return result;
+}
+
+/* Encode a wide character string to the locale encoding with the
+   surrogateescape error handler: surrogate characters in the range
+   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
+
+   UTF-8 is used instead of the locale encoding if Py_UTF8Mode is non-zero,
+   or if Python is built with __APPLE__ or __ANDROID__ defined.
+
+   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
+   the memory. Return NULL on encoding or memory allocation error.
+
+   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
+   to the index of the invalid character on encoding error.
+
+   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
+   character string. */
+char*
+Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
+{
+    /* Store the documented "success" value first, so that it holds whichever
+       encoder runs: an encoder only has to overwrite it on error. */
+    if (error_pos != NULL) {
+        *error_pos = (size_t)-1;
+    }
+
+#if defined(__APPLE__) || defined(__ANDROID__)
+    return _Py_EncodeLocaleUTF8(text, error_pos);
+#else   /* __APPLE__ or __ANDROID__ */
+    if (Py_UTF8Mode) {
+        return _Py_EncodeLocaleUTF8(text, error_pos);
+    }
+
+#ifndef MS_WINDOWS
+    /* -1 means "not checked yet": run the check once and cache the result */
+    if (force_ascii == -1) {
+        force_ascii = check_force_ascii();
+    }
+
+    if (force_ascii) {
+        return encode_ascii_surrogateescape(text, error_pos);
+    }
+#endif
+
+    return encode_locale(text, error_pos);
 #endif   /* __APPLE__ or __ANDROID__ */
 }
 
diff -r 2b7b203e3909 Python/pylifecycle.c
--- a/Python/pylifecycle.c	Wed Jan 11 20:18:03 2017 +0200
+++ b/Python/pylifecycle.c	Wed Jan 11 23:11:17 2017 +0100
@@ -302,6 +302,63 @@ import_init(PyInterpreterState *interp, 
 }
 
 
+/* Check the "utf8" entry of the -X options against Py_UTF8Mode.
+
+   Values understood in the dict returned by PySys_GetXOptions():
+
+   - True: mode 1
+   - "strict": mode 2
+   - "0": mode 0
+
+   Py_UTF8Mode is only read, never written, by this function.
+
+   Return 0 if the option is missing or valid. Return -1 if the lookup failed
+   or if the value is not recognized (a message is then written to stderr). */
+static int
+_PyUTF8Mode_Init(void)
+{
+    PyObject *xoptions, *key, *value;
+    int mode;
+
+    xoptions = PySys_GetXOptions();
+    if (xoptions == NULL) {
+        return -1;
+    }
+
+    key = PyUnicode_FromString("utf8");
+    if (key == NULL) {
+        return -1;
+    }
+
+    value = PyDict_GetItemWithError(xoptions, key);
+    Py_DECREF(key);
+    if (value == NULL) {
+        /* NULL without an exception set means that the key is missing */
+        return PyErr_Occurred() ? -1 : 0;
+    }
+
+    if (value == Py_True) {
+        mode = 1;
+    }
+    else if (PyUnicode_CompareWithASCIIString(value, "strict") == 0) {
+        mode = 2;
+    }
+    else if (PyUnicode_CompareWithASCIIString(value, "0") == 0) {
+        mode = 0;
+    }
+    else {
+        fprintf(stderr, "Invalid UTF-8 mode (-X option)!\n");
+        return -1;
+    }
+
+    /* Py_Main() handles -X utf8 early: just make sure that it's consistent */
+    assert(Py_UTF8Mode == mode);
+    /* mode is only read by assert(), which is compiled out with NDEBUG */
+    (void)mode;
+    return 0;
+}
+
+
 void
 _Py_InitializeEx_Private(int install_sigs, int install_importlib)
 {
@@ -344,6 +387,10 @@ void
 
     _PyRandom_Init();
 
+    if (_PyUTF8Mode_Init() < 0) {
+        Py_FatalError("Py_Initialize: UTF-8 mode initialization failed");
+    }
+
     interp = PyInterpreterState_New();
     if (interp == NULL)
         Py_FatalError("Py_Initialize: can't make first interpreter");
@@ -997,15 +1044,19 @@ initfsencoding(PyInterpreterState *inter
         Py_FileSystemDefaultEncodeErrors = "surrogatepass";
     }
 #else
-    if (Py_FileSystemDefaultEncoding == NULL)
-    {
-        Py_FileSystemDefaultEncoding = get_locale_encoding();
-        if (Py_FileSystemDefaultEncoding == NULL)
-            Py_FatalError("Py_Initialize: Unable to get the locale encoding");
+    if (Py_FileSystemDefaultEncoding == NULL) {
+        if (!Py_UTF8Mode) {
+            Py_FileSystemDefaultEncoding = get_locale_encoding();
+            if (Py_FileSystemDefaultEncoding == NULL)
+                Py_FatalError("Py_Initialize: Unable to get the locale encoding");
 
-        Py_HasFileSystemDefaultEncoding = 0;
-        interp->fscodec_initialized = 1;
-        return 0;
+            Py_HasFileSystemDefaultEncoding = 0;
+            interp->fscodec_initialized = 1;
+            return 0;
+        }
+
+        Py_FileSystemDefaultEncoding = "utf-8";
+        Py_HasFileSystemDefaultEncoding = 1;
     }
 #endif
 
@@ -1242,6 +1293,11 @@ initstdio(void)
                 encoding = pythonioencoding;
             }
         }
+        else if (Py_UTF8Mode) {
+            encoding = "utf-8";
+            errors = (Py_UTF8Mode == 2 ) ? "strict" : "surrogateescape";
+        }
+
         if (!errors && !(pythonioencoding && *pythonioencoding)) {
             /* When the LC_CTYPE locale is the POSIX locale ("C locale"),
                stdin and stdout use the surrogateescape error handler by
diff -r 2b7b203e3909 Python/sysmodule.c
--- a/Python/sysmodule.c	Wed Jan 11 20:18:03 2017 +0200
+++ b/Python/sysmodule.c	Wed Jan 11 23:11:17 2017 +0100
@@ -1713,6 +1713,7 @@ static PyStructSequence_Field flags_fiel
     {"quiet",                   "-q"},
     {"hash_randomization",      "-R"},
     {"isolated",                "-I"},
+    {"utf8mode",                "-X utf8"},
     {0}
 };
 
@@ -1751,6 +1752,7 @@ make_flags(void)
     SetFlag(Py_QuietFlag);
     SetFlag(Py_HashRandomizationFlag);
     SetFlag(Py_IsolatedFlag);
+    SetFlag(Py_UTF8Mode);
 #undef SetFlag
 
     if (PyErr_Occurred()) {