# Test essentials of command line Unicode handling
# Data sources:
#   -c content
#   TODO: stdin
#   Command line argument
#   Environment variable
#   Directory listing
# Data sinks:
#   stdout
#   stderr
# Stream configurations
#   Subprocess default
#   TODO: Copied from current process (via PYTHONIOENCODING)
#   FAILS: Misconfigured via LANG=C

import unittest
import sys
import os.path

from test.support import verbose
from test.support.script_helper import assert_python_ok

try:
    # Current home of temp_dir.
    from test.support.os_helper import temp_dir
except ImportError:
    # Older interpreters exposed it directly from test.support.
    from test.support import temp_dir

# Test pattern from Ned Batchelder's Pragmatic Unicode
TEST_TEXT = "ℙƴ☂ℌøἤ"


class CmdLineUnicodeTest(unittest.TestCase):
    """Round-trip TEST_TEXT through a child interpreter's stdout and stderr.

    Each test feeds TEST_TEXT to a subprocess through a different data
    source and checks that it comes back unchanged on both streams.
    Subclasses customise the scenario by overriding adjust_env() and the
    get_expected_*() hooks.
    """

    def adjust_env(self, env_vars):
        """Hook: mutate *env_vars* in place before the child is started.

        The base implementation leaves the environment untouched.
        """
        pass

    def get_expected_stdout(self):
        """Return ``(encoding, text)`` expected on the child's stdout.

        The encoding is taken from this process's own sys.stdout.
        """
        return sys.stdout.encoding, TEST_TEXT

    def get_expected_stderr(self):
        """Return ``(encoding, text)`` expected on the child's stderr.

        The encoding is taken from this process's own sys.stderr.
        """
        return sys.stderr.encoding, TEST_TEXT

    def _check_output(self, *run_args, **env_vars):
        """Run ``python -c <run_args...>`` and verify both output streams.

        *run_args* is the -c code string followed by any extra command line
        arguments; *env_vars* are extra environment variables for the child.
        The captured bytes are decoded with the expected encodings, stripped
        of surrounding whitespace and compared with the expected text.
        """
        self.adjust_env(env_vars)
        # __isolated=False is an option consumed by the script helper, not an
        # environment variable; presumably it keeps the child out of isolated
        # mode so the environment settings are honoured -- TODO confirm.
        rc, out, err = assert_python_ok("-c", *run_args, **env_vars,
                                        __isolated=False)
        if verbose > 1:
            print("Output:", repr(out))
            print("Error output:", repr(err))
        stdout_encoding, expected_stdout = self.get_expected_stdout()
        stderr_encoding, expected_stderr = self.get_expected_stderr()
        self.assertEqual(out.decode(stdout_encoding).strip(), expected_stdout)
        self.assertEqual(err.decode(stderr_encoding).strip(), expected_stderr)

    # NOTE: the test methods are described with comments rather than
    # docstrings so that unittest keeps reporting them by method name.

    def test_dash_c_unicode(self):
        # Data source: the text is embedded in the -c code string itself.
        cmd = ("import sys;"
               f"print('{TEST_TEXT}');"
               f"print('{TEST_TEXT}', file=sys.stderr)")
        self._check_output(cmd)

    def test_cmdline_argument_unicode(self):
        # Data source: the text is passed as a command line argument.
        cmd = ("import sys;"
               "print(sys.argv[1]);"
               "print(sys.argv[1], file=sys.stderr)")
        self._check_output(cmd, TEST_TEXT)

    def test_envvar_unicode(self):
        # Data source: the text is passed through an environment variable.
        cmd = ("import sys, os;"
               "print(os.environ['PYTHON_TEST_TEXT']);"
               "print(os.environ['PYTHON_TEST_TEXT'], file=sys.stderr)")
        self._check_output(cmd, PYTHON_TEST_TEXT=TEST_TEXT)

    def test_direntry_unicode(self):
        # Data source: the text is the name of the only entry in a
        # temporary directory that the child lists.
        cmd = ("import sys, os;"
               "listing = os.listdir(sys.argv[1]);"
               "print(listing[0]);"
               "print(listing[0], file=sys.stderr)")
        with temp_dir() as dirname:
            # Create an empty file; only its name matters.
            with open(os.path.join(dirname, TEST_TEXT), "wb"):
                pass
            self._check_output(cmd, dirname)


class CmdLineMisconfiguredUnicodeTest(CmdLineUnicodeTest):
    """Repeat the inherited tests with the child started under LANG=C.

    The file header lists this configuration as currently failing.
    """

    def adjust_env(self, env_vars):
        """Force ``LANG=C`` in the child's environment."""
        # NOTE(review): LC_ALL / LC_CTYPE inherited from the parent would
        # take precedence over LANG -- confirm whether they should be
        # overridden as well.
        env_vars["LANG"] = "C"

    def get_expected_stderr(self):
        """Return ``("ascii", text)`` with TEST_TEXT in escaped ASCII form.

        The text is encoded with the filesystem encoding, decoded as ASCII
        with surrogateescape, then re-encoded as ASCII with backslashreplace,
        so every non-ASCII byte ends up as a backslash escape.
        """
        # NOTE(review): presumably this models an interpreter that treats the
        # C locale as plain ASCII; interpreters that coerce the C locale to
        # UTF-8 would not produce this output -- confirm.
        encoded_in = os.fsencode(TEST_TEXT)
        decoded_in = encoded_in.decode("ascii", errors="surrogateescape")
        encoded_out = decoded_in.encode("ascii", errors="backslashreplace")
        return "ascii", encoded_out.decode("ascii")


if __name__ == "__main__":
    unittest.main()