On non-Windows platforms, escape any byte sequence in a command-line argument that decodes to a surrogate code point (U+D800-U+DFFF) using surrogateescape. Index: Modules/python.c =================================================================== --- Modules/python.c.orig +++ Modules/python.c @@ -38,8 +38,16 @@ if (!res) goto oom; count = mbstowcs(res, arg, argsize+1); - if (count != (size_t)-1) - return res; + if (count != (size_t)-1) { + wchar_t *tmp; + /* Only use the result if it contains no + surrogate characters. */ + for (tmp = res; *tmp != 0 && + (*tmp < 0xd800 || *tmp > 0xdfff); tmp++) + ; + if (*tmp == 0) + return res; + } PyMem_Free(res); } /* Conversion failed. Fall back to escaping with surrogateescape. */ @@ -75,6 +83,14 @@ memset(&mbs, 0, sizeof mbs); continue; } + if (*out >= 0xd800 && *out <= 0xdfff) { + /* Surrogate character. Escape the original + byte sequence with surrogateescape. */ + argsize -= converted; + while (converted--) + *out++ = 0xdc00 + *in++; + continue; + } /* successfully converted some bytes */ in += converted; argsize -= converted;