Index: Include/Python.h =================================================================== --- Include/Python.h (révision 83981) +++ Include/Python.h (copie de travail) @@ -126,11 +126,14 @@ #ifdef __cplusplus extern "C" { #endif + /* _Py_Mangle is defined in compile.c */ PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name); -/* _Py_char2wchar lives in main.c */ +/* _Py_char2wchar and _Py_wchar2char live in main.c */ PyAPI_FUNC(wchar_t *) _Py_char2wchar(char *); +PyAPI_FUNC(char*) _Py_wchar2char(const wchar_t *text); /* caller releases the result with PyMem_Free() */ + #ifdef __cplusplus } #endif Index: Modules/getpath.c =================================================================== --- Modules/getpath.c (révision 83981) +++ Modules/getpath.c (copie de travail) @@ -139,13 +139,16 @@ static int _wstat(const wchar_t* path, struct stat *buf) { - char fname[PATH_MAX]; - size_t res = wcstombs(fname, path, sizeof(fname)); - if (res == (size_t)-1) { + int err; + char *fname; + fname = _Py_wchar2char(path); /* newly allocated byte string, so the path is no longer limited to PATH_MAX bytes */ + if (fname == NULL) { errno = EINVAL; return -1; } - return stat(fname, buf); + err = stat(fname, buf); + PyMem_Free(fname); /* NOTE(review): runs between stat() and the return; errno from a failed stat() survives only if PyMem_Free() leaves errno untouched, confirm */ + return err; } #endif Index: Modules/main.c =================================================================== --- Modules/main.c (révision 83981) +++ Modules/main.c (copie de travail) @@ -105,20 +105,21 @@ static FILE* _wfopen(const wchar_t *path, const wchar_t *mode) { - char cpath[PATH_MAX]; + FILE *f; + char *cpath; char cmode[10]; size_t r; - r = wcstombs(cpath, path, PATH_MAX); - if (r == (size_t)-1 || r >= PATH_MAX) { - errno = EINVAL; - return NULL; - } r = wcstombs(cmode, mode, 10); if (r == (size_t)-1 || r >= 10) { errno = EINVAL; return NULL; } - return fopen(cpath, cmode); + cpath = _Py_wchar2char(path); /* newly allocated byte string; freed once fopen() has returned */ + if (cpath == NULL) + return NULL; /* NOTE(review): errno is not set explicitly on this path, although the removed wcstombs() check set EINVAL and _wstat() still does; confirm callers do not rely on it */ + f = fopen(cpath, cmode); + PyMem_Free(cpath); + return f; } #endif @@ -731,7 +732,85 @@ *argv = orig_argv; } +/* Encode a (wide) character string to the locale encoding with the + surrogateescape error handler (characters in range 
U+DC80..U+DCFF are + converted to bytes 0x80..0xFF). + This function is the reverse operation of _Py_char2wchar(). + + Return a pointer to a newly allocated byte string (use PyMem_Free() to free + the memory), or NULL on error (conversion error or memory error). */ +char* +_Py_wchar2char(const wchar_t *text) +{ + const size_t len = wcslen(text); + char *result = NULL, *bytes = NULL; + size_t i, size, converted; + wchar_t c, buf[2]; + + /* The function works in two steps: + 1. compute the length of the output buffer in bytes (size) + 2. write the output bytes */ + size = 0; + buf[1] = 0; /* buf is a one-character, nul-terminated string handed to wcstombs() */ + while (1) { + for (i=0; i < len; i++) { + c = text[i]; + if (c >= 0xdc80 && c <= 0xdcff) { + /* UTF-8b surrogate: stands for exactly one raw byte 0x80..0xFF */ + if (bytes != NULL) { + *bytes++ = c - 0xdc00; + size--; + } + else + size++; + continue; + } + else { + buf[0] = c; + if (bytes != NULL) + converted = wcstombs(bytes, buf, size); + else + converted = wcstombs(NULL, buf, 0); + if (converted == (size_t)-1) { + if (result != NULL) + PyMem_Free(result); + return NULL; + } + if (bytes != NULL) { + bytes += converted; + size -= converted; + } + else + size += converted; + } + } + if (result != NULL) { /* the write pass just finished: terminate the string and stop */ + *bytes = 0; + break; + } + + size += 1; /* nul byte at the end */ + result = PyMem_Malloc(size); + if (result == NULL) + return NULL; + bytes = result; + } + return result; +} + +/* Decode a byte string from the locale encoding with the + surrogateescape error handler (undecodable bytes are decoded as characters + in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate + character, escape the bytes using the surrogateescape error handler instead + of decoding them. + + Use _Py_wchar2char() to encode the character string back to a byte + string. + + Return a pointer to a newly allocated (wide) character string (use + PyMem_Free() to free the memory), or NULL on error (conversion error or + memory error). */ wchar_t* _Py_char2wchar(char* arg) {