# HG changeset patch
# User Suzumizaki-Kimitaka
# Date 1391346063 -32400
#      Sun Feb 02 22:01:03 2014 +0900
# Branch 3.3local
# Node ID a800679bf5583d94768e45b667a1a50213a07e83
# Parent  14e402d20d249ed574681cf4fd8748c211403a55
Enable importing NonASCII.pyd on Python 3.3.

diff -r 14e402d20d24 -r a800679bf558 Lib/distutils/command/build_ext.py
--- a/Lib/distutils/command/build_ext.py	Sat Feb 01 19:02:42 2014 +0900
+++ b/Lib/distutils/command/build_ext.py	Sun Feb 02 22:01:03 2014 +0900
@@ -13,6 +13,7 @@
 from distutils.extension import Extension
 from distutils.util import get_platform
 from distutils import log
+from distutils.command import szm62
 
 # this keeps compatibility from 2.3 to 2.5
 if sys.version < "2.6":
@@ -687,7 +688,7 @@
         provided, "PyInit_" + module_name.  Only relevant on Windows, where
         the .pyd file (DLL) must export the module "PyInit_" function.
         """
-        initfunc_name = "PyInit_" + ext.name.split('.')[-1]
+        initfunc_name = "PyInit_" + szm62.encode_if_required(ext.name.split('.')[-1])
         if initfunc_name not in ext.export_symbols:
             ext.export_symbols.append(initfunc_name)
         return ext.export_symbols
diff -r 14e402d20d24 -r a800679bf558 Lib/distutils/command/szm62.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Lib/distutils/command/szm62.py	Sun Feb 02 22:01:03 2014 +0900
@@ -0,0 +1,69 @@
+# -*- encoding: utf-8 -*-
+"""szm62: encode non-ASCII extension module names using ASCII only.
+
+build_ext uses encode_if_required() to form the "PyInit_<name>" export
+symbol.  The output must stay identical to get_szm62_as_object() in
+Python/importdl.c, which computes the symbol looked up at import time.
+"""
+
+base62_chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+# Lead characters, selected by code point range (see encode_szm62).
+header2 = "23456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"  # U+0080..U+07FF, then 1 digit
+header3 = "abcdefghijklmnopqr"  # U+0800..U+FFFF, then 2 digits
+header4 = "stuvw"  # U+10000..U+10FFFF, then 3 digits
+invalid_header = "xyz1"
+
+def get_base62(val):
+    """Return the single base62 digit for val (0 <= val < 62)."""
+    # ">=": base62_chars has exactly 62 entries, so 62 itself is invalid.
+    if val < 0 or val >= 62:
+        raise ValueError
+    return base62_chars[val]
+
+def get_base62x2(val):
+    """Return val (0 <= val < 62**2) as exactly two base62 digits."""
+    if val < 0 or val >= 62*62:
+        raise ValueError
+    return get_base62(val // 62) + get_base62(val % 62)
+
+def get_base62x3(val):
+    """Return val (0 <= val < 62**3) as exactly three base62 digits."""
+    if val < 0 or val >= 62*62*62:
+        raise ValueError
+    return get_base62(val // (62*62)) + get_base62x2(val % (62*62))
+
+def encode_szm62(ustr):
+    """Return ustr encoded with szm62 (the result is ASCII only).
+
+    ASCII alphanumerics are prefixed with '0', other ASCII characters are
+    copied unchanged, and every other code point becomes a lead character
+    followed by one to three base62 digits.
+    """
+    rs = ""
+    for c in ustr:
+        v = ord(c)
+        if v <= 0x7f:
+            if c.isalnum():
+                rs += '0' + c
+            else:
+                rs += c
+        elif v <= 0x7ff:
+            rs += header2[v//62] + get_base62(v % 62)
+        elif v <= 0xffff:
+            rs += header3[v // (62*62)] + get_base62x2(v % (62*62))
+        elif v <= 0x10FFFF:
+            # Supplementary planes are stored relative to U+10000.
+            v -= 0x10000
+            rs += header4[v // (62*62*62)] + get_base62x3(v % (62*62*62))
+        else:
+            raise ValueError("v = 0x{0:X}({0:}) is not supported".format(v))
+    return rs
+
+def encode_if_required(ustr):
+    """Return ustr unchanged if it is pure ASCII, else its szm62 encoding."""
+    try:
+        ustr.encode("ASCII")
+    except UnicodeError:
+        return encode_szm62(ustr)
+    return ustr
+
diff -r 14e402d20d24 -r a800679bf558 Python/importdl.c
--- a/Python/importdl.c	Sat Feb 01 19:02:42 2014 +0900
+++ b/Python/importdl.c	Sun Feb 02 22:01:03 2014 +0900
@@ -1,125 +1,288 @@
-
-/* Support for dynamic loading of extension modules */
-
-#include "Python.h"
-
-/* ./configure sets HAVE_DYNAMIC_LOADING if dynamic loading of modules is
-   supported on this platform. configure will then compile and link in one
-   of the dynload_*.c files, as appropriate. We will call a function in
-   those modules to get a function pointer to the module's init function.
-*/
-#ifdef HAVE_DYNAMIC_LOADING
-
-#include "importdl.h"
-
-#ifdef MS_WINDOWS
-extern dl_funcptr _PyImport_GetDynLoadWindows(const char *shortname,
-                                              PyObject *pathname, FILE *fp);
-#else
-extern dl_funcptr _PyImport_GetDynLoadFunc(const char *shortname,
-                                           const char *pathname, FILE *fp);
-#endif
-
-PyObject *
-_PyImport_LoadDynamicModule(PyObject *name, PyObject *path, FILE *fp)
-{
-    PyObject *m = NULL;
-#ifndef MS_WINDOWS
-    PyObject *pathbytes;
-#endif
-    PyObject *nameascii;
-    char *namestr, *lastdot, *shortname, *packagecontext, *oldcontext;
-    dl_funcptr p0;
-    PyObject* (*p)(void);
-    struct PyModuleDef *def;
-
-    m = _PyImport_FindExtensionObject(name, path);
-    if (m != NULL) {
-        Py_INCREF(m);
-        return m;
-    }
-
-    /* name must be encodable to ASCII because dynamic module must have a
-       function called "PyInit_NAME", they are written in C, and the C language
-       doesn't accept non-ASCII identifiers. */
-    nameascii = PyUnicode_AsEncodedString(name, "ascii", NULL);
-    if (nameascii == NULL)
-        return NULL;
-
-    namestr = PyBytes_AS_STRING(nameascii);
-    if (namestr == NULL)
-        goto error;
-
-    lastdot = strrchr(namestr, '.');
-    if (lastdot == NULL) {
-        packagecontext = NULL;
-        shortname = namestr;
-    }
-    else {
-        packagecontext = namestr;
-        shortname = lastdot+1;
-    }
-
-#ifdef MS_WINDOWS
-    p0 = _PyImport_GetDynLoadWindows(shortname, path, fp);
-#else
-    pathbytes = PyUnicode_EncodeFSDefault(path);
-    if (pathbytes == NULL)
-        goto error;
-    p0 = _PyImport_GetDynLoadFunc(shortname,
-                                  PyBytes_AS_STRING(pathbytes), fp);
-    Py_DECREF(pathbytes);
-#endif
-    p = (PyObject*(*)(void))p0;
-    if (PyErr_Occurred())
-        goto error;
-    if (p == NULL) {
-        PyObject *msg = PyUnicode_FromFormat("dynamic module does not define "
-                                             "init function (PyInit_%s)",
-                                             shortname);
-        PyErr_SetImportError(msg, name, path);
-        Py_DECREF(msg);
-        goto error;
-    }
-    oldcontext = _Py_PackageContext;
-    _Py_PackageContext = packagecontext;
-    m = (*p)();
-    _Py_PackageContext = oldcontext;
-    if (m == NULL)
-        goto error;
-
-    if (PyErr_Occurred()) {
-        PyErr_Format(PyExc_SystemError,
-                     "initialization of %s raised unreported exception",
-                     shortname);
-        goto error;
-    }
-
-    /* Remember pointer to module init function. */
-    def = PyModule_GetDef(m);
-    if (def == NULL) {
-        PyErr_Format(PyExc_SystemError,
-                     "initialization of %s did not return an extension "
-                     "module", shortname);
-        goto error;
-    }
-    def->m_base.m_init = p;
-
-    /* Remember the filename as the __file__ attribute */
-    if (PyModule_AddObject(m, "__file__", path) < 0)
-        PyErr_Clear(); /* Not important enough to report */
-    else
-        Py_INCREF(path);
-
-    if (_PyImport_FixupExtensionObject(m, name, path) < 0)
-        goto error;
-    Py_DECREF(nameascii);
-    return m;
-
-error:
-    Py_DECREF(nameascii);
-    Py_XDECREF(m);
-    return NULL;
-}
-
-#endif /* HAVE_DYNAMIC_LOADING */
+
+/* Support for dynamic loading of extension modules */
+
+#include "Python.h"
+
+/* ./configure sets HAVE_DYNAMIC_LOADING if dynamic loading of modules is
+   supported on this platform. configure will then compile and link in one
+   of the dynload_*.c files, as appropriate. We will call a function in
+   those modules to get a function pointer to the module's init function.
+*/
+#ifdef HAVE_DYNAMIC_LOADING
+
+#include "importdl.h"
+
+#ifdef MS_WINDOWS
+extern dl_funcptr _PyImport_GetDynLoadWindows(const char *shortname,
+                                              PyObject *pathname, FILE *fp);
+#else
+extern dl_funcptr _PyImport_GetDynLoadFunc(const char *shortname,
+                                           const char *pathname, FILE *fp);
+#endif
+
+/* szm62 encoding is only used to refer PyInit_yyy export symbol for
+   xxx.pyd, where xxx contains non-ASCII characters to convert with
+   this encoding to yyy.  The encoder below must produce exactly the
+   same output as encode_szm62() in Lib/distutils/command/szm62.py,
+   which generates the symbol exported by the extension module.
+*/
+
+static const char base62chars[63] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+static const char *base62header2 = base62chars+2; /* points '2', to 'Z' */
+static const char *base62header3 = base62chars+36; /* points 'a', to 'r' */
+static const char *base62header4 = base62chars+54; /* points 's', to 'w' */
+
+/* Write val (0 <= val < 62*62) to buf[0..1] as two base62 digits. */
+static void get62x2(char* buf, const unsigned int val)
+{
+    *buf = base62chars[val / 62];
+    *(buf+1) = base62chars[val % 62];
+}
+
+/* Write val (0 <= val < 62*62*62) to buf[0..2] as three base62 digits. */
+static void get62x3(char* buf, const unsigned int val)
+{
+    *buf = base62chars[val / (62 * 62)];
+    get62x2(buf + 1, val % (62 * 62));
+}
+
+/* Return a new bytes object holding the szm62 encoding of the str object
+   'name', or NULL with an exception set on failure.
+*/
+static PyObject *
+get_szm62_as_object(PyObject* name)
+{
+    const Py_ssize_t length = PyUnicode_GetLength(name);
+    char* bufbase;
+    char* buf;
+    Py_ssize_t i;
+    PyObject* retval = NULL;
+
+    if (length < 0)
+        return NULL;
+    /* Each code point expands to at most 4 bytes. */
+    if (length > (PY_SSIZE_T_MAX - 1) / 4)
+        return PyErr_NoMemory();
+    bufbase = (char*)PyMem_Malloc(length * 4 + 1);
+    if (bufbase == NULL)
+        return PyErr_NoMemory();
+    buf = bufbase;
+    for (i = 0; i < length; i++)
+    {
+        Py_UCS4 cpt = PyUnicode_ReadChar(name, i);
+        if (cpt == (Py_UCS4)-1 && PyErr_Occurred()) {
+            PyMem_Free(bufbase);
+            return NULL;
+        }
+        if (cpt <= 0x7f)
+        {
+            if (Py_UNICODE_ISALNUM(cpt))
+            {
+                *buf++ = '0';
+                *buf++ = (char)cpt;
+            }
+            else {
+                *buf++ = (char)cpt;
+            }
+        }
+        else if (cpt <= 0x7ff) {
+            *buf++ = base62header2[cpt / 62];
+            *buf++ = base62chars[cpt % 62];
+        }
+        else if (cpt <= 0xffff) {
+            *buf++ = base62header3[cpt / (62 * 62)];
+            get62x2(buf, cpt % (62 * 62));
+            buf += 2;
+        }
+        else if (cpt <= 0x10ffff) {
+            /* Supplementary planes are stored relative to U+10000,
+               exactly as encode_szm62() does. */
+            cpt -= 0x10000;
+            *buf++ = base62header4[cpt / (62 * 62 * 62)];
+            get62x3(buf, cpt % (62 * 62 * 62));
+            buf += 3;
+        }
+    }
+    retval = PyBytes_FromStringAndSize(bufbase, buf - bufbase);
+    PyMem_Free(bufbase);
+    return retval;
+}
+
+/* Load the extension module 'name' from the shared library at 'path' by
+   calling its PyInit_<shortname> function.  Returns a new reference to
+   the module, or NULL on failure.
+*/
+PyObject *
+_PyImport_LoadDynamicModule(PyObject *name, PyObject *path, FILE *fp)
+{
+    PyObject *m = NULL;
+#ifndef MS_WINDOWS
+    PyObject *pathbytes;
+#endif
+    PyObject *initname_obj = NULL;
+    PyObject *shortname_obj = NULL;
+    PyObject *fullname_obj = NULL;
+    Py_ssize_t pos, namelen;
+    char *namestr, *lastdot, *shortname, *packagecontext, *oldcontext;
+    dl_funcptr p0;
+    PyObject* (*p)(void);
+    struct PyModuleDef *def;
+
+    m = _PyImport_FindExtensionObject(name, path);
+    if (m != NULL) {
+        Py_INCREF(m);
+        return m;
+    }
+
+    /* We try to find PyInit_yyy function for xxx(.pyd, for example)
+       module. When xxx contains only 7 bit ASCII characters, yyy
+       is xxx. Otherwise, yyy is 'szm62' encoded string.
+       As given the name can be 'zzz.vvv.xxx' style, we should
+       use 'szm62' only with xxx, the last part of 'import'.
+    */
+    fullname_obj = PyUnicode_AsASCIIString(name);
+    if (fullname_obj == NULL) {
+        /* if given name has non-ASCII characters */
+        PyErr_Clear();
+
+        /* encode whole name with utf-8 at first */
+        fullname_obj = PyUnicode_AsUTF8String(name);
+        if (fullname_obj == NULL)
+            return NULL;
+
+        namestr = PyBytes_AS_STRING(fullname_obj);
+        if (namestr == NULL)
+            goto error;
+
+        /* search '.' from end with name */
+        namelen = PyUnicode_GetLength(name);
+        if (namelen == -1)
+            goto error;
+
+        pos = PyUnicode_FindChar(name, '.', 0, namelen, -1);
+        if (pos == -2)
+            goto error;
+        else {
+            if (pos == -1) {
+                /* when '.' is not found */
+                packagecontext = NULL;
+
+                initname_obj = get_szm62_as_object(name);
+                if (initname_obj == NULL)
+                    goto error;
+            }
+            else {
+                /* when '.' is found, */
+                packagecontext = namestr;
+
+                /* ...check last part contains non-ASCII characters */
+                shortname_obj = PyUnicode_Substring(name, pos+1, namelen);
+                if (shortname_obj == NULL)
+                    goto error;
+
+                /* if last part have only 7 bit ASCII, stay it */
+                initname_obj = PyUnicode_AsASCIIString(shortname_obj);
+                if (initname_obj == NULL)
+                {
+                    PyErr_Clear();
+                    /* Only when last part contains non-ASCII,
+                       we encode the part with szm62
+                    */
+                    initname_obj = get_szm62_as_object(shortname_obj);
+                    if (initname_obj == NULL)
+                        goto error;
+                }
+            }
+            shortname = PyBytes_AS_STRING(initname_obj);
+            if (shortname == NULL)
+                goto error;
+        }
+    }
+    else {
+        /* When given name contains ASCII 7bit characters only,
+           simple define whole the name and last part of the name.
+        */
+        namestr = PyBytes_AS_STRING(fullname_obj);
+        if (namestr == NULL)
+            goto error;
+
+        lastdot = strrchr(namestr, '.');
+        if (lastdot == NULL) {
+            packagecontext = NULL;
+            shortname = namestr;
+        }
+        else {
+            packagecontext = namestr;
+            shortname = lastdot+1;
+        }
+    }
+
+#ifdef MS_WINDOWS
+    p0 = _PyImport_GetDynLoadWindows(shortname, path, fp);
+#else
+    pathbytes = PyUnicode_EncodeFSDefault(path);
+    if (pathbytes == NULL)
+        goto error;
+    p0 = _PyImport_GetDynLoadFunc(shortname,
+                                  PyBytes_AS_STRING(pathbytes), fp);
+    Py_DECREF(pathbytes);
+#endif
+    p = (PyObject*(*)(void))p0;
+    if (PyErr_Occurred())
+        goto error;
+    if (p == NULL) {
+        PyObject *msg = PyUnicode_FromFormat("dynamic module does not define "
+                                             "init function (PyInit_%s)",
+                                             shortname);
+        PyErr_SetImportError(msg, name, path);
+        Py_DECREF(msg);
+        goto error;
+    }
+
+    oldcontext = _Py_PackageContext;
+    _Py_PackageContext = packagecontext;
+    m = (*p)();
+    _Py_PackageContext = oldcontext;
+    if (m == NULL)
+        goto error;
+
+    if (PyErr_Occurred()) {
+        PyErr_Format(PyExc_SystemError,
+                     "initialization of %s raised unreported exception",
+                     namestr);
+        goto error;
+    }
+
+    /* Remember pointer to module init function. */
+    def = PyModule_GetDef(m);
+    if (def == NULL) {
+        PyErr_Format(PyExc_SystemError,
+                     "initialization of %s did not return an extension "
+                     "module", namestr);
+        goto error;
+    }
+    def->m_base.m_init = p;
+
+    /* Remember the filename as the __file__ attribute */
+    if (PyModule_AddObject(m, "__file__", path) < 0)
+        PyErr_Clear(); /* Not important enough to report */
+    else
+        Py_INCREF(path);
+
+    if (_PyImport_FixupExtensionObject(m, name, path) < 0)
+        goto error;
+
+    Py_XDECREF(initname_obj);
+    Py_XDECREF(shortname_obj);
+    Py_XDECREF(fullname_obj);
+    return m;
+
+error:
+    Py_XDECREF(initname_obj);
+    Py_XDECREF(shortname_obj);
+    Py_XDECREF(fullname_obj);
+    Py_XDECREF(m);
+    return NULL;
+}
+
+#endif /* HAVE_DYNAMIC_LOADING */