diff -r 0ce4ce919ccd Include/ucnhash.h --- a/Include/ucnhash.h Mon Jun 16 17:12:39 2014 +0200 +++ b/Include/ucnhash.h Mon Jun 16 18:12:56 2014 +0200 @@ -16,16 +16,16 @@ typedef struct { int size; /* Get name for a given character code. Returns non-zero if - success, zero if not. Does not set Python exceptions. + success, zero if not. Does not set Python exceptions. If self is NULL, data come from the default version of the database. If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */ - int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen, - int with_alias_and_seq); + int (*getname)(PyObject *self, Py_UCS4 code, + char* buffer, Py_ssize_t buflen, int with_alias_and_seq); /* Get character code for a given name. Same error handling as for getname. */ - int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code, - int with_named_seq); + int (*getcode)(PyObject *self, const char* name, Py_ssize_t namelen, + Py_UCS4* code, int with_named_seq); } _PyUnicode_Name_CAPI; diff -r 0ce4ce919ccd Modules/unicodedata.c --- a/Modules/unicodedata.c Mon Jun 16 17:12:39 2014 +0200 +++ b/Modules/unicodedata.c Mon Jun 16 18:12:56 2014 +0200 @@ -13,6 +13,8 @@ ------------------------------------------------------------------------ */ +#define PY_SSIZE_T_CLEAN + #include "Python.h" #include "ucnhash.h" #include "structmember.h" @@ -917,9 +919,9 @@ unicodedata_normalize(PyObject *self, Py /* database code (cut and pasted from the unidb package) */ static unsigned long -_gethash(const char *s, int len, int scale) +_gethash(const char *s, Py_ssize_t len, int scale) { - int i; + Py_ssize_t i; unsigned long h = 0; unsigned long ix; for (i = 0; i < len; i++) { @@ -981,14 +983,14 @@ is_unified_ideograph(Py_UCS4 code) (cp < named_sequences_end)) static int -_getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen, +_getucname(PyObject *self, Py_UCS4 code, char* buffer, Py_ssize_t buflen, int with_alias_and_seq) { /* Find the name 
associated with the given codepoint. * If with_alias_and_seq is 1, check for names in the Private Use Area 15 * that we are using for aliases and named sequences. */ - int offset; - int i; + unsigned int offset; + Py_ssize_t i; int word; unsigned char* w; @@ -1083,10 +1085,10 @@ static int } static int -_cmpname(PyObject *self, int code, const char* name, int namelen) +_cmpname(PyObject *self, int code, const char* name, Py_ssize_t namelen) { /* check if code corresponds to the given name */ - int i; + Py_ssize_t i; char buffer[NAME_MAXLEN]; if (!_getucname(self, code, buffer, sizeof(buffer), 1)) return 0; @@ -1133,7 +1135,7 @@ static int } static int -_getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code, +_getcode(PyObject* self, const char* name, Py_ssize_t namelen, Py_UCS4* code, int with_named_seq) { /* Return the codepoint associated with the given name. @@ -1271,7 +1273,7 @@ unicodedata_lookup(PyObject* self, PyObj Py_UCS4 code; char* name; - int namelen; + Py_ssize_t namelen; unsigned int index; if (!PyArg_ParseTuple(args, "s#:lookup", &name, &namelen)) return NULL;