diff -r 970f33dff5ca Modules/unicodedata.c --- a/Modules/unicodedata.c Sat Mar 21 17:24:50 2015 +0100 +++ b/Modules/unicodedata.c Sun Mar 22 08:29:21 2015 +0200 @@ -99,81 +99,64 @@ new_previous_version(const char*name, co } -static Py_UCS4 getuchar(PyUnicodeObject *obj) -{ - if (PyUnicode_READY(obj)) - return (Py_UCS4)-1; - if (PyUnicode_GET_LENGTH(obj) == 1) { - if (PyUnicode_READY(obj)) - return (Py_UCS4)-1; - return PyUnicode_READ_CHAR(obj, 0); - } - PyErr_SetString(PyExc_TypeError, - "need a single Unicode character as parameter"); - return (Py_UCS4)-1; -} - /* --- Module API --------------------------------------------------------- */ /*[clinic input] unicodedata.UCD.decimal - unichr: object(type='PyUnicodeObject *', subclass_of='&PyUnicode_Type') + self: self + chr: int(types='str') default: object=NULL / Converts a Unicode character into its equivalent decimal value. -Returns the decimal value assigned to the Unicode character unichr -as integer. If no such value is defined, default is returned, or, if -not given, ValueError is raised. +Returns the decimal value assigned to the character chr as integer. +If no such value is defined, default is returned, or, if not given, +ValueError is raised. [clinic start generated code]*/ PyDoc_STRVAR(unicodedata_UCD_decimal__doc__, -"decimal($self, unichr, default=None, /)\n" +"decimal($self, chr, default=None, /)\n" "--\n" "\n" "Converts a Unicode character into its equivalent decimal value.\n" "\n" -"Returns the decimal value assigned to the Unicode character unichr\n" -"as integer. If no such value is defined, default is returned, or, if\n" -"not given, ValueError is raised."); +"Returns the decimal value assigned to the character chr as integer.\n" +"If no such value is defined, default is returned, or, if not given,\n" +"ValueError is raised."); #define UNICODEDATA_UCD_DECIMAL_METHODDEF \ {"decimal", (PyCFunction)unicodedata_UCD_decimal, METH_VARARGS, unicodedata_UCD_decimal__doc__}, static PyObject * -unicodedata_UCD_decimal_impl(PreviousDBVersion *self, PyUnicodeObject *unichr, PyObject *default_value); +unicodedata_UCD_decimal_impl(PyObject *self, int chr, PyObject *default_value); static PyObject * -unicodedata_UCD_decimal(PreviousDBVersion *self, PyObject *args) +unicodedata_UCD_decimal(PyObject *self, PyObject *args) { PyObject *return_value = NULL; - PyUnicodeObject *unichr; + int chr; PyObject *default_value = NULL; if (!PyArg_ParseTuple(args, - "O!|O:decimal", - &PyUnicode_Type, &unichr, &default_value)) + "C|O:decimal", + &chr, &default_value)) goto exit; - return_value = unicodedata_UCD_decimal_impl(self, unichr, default_value); + return_value = unicodedata_UCD_decimal_impl(self, chr, default_value); exit: return return_value; } static PyObject * -unicodedata_UCD_decimal_impl(PreviousDBVersion *self, PyUnicodeObject *unichr, PyObject *default_value) -/*[clinic end generated code: output=8689669896d293df input=c25c9d2b4de076b1]*/ +unicodedata_UCD_decimal_impl(PyObject *self, int chr, PyObject *default_value) +/*[clinic end generated code: output=6f4ce89995202af1 input=0ae1e66e5d1c21a1]*/ { int have_old = 0; long rc; - Py_UCS4 c; - - c = getuchar(unichr); - if (c == (Py_UCS4)-1) - return NULL; + Py_UCS4 c = (Py_UCS4)chr; if (self && UCD_Check(self)) { const change_record *old = get_old_record(self, c); @@ -204,61 +187,131 @@ unicodedata_UCD_decimal_impl(PreviousDBV return PyLong_FromLong(rc); } -PyDoc_STRVAR(unicodedata_digit__doc__, -"digit(unichr[, default])\n\ -\n\ -Returns the digit value assigned to the Unicode character unichr as\n\ -integer. If no such value is defined, default is returned, or, if\n\ -not given, ValueError is raised."); +/*[clinic input] + +unicodedata.UCD.digit + + self: self + chr: int(types='str') + default: object=NULL + / + +Converts a Unicode character into its equivalent digit value. + +Returns the digit value assigned to the character chr as integer. +If no such value is defined, default is returned, or, if not given, +ValueError is raised. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_digit__doc__, +"digit($self, chr, default=None, /)\n" +"--\n" +"\n" +"Converts a Unicode character into its equivalent digit value.\n" +"\n" +"Returns the digit value assigned to the character chr as integer.\n" +"If no such value is defined, default is returned, or, if not given,\n" +"ValueError is raised."); + +#define UNICODEDATA_UCD_DIGIT_METHODDEF \ + {"digit", (PyCFunction)unicodedata_UCD_digit, METH_VARARGS, unicodedata_UCD_digit__doc__}, static PyObject * -unicodedata_digit(PyObject *self, PyObject *args) +unicodedata_UCD_digit_impl(PyObject *self, int chr, PyObject *default_value); + +static PyObject * +unicodedata_UCD_digit(PyObject *self, PyObject *args) { - PyUnicodeObject *v; - PyObject *defobj = NULL; + PyObject *return_value = NULL; + int chr; + PyObject *default_value = NULL; + + if (!PyArg_ParseTuple(args, + "C|O:digit", + &chr, &default_value)) + goto exit; + return_value = unicodedata_UCD_digit_impl(self, chr, default_value); + +exit: + return return_value; +} + +static PyObject * +unicodedata_UCD_digit_impl(PyObject *self, int chr, PyObject *default_value) +/*[clinic end generated code: output=b985bfbe77976eb4 input=5ba3e07a61262f69]*/ +{ long rc; - Py_UCS4 c; - - if (!PyArg_ParseTuple(args, "O!|O:digit", &PyUnicode_Type, &v, &defobj)) - return NULL; - c = getuchar(v); - if (c == (Py_UCS4)-1) - return NULL; + Py_UCS4 c = (Py_UCS4)chr; rc = Py_UNICODE_TODIGIT(c); if (rc < 0) { - if (defobj == NULL) { + if (default_value == NULL) { PyErr_SetString(PyExc_ValueError, "not a digit"); return NULL; } else { - Py_INCREF(defobj); - return defobj; + Py_INCREF(default_value); + return default_value; } } return PyLong_FromLong(rc); } -PyDoc_STRVAR(unicodedata_numeric__doc__, -"numeric(unichr[, default])\n\ -\n\ -Returns the numeric value assigned to the Unicode character unichr\n\ -as float. If no such value is defined, default is returned, or, if\n\ -not given, ValueError is raised."); +/*[clinic input] + +unicodedata.UCD.numeric + + self: self + chr: int(types='str') + default: object=NULL + / + +Converts a Unicode character into its equivalent numeric value. + +Returns the numeric value assigned to the character chr as float. +If no such value is defined, default is returned, or, if not given, +ValueError is raised. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_numeric__doc__, +"numeric($self, chr, default=None, /)\n" +"--\n" +"\n" +"Converts a Unicode character into its equivalent numeric value.\n" +"\n" +"Returns the numeric value assigned to the character chr as float.\n" +"If no such value is defined, default is returned, or, if not given,\n" +"ValueError is raised."); + +#define UNICODEDATA_UCD_NUMERIC_METHODDEF \ + {"numeric", (PyCFunction)unicodedata_UCD_numeric, METH_VARARGS, unicodedata_UCD_numeric__doc__}, static PyObject * -unicodedata_numeric(PyObject *self, PyObject *args) +unicodedata_UCD_numeric_impl(PyObject *self, int chr, PyObject *default_value); + +static PyObject * +unicodedata_UCD_numeric(PyObject *self, PyObject *args) { - PyUnicodeObject *v; - PyObject *defobj = NULL; + PyObject *return_value = NULL; + int chr; + PyObject *default_value = NULL; + + if (!PyArg_ParseTuple(args, + "C|O:numeric", + &chr, &default_value)) + goto exit; + return_value = unicodedata_UCD_numeric_impl(self, chr, default_value); + +exit: + return return_value; +} + +static PyObject * +unicodedata_UCD_numeric_impl(PyObject *self, int chr, PyObject *default_value) +/*[clinic end generated code: output=0ef8fd0247c8e8fb input=ef5941a0c973d41b]*/ +{ int have_old = 0; double rc; - Py_UCS4 c; - - if (!PyArg_ParseTuple(args, "O!|O:numeric", &PyUnicode_Type, &v, &defobj)) - return NULL; - c = getuchar(v); - if (c == (Py_UCS4)-1) - return NULL; + Py_UCS4 c = (Py_UCS4)chr; if (self && UCD_Check(self)) { const change_record *old = get_old_record(self, c); @@ -276,37 +329,63 @@ unicodedata_numeric(PyObject *self, PyOb if (!have_old) rc = Py_UNICODE_TONUMERIC(c); if (rc == -1.0) { - if (defobj == NULL) { + if (default_value == NULL) { PyErr_SetString(PyExc_ValueError, "not a numeric character"); return NULL; } else { - Py_INCREF(defobj); - return defobj; + Py_INCREF(default_value); + return default_value; } } return PyFloat_FromDouble(rc); } -PyDoc_STRVAR(unicodedata_category__doc__, -"category(unichr)\n\ -\n\ -Returns the general category assigned to the Unicode character\n\ -unichr as string."); +/*[clinic input] + +unicodedata.UCD.category + + self: self + chr: int(types='str') + / + +Returns the general category assigned to the character chr as string. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_category__doc__, +"category($self, chr, /)\n" +"--\n" +"\n" +"Returns the general category assigned to the character chr as string."); + +#define UNICODEDATA_UCD_CATEGORY_METHODDEF \ + {"category", (PyCFunction)unicodedata_UCD_category, METH_VARARGS, unicodedata_UCD_category__doc__}, static PyObject * -unicodedata_category(PyObject *self, PyObject *args) +unicodedata_UCD_category_impl(PyObject *self, int chr); + +static PyObject * +unicodedata_UCD_category(PyObject *self, PyObject *args) { - PyUnicodeObject *v; + PyObject *return_value = NULL; + int chr; + + if (!PyArg_ParseTuple(args, + "C:category", + &chr)) + goto exit; + return_value = unicodedata_UCD_category_impl(self, chr); + +exit: + return return_value; +} + +static PyObject * +unicodedata_UCD_category_impl(PyObject *self, int chr) +/*[clinic end generated code: output=9ac981ca8360ecb2 input=f63b40d07b014b04]*/ +{ int index; - Py_UCS4 c; - - if (!PyArg_ParseTuple(args, "O!:category", - &PyUnicode_Type, &v)) - return NULL; - c = getuchar(v); - if (c == (Py_UCS4)-1) - return NULL; + Py_UCS4 c = (Py_UCS4)chr; index = (int) _getrecord_ex(c)->category; if (self && UCD_Check(self)) { const change_record *old = get_old_record(self, c); @@ -316,26 +395,55 @@ unicodedata_category(PyObject *self, PyO return PyUnicode_FromString(_PyUnicode_CategoryNames[index]); } -PyDoc_STRVAR(unicodedata_bidirectional__doc__, -"bidirectional(unichr)\n\ -\n\ -Returns the bidirectional class assigned to the Unicode character\n\ -unichr as string. If no such value is defined, an empty string is\n\ -returned."); +/*[clinic input] + +unicodedata.UCD.bidirectional + + self: self + chr: int(types='str') + / + +Returns the bidirectional class assigned to the character chr as string. + +If no such value is defined, an empty string is returned. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_bidirectional__doc__, +"bidirectional($self, chr, /)\n" +"--\n" +"\n" +"Returns the bidirectional class assigned to the character chr as string.\n" +"\n" +"If no such value is defined, an empty string is returned."); + +#define UNICODEDATA_UCD_BIDIRECTIONAL_METHODDEF \ + {"bidirectional", (PyCFunction)unicodedata_UCD_bidirectional, METH_VARARGS, unicodedata_UCD_bidirectional__doc__}, static PyObject * -unicodedata_bidirectional(PyObject *self, PyObject *args) +unicodedata_UCD_bidirectional_impl(PyObject *self, int chr); + +static PyObject * +unicodedata_UCD_bidirectional(PyObject *self, PyObject *args) { - PyUnicodeObject *v; + PyObject *return_value = NULL; + int chr; + + if (!PyArg_ParseTuple(args, + "C:bidirectional", + &chr)) + goto exit; + return_value = unicodedata_UCD_bidirectional_impl(self, chr); + +exit: + return return_value; +} + +static PyObject * +unicodedata_UCD_bidirectional_impl(PyObject *self, int chr) +/*[clinic end generated code: output=1ebcca51238a6014 input=4bb7f87e3909920a]*/ +{ int index; - Py_UCS4 c; - - if (!PyArg_ParseTuple(args, "O!:bidirectional", - &PyUnicode_Type, &v)) - return NULL; - c = getuchar(v); - if (c == (Py_UCS4)-1) - return NULL; + Py_UCS4 c = (Py_UCS4)chr; index = (int) _getrecord_ex(c)->bidirectional; if (self && UCD_Check(self)) { const change_record *old = get_old_record(self, c); @@ -347,55 +455,123 @@ unicodedata_bidirectional(PyObject *self return PyUnicode_FromString(_PyUnicode_BidirectionalNames[index]); } -PyDoc_STRVAR(unicodedata_combining__doc__, -"combining(unichr)\n\ -\n\ -Returns the canonical combining class assigned to the Unicode\n\ -character unichr as integer. Returns 0 if no combining class is\n\ -defined."); +/*[clinic input] + +unicodedata.UCD.combining -> int + + self: self + chr: int(types='str') + / + +Returns the canonical combining class assigned to the character chr as integer. + +Returns 0 if no combining class is defined. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_combining__doc__, +"combining($self, chr, /)\n" +"--\n" +"\n" +"Returns the canonical combining class assigned to the character chr as integer.\n" +"\n" +"Returns 0 if no combining class is defined."); + +#define UNICODEDATA_UCD_COMBINING_METHODDEF \ + {"combining", (PyCFunction)unicodedata_UCD_combining, METH_VARARGS, unicodedata_UCD_combining__doc__}, + +static int +unicodedata_UCD_combining_impl(PyObject *self, int chr); static PyObject * -unicodedata_combining(PyObject *self, PyObject *args) +unicodedata_UCD_combining(PyObject *self, PyObject *args) { - PyUnicodeObject *v; + PyObject *return_value = NULL; + int chr; + int _return_value; + + if (!PyArg_ParseTuple(args, + "C:combining", + &chr)) + goto exit; + _return_value = unicodedata_UCD_combining_impl(self, chr); + if ((_return_value == -1) && PyErr_Occurred()) + goto exit; + return_value = PyLong_FromLong((long)_return_value); + +exit: + return return_value; +} + +static int +unicodedata_UCD_combining_impl(PyObject *self, int chr) +/*[clinic end generated code: output=149f21dd6b394ea5 input=bd107589e91670a1]*/ +{ int index; - Py_UCS4 c; - - if (!PyArg_ParseTuple(args, "O!:combining", - &PyUnicode_Type, &v)) - return NULL; - c = getuchar(v); - if (c == (Py_UCS4)-1) - return NULL; + Py_UCS4 c = (Py_UCS4)chr; index = (int) _getrecord_ex(c)->combining; if (self && UCD_Check(self)) { const change_record *old = get_old_record(self, c); if (old->category_changed == 0) index = 0; /* unassigned */ } - return PyLong_FromLong(index); + return index; } -PyDoc_STRVAR(unicodedata_mirrored__doc__, -"mirrored(unichr)\n\ -\n\ -Returns the mirrored property assigned to the Unicode character\n\ -unichr as integer. Returns 1 if the character has been identified as\n\ -a \"mirrored\" character in bidirectional text, 0 otherwise."); +/*[clinic input] + +unicodedata.UCD.mirrored -> int + + self: self + chr: int(types='str') + / + +Returns the mirrored property assigned to the character chr as integer. + +Returns 1 if the character has been identified as a "mirrored" +character in bidirectional text, 0 otherwise. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_mirrored__doc__, +"mirrored($self, chr, /)\n" +"--\n" +"\n" +"Returns the mirrored property assigned to the character chr as integer.\n" +"\n" +"Returns 1 if the character has been identified as a \"mirrored\"\n" +"character in bidirectional text, 0 otherwise."); + +#define UNICODEDATA_UCD_MIRRORED_METHODDEF \ + {"mirrored", (PyCFunction)unicodedata_UCD_mirrored, METH_VARARGS, unicodedata_UCD_mirrored__doc__}, + +static int +unicodedata_UCD_mirrored_impl(PyObject *self, int chr); static PyObject * -unicodedata_mirrored(PyObject *self, PyObject *args) +unicodedata_UCD_mirrored(PyObject *self, PyObject *args) { - PyUnicodeObject *v; + PyObject *return_value = NULL; + int chr; + int _return_value; + + if (!PyArg_ParseTuple(args, + "C:mirrored", + &chr)) + goto exit; + _return_value = unicodedata_UCD_mirrored_impl(self, chr); + if ((_return_value == -1) && PyErr_Occurred()) + goto exit; + return_value = PyLong_FromLong((long)_return_value); + +exit: + return return_value; +} + +static int +unicodedata_UCD_mirrored_impl(PyObject *self, int chr) +/*[clinic end generated code: output=478ef86760ad3445 input=5a8d99cf9b388a9f]*/ +{ int index; - Py_UCS4 c; - - if (!PyArg_ParseTuple(args, "O!:mirrored", - &PyUnicode_Type, &v)) - return NULL; - c = getuchar(v); - if (c == (Py_UCS4)-1) - return NULL; + Py_UCS4 c = (Py_UCS4)chr; index = (int) _getrecord_ex(c)->mirrored; if (self && UCD_Check(self)) { const change_record *old = get_old_record(self, c); @@ -404,28 +580,54 @@ unicodedata_mirrored(PyObject *self, PyO else if (old->mirrored_changed != 0xFF) index = old->mirrored_changed; } - return PyLong_FromLong(index); + return index; } -PyDoc_STRVAR(unicodedata_east_asian_width__doc__, -"east_asian_width(unichr)\n\ -\n\ -Returns the east asian width assigned to the Unicode character\n\ -unichr as string."); +/*[clinic input] + +unicodedata.UCD.east_asian_width + + self: self + chr: int(types='str') + / + +Returns the east asian width assigned to the character chr as string. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_east_asian_width__doc__, +"east_asian_width($self, chr, /)\n" +"--\n" +"\n" +"Returns the east asian width assigned to the character chr as string."); + +#define UNICODEDATA_UCD_EAST_ASIAN_WIDTH_METHODDEF \ + {"east_asian_width", (PyCFunction)unicodedata_UCD_east_asian_width, METH_VARARGS, unicodedata_UCD_east_asian_width__doc__}, static PyObject * -unicodedata_east_asian_width(PyObject *self, PyObject *args) +unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr); + +static PyObject * +unicodedata_UCD_east_asian_width(PyObject *self, PyObject *args) { - PyUnicodeObject *v; + PyObject *return_value = NULL; + int chr; + + if (!PyArg_ParseTuple(args, + "C:east_asian_width", + &chr)) + goto exit; + return_value = unicodedata_UCD_east_asian_width_impl(self, chr); + +exit: + return return_value; +} + +static PyObject * +unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr) +/*[clinic end generated code: output=55f4fb9b91a12e02 input=d3da5270eef791a0]*/ +{ int index; - Py_UCS4 c; - - if (!PyArg_ParseTuple(args, "O!:east_asian_width", - &PyUnicode_Type, &v)) - return NULL; - c = getuchar(v); - if (c == (Py_UCS4)-1) - return NULL; + Py_UCS4 c = (Py_UCS4)chr; index = (int) _getrecord_ex(c)->east_asian_width; if (self && UCD_Check(self)) { const change_record *old = get_old_record(self, c); @@ -435,29 +637,58 @@ unicodedata_east_asian_width(PyObject *s return PyUnicode_FromString(_PyUnicode_EastAsianWidthNames[index]); } -PyDoc_STRVAR(unicodedata_decomposition__doc__, -"decomposition(unichr)\n\ -\n\ -Returns the character decomposition mapping assigned to the Unicode\n\ -character unichr as string. An empty string is returned in case no\n\ -such mapping is defined."); +/*[clinic input] + +unicodedata.UCD.decomposition + + self: self + chr: int(types='str') + / + +Returns the character decomposition mapping assigned to the character chr as string. + +An empty string is returned in case no such mapping is defined. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_decomposition__doc__, +"decomposition($self, chr, /)\n" +"--\n" +"\n" +"Returns the character decomposition mapping assigned to the character chr as string.\n" +"\n" +"An empty string is returned in case no such mapping is defined."); + +#define UNICODEDATA_UCD_DECOMPOSITION_METHODDEF \ + {"decomposition", (PyCFunction)unicodedata_UCD_decomposition, METH_VARARGS, unicodedata_UCD_decomposition__doc__}, static PyObject * -unicodedata_decomposition(PyObject *self, PyObject *args) +unicodedata_UCD_decomposition_impl(PyObject *self, int chr); + +static PyObject * +unicodedata_UCD_decomposition(PyObject *self, PyObject *args) { - PyUnicodeObject *v; + PyObject *return_value = NULL; + int chr; + + if (!PyArg_ParseTuple(args, + "C:decomposition", + &chr)) + goto exit; + return_value = unicodedata_UCD_decomposition_impl(self, chr); + +exit: + return return_value; +} + +static PyObject * +unicodedata_UCD_decomposition_impl(PyObject *self, int chr) +/*[clinic end generated code: output=6213fc1451f7ae92 input=b3cd8150625bebe6]*/ +{ char decomp[256]; int code, index, count; size_t i; unsigned int prefix_index; - Py_UCS4 c; - - if (!PyArg_ParseTuple(args, "O!:decomposition", - &PyUnicode_Type, &v)) - return NULL; - c = getuchar(v); - if (c == (Py_UCS4)-1) - return NULL; + Py_UCS4 c = (Py_UCS4)chr; code = (int)c; @@ -858,22 +1089,55 @@ is_normalized(PyObject *self, PyObject * return 1; /* certainly normalized */ } -PyDoc_STRVAR(unicodedata_normalize__doc__, -"normalize(form, unistr)\n\ -\n\ -Return the normal form 'form' for the Unicode string unistr. Valid\n\ -values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'."); +/*[clinic input] -static PyObject* -unicodedata_normalize(PyObject *self, PyObject *args) +unicodedata.UCD.normalize + + self: self + form: str + unistr as input: object(subclass_of='&PyUnicode_Type') + / + +Return the normal form 'form' for the Unicode string unistr. + +Valid values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_normalize__doc__, +"normalize($self, form, unistr, /)\n" +"--\n" +"\n" +"Return the normal form \'form\' for the Unicode string unistr.\n" +"\n" +"Valid values for form are \'NFC\', \'NFKC\', \'NFD\', and \'NFKD\'."); + +#define UNICODEDATA_UCD_NORMALIZE_METHODDEF \ + {"normalize", (PyCFunction)unicodedata_UCD_normalize, METH_VARARGS, unicodedata_UCD_normalize__doc__}, + +static PyObject * +unicodedata_UCD_normalize_impl(PyObject *self, const char *form, PyObject *input); + +static PyObject * +unicodedata_UCD_normalize(PyObject *self, PyObject *args) { - char *form; + PyObject *return_value = NULL; + const char *form; PyObject *input; - if(!PyArg_ParseTuple(args, "sO!:normalize", - &form, &PyUnicode_Type, &input)) - return NULL; + if (!PyArg_ParseTuple(args, + "sO!:normalize", + &form, &PyUnicode_Type, &input)) + goto exit; + return_value = unicodedata_UCD_normalize_impl(self, form, input); +exit: + return return_value; +} + +static PyObject * +unicodedata_UCD_normalize_impl(PyObject *self, const char *form, PyObject *input) +/*[clinic end generated code: output=7ee0b3d2a91aeded input=70b14ad423426fa6]*/ +{ if (PyUnicode_READY(input) == -1) return NULL; @@ -1232,64 +1496,132 @@ static const _PyUnicode_Name_CAPI hashAP /* -------------------------------------------------------------------- */ /* Python bindings */ -PyDoc_STRVAR(unicodedata_name__doc__, -"name(unichr[, default])\n\ -Returns the name assigned to the Unicode character unichr as a\n\ -string. If no name is defined, default is returned, or, if not\n\ -given, ValueError is raised."); +/*[clinic input] + +unicodedata.UCD.name + + self: self + chr: int(types='str') + default: object=NULL + / + +Returns the name assigned to the character chr as a string. + +If no name is defined, default is returned, or, if not given, +ValueError is raised. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_name__doc__, +"name($self, chr, default=None, /)\n" +"--\n" +"\n" +"Returns the name assigned to the character chr as a string.\n" +"\n" +"If no name is defined, default is returned, or, if not given,\n" +"ValueError is raised."); + +#define UNICODEDATA_UCD_NAME_METHODDEF \ + {"name", (PyCFunction)unicodedata_UCD_name, METH_VARARGS, unicodedata_UCD_name__doc__}, static PyObject * -unicodedata_name(PyObject* self, PyObject* args) +unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value); + +static PyObject * +unicodedata_UCD_name(PyObject *self, PyObject *args) +{ + PyObject *return_value = NULL; + int chr; + PyObject *default_value = NULL; + + if (!PyArg_ParseTuple(args, + "C|O:name", + &chr, &default_value)) + goto exit; + return_value = unicodedata_UCD_name_impl(self, chr, default_value); + +exit: + return return_value; +} + +static PyObject * +unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value) +/*[clinic end generated code: output=abe5b9faf1be0980 input=7f092c4861613da0]*/ { char name[NAME_MAXLEN]; - Py_UCS4 c; - - PyUnicodeObject* v; - PyObject* defobj = NULL; - if (!PyArg_ParseTuple(args, "O!|O:name", &PyUnicode_Type, &v, &defobj)) - return NULL; - - c = getuchar(v); - if (c == (Py_UCS4)-1) - return NULL; + Py_UCS4 c = (Py_UCS4)chr; if (!_getucname(self, c, name, sizeof(name), 0)) { - if (defobj == NULL) { + if (default_value == NULL) { PyErr_SetString(PyExc_ValueError, "no such name"); return NULL; } else { - Py_INCREF(defobj); - return defobj; + Py_INCREF(default_value); + return default_value; } } return PyUnicode_FromString(name); } -PyDoc_STRVAR(unicodedata_lookup__doc__, -"lookup(name)\n\ -\n\ -Look up character by name. If a character with the\n\ -given name is found, return the corresponding Unicode\n\ -character. If not found, KeyError is raised."); +/*[clinic input] + +unicodedata.UCD.lookup + + self: self + name: str(length=True) + / + +Look up character by name. + +If a character with the given name is found, return the +corresponding character. If not found, KeyError is raised. +[clinic start generated code]*/ + +PyDoc_STRVAR(unicodedata_UCD_lookup__doc__, +"lookup($self, name, /)\n" +"--\n" +"\n" +"Look up character by name.\n" +"\n" +"If a character with the given name is found, return the\n" +"corresponding character. If not found, KeyError is raised."); + +#define UNICODEDATA_UCD_LOOKUP_METHODDEF \ + {"lookup", (PyCFunction)unicodedata_UCD_lookup, METH_VARARGS, unicodedata_UCD_lookup__doc__}, static PyObject * -unicodedata_lookup(PyObject* self, PyObject* args) +unicodedata_UCD_lookup_impl(PyObject *self, const char *name, Py_ssize_clean_t name_length); + +static PyObject * +unicodedata_UCD_lookup(PyObject *self, PyObject *args) +{ + PyObject *return_value = NULL; + const char *name; + Py_ssize_clean_t name_length; + + if (!PyArg_ParseTuple(args, + "s#:lookup", + &name, &name_length)) + goto exit; + return_value = unicodedata_UCD_lookup_impl(self, name, name_length); + +exit: + return return_value; +} + +static PyObject * +unicodedata_UCD_lookup_impl(PyObject *self, const char *name, Py_ssize_clean_t name_length) +/*[clinic end generated code: output=b4f920a9876f84ec input=e07e8de94d2790a8]*/ { Py_UCS4 code; - - char* name; - Py_ssize_t namelen; unsigned int index; - if (!PyArg_ParseTuple(args, "s#:lookup", &name, &namelen)) - return NULL; - if (namelen > INT_MAX) { + if (name_length > INT_MAX) { PyErr_SetString(PyExc_KeyError, "name too long"); return NULL; } - if (!_getcode(self, name, (int)namelen, &code, 1)) { + if (!_getcode(self, name, (int)name_length, &code, 1)) { PyErr_Format(PyExc_KeyError, "undefined character name '%s'", name); return NULL; } @@ -1308,24 +1640,17 @@ unicodedata_lookup(PyObject* self, PyObj static PyMethodDef unicodedata_functions[] = { UNICODEDATA_UCD_DECIMAL_METHODDEF - {"digit", unicodedata_digit, METH_VARARGS, unicodedata_digit__doc__}, - {"numeric", unicodedata_numeric, METH_VARARGS, unicodedata_numeric__doc__}, - {"category", unicodedata_category, METH_VARARGS, - unicodedata_category__doc__}, - {"bidirectional", unicodedata_bidirectional, METH_VARARGS, - unicodedata_bidirectional__doc__}, - {"combining", unicodedata_combining, METH_VARARGS, - unicodedata_combining__doc__}, - {"mirrored", unicodedata_mirrored, METH_VARARGS, - unicodedata_mirrored__doc__}, - {"east_asian_width", unicodedata_east_asian_width, METH_VARARGS, - unicodedata_east_asian_width__doc__}, - {"decomposition", unicodedata_decomposition, METH_VARARGS, - unicodedata_decomposition__doc__}, - {"name", unicodedata_name, METH_VARARGS, unicodedata_name__doc__}, - {"lookup", unicodedata_lookup, METH_VARARGS, unicodedata_lookup__doc__}, - {"normalize", unicodedata_normalize, METH_VARARGS, - unicodedata_normalize__doc__}, + UNICODEDATA_UCD_DIGIT_METHODDEF + UNICODEDATA_UCD_NUMERIC_METHODDEF + UNICODEDATA_UCD_CATEGORY_METHODDEF + UNICODEDATA_UCD_BIDIRECTIONAL_METHODDEF + UNICODEDATA_UCD_COMBINING_METHODDEF + UNICODEDATA_UCD_MIRRORED_METHODDEF + UNICODEDATA_UCD_EAST_ASIAN_WIDTH_METHODDEF + UNICODEDATA_UCD_DECOMPOSITION_METHODDEF + UNICODEDATA_UCD_NAME_METHODDEF + UNICODEDATA_UCD_LOOKUP_METHODDEF + UNICODEDATA_UCD_NORMALIZE_METHODDEF {NULL, NULL} /* sentinel */ };