Bug Summary

File: Modules/unicodedata.c
Location: line 521, column 13
Description: Assigned value is garbage or undefined

Annotated Source Code

1/* ------------------------------------------------------------------------
2
3 unicodedata -- Provides access to the Unicode 5.2 data base.
4
5 Data was extracted from the Unicode 5.2 UnicodeData.txt file.
6
7 Written by Marc-Andre Lemburg (mal@lemburg.com).
8 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
9 Modified by Martin v. Löwis (martin@v.loewis.de)
10
11 Copyright (c) Corporation for National Research Initiatives.
12
13 ------------------------------------------------------------------------ */
14
15#include "Python.h"
16#include "ucnhash.h"
17#include "structmember.h"
18
19/* character properties */
20
21typedef struct {
22 const unsigned char category; /* index into
23 _PyUnicode_CategoryNames */
24 const unsigned char combining; /* combining class value 0 - 255 */
25 const unsigned char bidirectional; /* index into
26 _PyUnicode_BidirectionalNames */
27 const unsigned char mirrored; /* true if mirrored in bidir mode */
28 const unsigned char east_asian_width; /* index into
29 _PyUnicode_EastAsianWidth */
30 const unsigned char normalization_quick_check; /* see is_normalized() */
31} _PyUnicode_DatabaseRecord;
32
33typedef struct change_record {
34 /* sequence of fields should be the same as in merge_old_version */
35 const unsigned char bidir_changed;
36 const unsigned char category_changed;
37 const unsigned char decimal_changed;
38 const unsigned char mirrored_changed;
39 const double numeric_changed;
40} change_record;
41
42/* data file generated by Tools/unicode/makeunicodedata.py */
43#include "unicodedata_db.h"
44
45static const _PyUnicode_DatabaseRecord*
46_getrecord_ex(Py_UCS4 code)
47{
48 int index;
49 if (code >= 0x110000)
50 index = 0;
51 else {
52 index = index1[(code>>SHIFT7)];
53 index = index2[(index<<SHIFT7)+(code&((1<<SHIFT7)-1))];
54 }
55
56 return &_PyUnicode_Database_Records[index];
57}
58
59/* ------------- Previous-version API ------------------------------------- */
60typedef struct previous_version {
61 PyObject_HEADPyObject ob_base;
62 const char *name;
63 const change_record* (*getrecord)(Py_UCS4);
64 Py_UCS4 (*normalization)(Py_UCS4);
65} PreviousDBVersion;
66
67#define get_old_record(self, v)((((PreviousDBVersion*)self)->getrecord)(v)) ((((PreviousDBVersion*)self)->getrecord)(v))
68
69static PyMemberDef DB_members[] = {
70 {"unidata_version", T_STRING5, offsetof(PreviousDBVersion, name)__builtin_offsetof(PreviousDBVersion, name), READONLY1},
71 {NULL((void*)0)}
72};
73
74/* forward declaration */
75static PyTypeObject UCD_Type;
76#define UCD_Check(o)((((PyObject*)(o))->ob_type)==&UCD_Type) (Py_TYPE(o)(((PyObject*)(o))->ob_type)==&UCD_Type)
77
78static PyObject*
79new_previous_version(const char*name, const change_record* (*getrecord)(Py_UCS4),
80 Py_UCS4 (*normalization)(Py_UCS4))
81{
82 PreviousDBVersion *self;
83 self = PyObject_New(PreviousDBVersion, &UCD_Type)( (PreviousDBVersion *) _PyObject_New(&UCD_Type) );
84 if (self == NULL((void*)0))
85 return NULL((void*)0);
86 self->name = name;
87 self->getrecord = getrecord;
88 self->normalization = normalization;
89 return (PyObject*)self;
90}
91
92
93static Py_UCS4 getuchar(PyUnicodeObject *obj)
94{
95 Py_UNICODE *v = PyUnicode_AS_UNICODE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 95
, "PyUnicode_Check(obj)") : (void)0),(((PyUnicodeObject *)(obj
))->str))
;
96
97 if (PyUnicode_GET_SIZE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 97
, "PyUnicode_Check(obj)") : (void)0),(((PyUnicodeObject *)(obj
))->length))
== 1)
98 return *v;
99#ifndef Py_UNICODE_WIDE
100 else if ((PyUnicode_GET_SIZE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 100
, "PyUnicode_Check(obj)") : (void)0),(((PyUnicodeObject *)(obj
))->length))
== 2) &&
101 (0xD800 <= v[0] && v[0] <= 0xDBFF) &&
102 (0xDC00 <= v[1] && v[1] <= 0xDFFF))
103 return (((v[0] & 0x3FF)<<10) | (v[1] & 0x3FF)) + 0x10000;
104#endif
105 PyErr_SetString(PyExc_TypeError,
106 "need a single Unicode character as parameter");
107 return (Py_UCS4)-1;
108}
109
110/* --- Module API --------------------------------------------------------- */
111
112PyDoc_STRVAR(unicodedata_decimal__doc__,static char unicodedata_decimal__doc__[] = "decimal(unichr[, default])\n\nReturns the decimal value assigned to the Unicode character unichr\nas integer. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
113"decimal(unichr[, default])\n\static char unicodedata_decimal__doc__[] = "decimal(unichr[, default])\n\nReturns the decimal value assigned to the Unicode character unichr\nas integer. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
114\n\static char unicodedata_decimal__doc__[] = "decimal(unichr[, default])\n\nReturns the decimal value assigned to the Unicode character unichr\nas integer. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
115Returns the decimal value assigned to the Unicode character unichr\n\static char unicodedata_decimal__doc__[] = "decimal(unichr[, default])\n\nReturns the decimal value assigned to the Unicode character unichr\nas integer. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
116as integer. If no such value is defined, default is returned, or, if\n\static char unicodedata_decimal__doc__[] = "decimal(unichr[, default])\n\nReturns the decimal value assigned to the Unicode character unichr\nas integer. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
117not given, ValueError is raised.")static char unicodedata_decimal__doc__[] = "decimal(unichr[, default])\n\nReturns the decimal value assigned to the Unicode character unichr\nas integer. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised.";
118
119static PyObject *
120unicodedata_decimal(PyObject *self, PyObject *args)
121{
122 PyUnicodeObject *v;
123 PyObject *defobj = NULL((void*)0);
124 int have_old = 0;
125 long rc;
126 Py_UCS4 c;
127
128 if (!PyArg_ParseTuple(args, "O!|O:decimal", &PyUnicode_Type, &v, &defobj))
129 return NULL((void*)0);
130 c = getuchar(v);
131 if (c == (Py_UCS4)-1)
132 return NULL((void*)0);
133
134 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type)) {
135 const change_record *old = get_old_record(self, c)((((PreviousDBVersion*)self)->getrecord)(c));
136 if (old->category_changed == 0) {
137 /* unassigned */
138 have_old = 1;
139 rc = -1;
140 }
141 else if (old->decimal_changed != 0xFF) {
142 have_old = 1;
143 rc = old->decimal_changed;
144 }
145 }
146
147 if (!have_old)
148 rc = Py_UNICODE_TODECIMAL(c)_PyUnicode_ToDecimalDigit(c);
149 if (rc < 0) {
150 if (defobj == NULL((void*)0)) {
151 PyErr_SetString(PyExc_ValueError,
152 "not a decimal");
153 return NULL((void*)0);
154 }
155 else {
156 Py_INCREF(defobj)( _Py_RefTotal++ , ((PyObject*)(defobj))->ob_refcnt++);
157 return defobj;
158 }
159 }
160 return PyLong_FromLong(rc);
161}
162
163PyDoc_STRVAR(unicodedata_digit__doc__,static char unicodedata_digit__doc__[] = "digit(unichr[, default])\n\nReturns the digit value assigned to the Unicode character unichr as\ninteger. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
164"digit(unichr[, default])\n\static char unicodedata_digit__doc__[] = "digit(unichr[, default])\n\nReturns the digit value assigned to the Unicode character unichr as\ninteger. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
165\n\static char unicodedata_digit__doc__[] = "digit(unichr[, default])\n\nReturns the digit value assigned to the Unicode character unichr as\ninteger. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
166Returns the digit value assigned to the Unicode character unichr as\n\static char unicodedata_digit__doc__[] = "digit(unichr[, default])\n\nReturns the digit value assigned to the Unicode character unichr as\ninteger. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
167integer. If no such value is defined, default is returned, or, if\n\static char unicodedata_digit__doc__[] = "digit(unichr[, default])\n\nReturns the digit value assigned to the Unicode character unichr as\ninteger. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
168not given, ValueError is raised.")static char unicodedata_digit__doc__[] = "digit(unichr[, default])\n\nReturns the digit value assigned to the Unicode character unichr as\ninteger. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised.";
169
170static PyObject *
171unicodedata_digit(PyObject *self, PyObject *args)
172{
173 PyUnicodeObject *v;
174 PyObject *defobj = NULL((void*)0);
175 long rc;
176 Py_UCS4 c;
177
178 if (!PyArg_ParseTuple(args, "O!|O:digit", &PyUnicode_Type, &v, &defobj))
179 return NULL((void*)0);
180 c = getuchar(v);
181 if (c == (Py_UCS4)-1)
182 return NULL((void*)0);
183 rc = Py_UNICODE_TODIGIT(c)_PyUnicode_ToDigit(c);
184 if (rc < 0) {
185 if (defobj == NULL((void*)0)) {
186 PyErr_SetString(PyExc_ValueError, "not a digit");
187 return NULL((void*)0);
188 }
189 else {
190 Py_INCREF(defobj)( _Py_RefTotal++ , ((PyObject*)(defobj))->ob_refcnt++);
191 return defobj;
192 }
193 }
194 return PyLong_FromLong(rc);
195}
196
197PyDoc_STRVAR(unicodedata_numeric__doc__,static char unicodedata_numeric__doc__[] = "numeric(unichr[, default])\n\nReturns the numeric value assigned to the Unicode character unichr\nas float. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
198"numeric(unichr[, default])\n\static char unicodedata_numeric__doc__[] = "numeric(unichr[, default])\n\nReturns the numeric value assigned to the Unicode character unichr\nas float. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
199\n\static char unicodedata_numeric__doc__[] = "numeric(unichr[, default])\n\nReturns the numeric value assigned to the Unicode character unichr\nas float. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
200Returns the numeric value assigned to the Unicode character unichr\n\static char unicodedata_numeric__doc__[] = "numeric(unichr[, default])\n\nReturns the numeric value assigned to the Unicode character unichr\nas float. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
201as float. If no such value is defined, default is returned, or, if\n\static char unicodedata_numeric__doc__[] = "numeric(unichr[, default])\n\nReturns the numeric value assigned to the Unicode character unichr\nas float. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised."
202not given, ValueError is raised.")static char unicodedata_numeric__doc__[] = "numeric(unichr[, default])\n\nReturns the numeric value assigned to the Unicode character unichr\nas float. If no such value is defined, default is returned, or, if\nnot given, ValueError is raised.";
203
204static PyObject *
205unicodedata_numeric(PyObject *self, PyObject *args)
206{
207 PyUnicodeObject *v;
208 PyObject *defobj = NULL((void*)0);
209 int have_old = 0;
210 double rc;
211 Py_UCS4 c;
212
213 if (!PyArg_ParseTuple(args, "O!|O:numeric", &PyUnicode_Type, &v, &defobj))
214 return NULL((void*)0);
215 c = getuchar(v);
216 if (c == (Py_UCS4)-1)
217 return NULL((void*)0);
218
219 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type)) {
220 const change_record *old = get_old_record(self, c)((((PreviousDBVersion*)self)->getrecord)(c));
221 if (old->category_changed == 0) {
222 /* unassigned */
223 have_old = 1;
224 rc = -1.0;
225 }
226 else if (old->decimal_changed != 0xFF) {
227 have_old = 1;
228 rc = old->decimal_changed;
229 }
230 }
231
232 if (!have_old)
233 rc = Py_UNICODE_TONUMERIC(c)_PyUnicode_ToNumeric(c);
234 if (rc == -1.0) {
235 if (defobj == NULL((void*)0)) {
236 PyErr_SetString(PyExc_ValueError, "not a numeric character");
237 return NULL((void*)0);
238 }
239 else {
240 Py_INCREF(defobj)( _Py_RefTotal++ , ((PyObject*)(defobj))->ob_refcnt++);
241 return defobj;
242 }
243 }
244 return PyFloat_FromDouble(rc);
245}
246
247PyDoc_STRVAR(unicodedata_category__doc__,static char unicodedata_category__doc__[] = "category(unichr)\n\nReturns the general category assigned to the Unicode character\nunichr as string."
248"category(unichr)\n\static char unicodedata_category__doc__[] = "category(unichr)\n\nReturns the general category assigned to the Unicode character\nunichr as string."
249\n\static char unicodedata_category__doc__[] = "category(unichr)\n\nReturns the general category assigned to the Unicode character\nunichr as string."
250Returns the general category assigned to the Unicode character\n\static char unicodedata_category__doc__[] = "category(unichr)\n\nReturns the general category assigned to the Unicode character\nunichr as string."
251unichr as string.")static char unicodedata_category__doc__[] = "category(unichr)\n\nReturns the general category assigned to the Unicode character\nunichr as string.";
252
253static PyObject *
254unicodedata_category(PyObject *self, PyObject *args)
255{
256 PyUnicodeObject *v;
257 int index;
258 Py_UCS4 c;
259
260 if (!PyArg_ParseTuple(args, "O!:category",
261 &PyUnicode_Type, &v))
262 return NULL((void*)0);
263 c = getuchar(v);
264 if (c == (Py_UCS4)-1)
265 return NULL((void*)0);
266 index = (int) _getrecord_ex(c)->category;
267 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type)) {
268 const change_record *old = get_old_record(self, c)((((PreviousDBVersion*)self)->getrecord)(c));
269 if (old->category_changed != 0xFF)
270 index = old->category_changed;
271 }
272 return PyUnicode_FromStringPyUnicodeUCS2_FromString(_PyUnicode_CategoryNames[index]);
273}
274
275PyDoc_STRVAR(unicodedata_bidirectional__doc__,static char unicodedata_bidirectional__doc__[] = "bidirectional(unichr)\n\nReturns the bidirectional category assigned to the Unicode character\nunichr as string. If no such value is defined, an empty string is\nreturned."
276"bidirectional(unichr)\n\static char unicodedata_bidirectional__doc__[] = "bidirectional(unichr)\n\nReturns the bidirectional category assigned to the Unicode character\nunichr as string. If no such value is defined, an empty string is\nreturned."
277\n\static char unicodedata_bidirectional__doc__[] = "bidirectional(unichr)\n\nReturns the bidirectional category assigned to the Unicode character\nunichr as string. If no such value is defined, an empty string is\nreturned."
278Returns the bidirectional category assigned to the Unicode character\n\static char unicodedata_bidirectional__doc__[] = "bidirectional(unichr)\n\nReturns the bidirectional category assigned to the Unicode character\nunichr as string. If no such value is defined, an empty string is\nreturned."
279unichr as string. If no such value is defined, an empty string is\n\static char unicodedata_bidirectional__doc__[] = "bidirectional(unichr)\n\nReturns the bidirectional category assigned to the Unicode character\nunichr as string. If no such value is defined, an empty string is\nreturned."
280returned.")static char unicodedata_bidirectional__doc__[] = "bidirectional(unichr)\n\nReturns the bidirectional category assigned to the Unicode character\nunichr as string. If no such value is defined, an empty string is\nreturned.";
281
282static PyObject *
283unicodedata_bidirectional(PyObject *self, PyObject *args)
284{
285 PyUnicodeObject *v;
286 int index;
287 Py_UCS4 c;
288
289 if (!PyArg_ParseTuple(args, "O!:bidirectional",
290 &PyUnicode_Type, &v))
291 return NULL((void*)0);
292 c = getuchar(v);
293 if (c == (Py_UCS4)-1)
294 return NULL((void*)0);
295 index = (int) _getrecord_ex(c)->bidirectional;
296 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type)) {
297 const change_record *old = get_old_record(self, c)((((PreviousDBVersion*)self)->getrecord)(c));
298 if (old->category_changed == 0)
299 index = 0; /* unassigned */
300 else if (old->bidir_changed != 0xFF)
301 index = old->bidir_changed;
302 }
303 return PyUnicode_FromStringPyUnicodeUCS2_FromString(_PyUnicode_BidirectionalNames[index]);
304}
305
306PyDoc_STRVAR(unicodedata_combining__doc__,static char unicodedata_combining__doc__[] = "combining(unichr)\n\nReturns the canonical combining class assigned to the Unicode\ncharacter unichr as integer. Returns 0 if no combining class is\ndefined."
307"combining(unichr)\n\static char unicodedata_combining__doc__[] = "combining(unichr)\n\nReturns the canonical combining class assigned to the Unicode\ncharacter unichr as integer. Returns 0 if no combining class is\ndefined."
308\n\static char unicodedata_combining__doc__[] = "combining(unichr)\n\nReturns the canonical combining class assigned to the Unicode\ncharacter unichr as integer. Returns 0 if no combining class is\ndefined."
309Returns the canonical combining class assigned to the Unicode\n\static char unicodedata_combining__doc__[] = "combining(unichr)\n\nReturns the canonical combining class assigned to the Unicode\ncharacter unichr as integer. Returns 0 if no combining class is\ndefined."
310character unichr as integer. Returns 0 if no combining class is\n\static char unicodedata_combining__doc__[] = "combining(unichr)\n\nReturns the canonical combining class assigned to the Unicode\ncharacter unichr as integer. Returns 0 if no combining class is\ndefined."
311defined.")static char unicodedata_combining__doc__[] = "combining(unichr)\n\nReturns the canonical combining class assigned to the Unicode\ncharacter unichr as integer. Returns 0 if no combining class is\ndefined.";
312
313static PyObject *
314unicodedata_combining(PyObject *self, PyObject *args)
315{
316 PyUnicodeObject *v;
317 int index;
318 Py_UCS4 c;
319
320 if (!PyArg_ParseTuple(args, "O!:combining",
321 &PyUnicode_Type, &v))
322 return NULL((void*)0);
323 c = getuchar(v);
324 if (c == (Py_UCS4)-1)
325 return NULL((void*)0);
326 index = (int) _getrecord_ex(c)->combining;
327 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type)) {
328 const change_record *old = get_old_record(self, c)((((PreviousDBVersion*)self)->getrecord)(c));
329 if (old->category_changed == 0)
330 index = 0; /* unassigned */
331 }
332 return PyLong_FromLong(index);
333}
334
335PyDoc_STRVAR(unicodedata_mirrored__doc__,static char unicodedata_mirrored__doc__[] = "mirrored(unichr)\n\nReturns the mirrored property assigned to the Unicode character\nunichr as integer. Returns 1 if the character has been identified as\na \"mirrored\" character in bidirectional text, 0 otherwise."
336"mirrored(unichr)\n\static char unicodedata_mirrored__doc__[] = "mirrored(unichr)\n\nReturns the mirrored property assigned to the Unicode character\nunichr as integer. Returns 1 if the character has been identified as\na \"mirrored\" character in bidirectional text, 0 otherwise."
337\n\static char unicodedata_mirrored__doc__[] = "mirrored(unichr)\n\nReturns the mirrored property assigned to the Unicode character\nunichr as integer. Returns 1 if the character has been identified as\na \"mirrored\" character in bidirectional text, 0 otherwise."
338Returns the mirrored property assigned to the Unicode character\n\static char unicodedata_mirrored__doc__[] = "mirrored(unichr)\n\nReturns the mirrored property assigned to the Unicode character\nunichr as integer. Returns 1 if the character has been identified as\na \"mirrored\" character in bidirectional text, 0 otherwise."
339unichr as integer. Returns 1 if the character has been identified as\n\static char unicodedata_mirrored__doc__[] = "mirrored(unichr)\n\nReturns the mirrored property assigned to the Unicode character\nunichr as integer. Returns 1 if the character has been identified as\na \"mirrored\" character in bidirectional text, 0 otherwise."
340a \"mirrored\" character in bidirectional text, 0 otherwise.")static char unicodedata_mirrored__doc__[] = "mirrored(unichr)\n\nReturns the mirrored property assigned to the Unicode character\nunichr as integer. Returns 1 if the character has been identified as\na \"mirrored\" character in bidirectional text, 0 otherwise.";
341
342static PyObject *
343unicodedata_mirrored(PyObject *self, PyObject *args)
344{
345 PyUnicodeObject *v;
346 int index;
347 Py_UCS4 c;
348
349 if (!PyArg_ParseTuple(args, "O!:mirrored",
350 &PyUnicode_Type, &v))
351 return NULL((void*)0);
352 c = getuchar(v);
353 if (c == (Py_UCS4)-1)
354 return NULL((void*)0);
355 index = (int) _getrecord_ex(c)->mirrored;
356 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type)) {
357 const change_record *old = get_old_record(self, c)((((PreviousDBVersion*)self)->getrecord)(c));
358 if (old->category_changed == 0)
359 index = 0; /* unassigned */
360 else if (old->mirrored_changed != 0xFF)
361 index = old->mirrored_changed;
362 }
363 return PyLong_FromLong(index);
364}
365
366PyDoc_STRVAR(unicodedata_east_asian_width__doc__,static char unicodedata_east_asian_width__doc__[] = "east_asian_width(unichr)\n\nReturns the east asian width assigned to the Unicode character\nunichr as string."
367"east_asian_width(unichr)\n\static char unicodedata_east_asian_width__doc__[] = "east_asian_width(unichr)\n\nReturns the east asian width assigned to the Unicode character\nunichr as string."
368\n\static char unicodedata_east_asian_width__doc__[] = "east_asian_width(unichr)\n\nReturns the east asian width assigned to the Unicode character\nunichr as string."
369Returns the east asian width assigned to the Unicode character\n\static char unicodedata_east_asian_width__doc__[] = "east_asian_width(unichr)\n\nReturns the east asian width assigned to the Unicode character\nunichr as string."
370unichr as string.")static char unicodedata_east_asian_width__doc__[] = "east_asian_width(unichr)\n\nReturns the east asian width assigned to the Unicode character\nunichr as string.";
371
372static PyObject *
373unicodedata_east_asian_width(PyObject *self, PyObject *args)
374{
375 PyUnicodeObject *v;
376 int index;
377 Py_UCS4 c;
378
379 if (!PyArg_ParseTuple(args, "O!:east_asian_width",
380 &PyUnicode_Type, &v))
381 return NULL((void*)0);
382 c = getuchar(v);
383 if (c == (Py_UCS4)-1)
384 return NULL((void*)0);
385 index = (int) _getrecord_ex(c)->east_asian_width;
386 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type)) {
387 const change_record *old = get_old_record(self, c)((((PreviousDBVersion*)self)->getrecord)(c));
388 if (old->category_changed == 0)
389 index = 0; /* unassigned */
390 }
391 return PyUnicode_FromStringPyUnicodeUCS2_FromString(_PyUnicode_EastAsianWidthNames[index]);
392}
393
394PyDoc_STRVAR(unicodedata_decomposition__doc__,static char unicodedata_decomposition__doc__[] = "decomposition(unichr)\n\nReturns the character decomposition mapping assigned to the Unicode\ncharacter unichr as string. An empty string is returned in case no\nsuch mapping is defined."
395"decomposition(unichr)\n\static char unicodedata_decomposition__doc__[] = "decomposition(unichr)\n\nReturns the character decomposition mapping assigned to the Unicode\ncharacter unichr as string. An empty string is returned in case no\nsuch mapping is defined."
396\n\static char unicodedata_decomposition__doc__[] = "decomposition(unichr)\n\nReturns the character decomposition mapping assigned to the Unicode\ncharacter unichr as string. An empty string is returned in case no\nsuch mapping is defined."
397Returns the character decomposition mapping assigned to the Unicode\n\static char unicodedata_decomposition__doc__[] = "decomposition(unichr)\n\nReturns the character decomposition mapping assigned to the Unicode\ncharacter unichr as string. An empty string is returned in case no\nsuch mapping is defined."
398character unichr as string. An empty string is returned in case no\n\static char unicodedata_decomposition__doc__[] = "decomposition(unichr)\n\nReturns the character decomposition mapping assigned to the Unicode\ncharacter unichr as string. An empty string is returned in case no\nsuch mapping is defined."
399such mapping is defined.")static char unicodedata_decomposition__doc__[] = "decomposition(unichr)\n\nReturns the character decomposition mapping assigned to the Unicode\ncharacter unichr as string. An empty string is returned in case no\nsuch mapping is defined.";
400
401static PyObject *
402unicodedata_decomposition(PyObject *self, PyObject *args)
403{
404 PyUnicodeObject *v;
405 char decomp[256];
406 int code, index, count;
407 size_t i;
408 unsigned int prefix_index;
409 Py_UCS4 c;
410
411 if (!PyArg_ParseTuple(args, "O!:decomposition",
412 &PyUnicode_Type, &v))
413 return NULL((void*)0);
414 c = getuchar(v);
415 if (c == (Py_UCS4)-1)
416 return NULL((void*)0);
417
418 code = (int)c;
419
420 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type)) {
421 const change_record *old = get_old_record(self, c)((((PreviousDBVersion*)self)->getrecord)(c));
422 if (old->category_changed == 0)
423 return PyUnicode_FromStringPyUnicodeUCS2_FromString(""); /* unassigned */
424 }
425
426 if (code < 0 || code >= 0x110000)
427 index = 0;
428 else {
429 index = decomp_index1[(code>>DECOMP_SHIFT8)];
430 index = decomp_index2[(index<<DECOMP_SHIFT8)+
431 (code&((1<<DECOMP_SHIFT8)-1))];
432 }
433
434 /* high byte is number of hex bytes (usually one or two), low byte
435 is prefix code (index into decomp_prefix) */
436 count = decomp_data[index] >> 8;
437
438 /* XXX: could allocate the PyString up front instead
439 (strlen(prefix) + 5 * count + 1 bytes) */
440
441 /* Based on how index is calculated above and decomp_data is generated
442 from Tools/unicode/makeunicodedata.py, it should not be possible
443 to overflow decomp_prefix. */
444 prefix_index = decomp_data[index] & 255;
445 assert(prefix_index < (sizeof(decomp_prefix)/sizeof(*decomp_prefix)))(__builtin_expect(!(prefix_index < (sizeof(decomp_prefix)/
sizeof(*decomp_prefix))), 0) ? __assert_rtn(__func__, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c"
, 445, "prefix_index < (sizeof(decomp_prefix)/sizeof(*decomp_prefix))"
) : (void)0)
;
446
447 /* copy prefix */
448 i = strlen(decomp_prefix[prefix_index]);
449 memcpy(decomp, decomp_prefix[prefix_index], i)((__builtin_object_size (decomp, 0) != (size_t) -1) ? __builtin___memcpy_chk
(decomp, decomp_prefix[prefix_index], i, __builtin_object_size
(decomp, 0)) : __inline_memcpy_chk (decomp, decomp_prefix[prefix_index
], i))
;
450
451 while (count-- > 0) {
452 if (i)
453 decomp[i++] = ' ';
454 assert(i < sizeof(decomp))(__builtin_expect(!(i < sizeof(decomp)), 0) ? __assert_rtn
(__func__, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c"
, 454, "i < sizeof(decomp)") : (void)0)
;
455 PyOS_snprintf(decomp + i, sizeof(decomp) - i, "%04X",
456 decomp_data[++index]);
457 i += strlen(decomp + i);
458 }
459 return PyUnicode_FromStringAndSizePyUnicodeUCS2_FromStringAndSize(decomp, i);
460}
461
462static void
463get_decomp_record(PyObject *self, Py_UCS4 code, int *index, int *prefix, int *count)
464{
465 if (code >= 0x110000) {
466 *index = 0;
467 } else if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type) &&
468 get_old_record(self, code)((((PreviousDBVersion*)self)->getrecord)(code))->category_changed==0) {
469 /* unassigned in old version */
470 *index = 0;
471 }
472 else {
473 *index = decomp_index1[(code>>DECOMP_SHIFT8)];
474 *index = decomp_index2[(*index<<DECOMP_SHIFT8)+
475 (code&((1<<DECOMP_SHIFT8)-1))];
476 }
477
478 /* high byte is number of hex bytes (usually one or two), low byte
479 is prefix code (index into decomp_prefix) */
480 *count = decomp_data[*index] >> 8;
481 *prefix = decomp_data[*index] & 255;
482
483 (*index)++;
484}
485
486#define SBase0xAC00 0xAC00
487#define LBase0x1100 0x1100
488#define VBase0x1161 0x1161
489#define TBase0x11A7 0x11A7
490#define LCount19 19
491#define VCount21 21
492#define TCount28 28
493#define NCount(21*28) (VCount21*TCount28)
494#define SCount(19*(21*28)) (LCount19*NCount(21*28))
495
496static PyObject*
497nfd_nfkd(PyObject *self, PyObject *input, int k)
498{
499 PyObject *result;
500 Py_UNICODE *i, *end, *o;
501 /* Longest decomposition in Unicode 3.2: U+FDFA */
502 Py_UNICODE stack[20];
503 Py_ssize_t space, isize;
504 int index, prefix, count, stackptr;
505 unsigned char prev, cur;
506
507 stackptr = 0;
508 isize = PyUnicode_GET_SIZE(input)((__builtin_expect(!(((((((PyObject*)(input))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 508
, "PyUnicode_Check(input)") : (void)0),(((PyUnicodeObject *)(
input))->length))
;
509 /* Overallocate at most 10 characters. */
510 space = (isize > 10 ? 10 : isize) + isize;
1
'?' condition is false
511 result = PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(NULL((void*)0), space);
512 if (!result)
2
Taking false branch
513 return NULL((void*)0);
514 i = PyUnicode_AS_UNICODE(input)((__builtin_expect(!(((((((PyObject*)(input))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 514
, "PyUnicode_Check(input)") : (void)0),(((PyUnicodeObject *)(
input))->str))
;
515 end = i + isize;
516 o = PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 516
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->str))
;
517
518 while (i < end) {
3
Loop condition is true. Entering loop body
519 stack[stackptr++] = *i++;
520 while(stackptr) {
4
Loop condition is true. Entering loop body
521 Py_UNICODE code = stack[--stackptr];
5
Assigned value is garbage or undefined
522 /* Hangul Decomposition adds three characters in
523 a single step, so we need at least that much room. */
524 if (space < 3) {
525 Py_ssize_t newsize = PyUnicode_GET_SIZE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 525
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->length))
+ 10;
526 space += 10;
527 if (PyUnicode_ResizePyUnicodeUCS2_Resize(&result, newsize) == -1)
528 return NULL((void*)0);
529 o = PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 529
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->str))
+ newsize - space;
530 }
531 /* Hangul Decomposition. */
532 if (SBase0xAC00 <= code && code < (SBase0xAC00+SCount(19*(21*28)))) {
533 int SIndex = code - SBase0xAC00;
534 int L = LBase0x1100 + SIndex / NCount(21*28);
535 int V = VBase0x1161 + (SIndex % NCount(21*28)) / TCount28;
536 int T = TBase0x11A7 + SIndex % TCount28;
537 *o++ = L;
538 *o++ = V;
539 space -= 2;
540 if (T != TBase0x11A7) {
541 *o++ = T;
542 space --;
543 }
544 continue;
545 }
546 /* normalization changes */
547 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type)) {
548 Py_UCS4 value = ((PreviousDBVersion*)self)->normalization(code);
549 if (value != 0) {
550 stack[stackptr++] = value;
551 continue;
552 }
553 }
554
555 /* Other decompositions. */
556 get_decomp_record(self, code, &index, &prefix, &count);
557
558 /* Copy character if it is not decomposable, or has a
559 compatibility decomposition, but we do NFD. */
560 if (!count || (prefix && !k)) {
561 *o++ = code;
562 space--;
563 continue;
564 }
565 /* Copy decomposition onto the stack, in reverse
566 order. */
567 while(count) {
568 code = decomp_data[index + (--count)];
569 stack[stackptr++] = code;
570 }
571 }
572 }
573
574 /* Drop overallocation. Cannot fail. */
575 PyUnicode_ResizePyUnicodeUCS2_Resize(&result, PyUnicode_GET_SIZE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 575
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->length))
- space);
576
577 /* Sort canonically. */
578 i = PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 578
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->str))
;
579 prev = _getrecord_ex(*i)->combining;
580 end = i + PyUnicode_GET_SIZE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 580
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->length))
;
581 for (i++; i < end; i++) {
582 cur = _getrecord_ex(*i)->combining;
583 if (prev == 0 || cur == 0 || prev <= cur) {
584 prev = cur;
585 continue;
586 }
587 /* Non-canonical order. Need to switch *i with previous. */
588 o = i - 1;
589 while (1) {
590 Py_UNICODE tmp = o[1];
591 o[1] = o[0];
592 o[0] = tmp;
593 o--;
594 if (o < PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 594
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->str))
)
595 break;
596 prev = _getrecord_ex(*o)->combining;
597 if (prev == 0 || prev <= cur)
598 break;
599 }
600 prev = _getrecord_ex(*i)->combining;
601 }
602 return result;
603}
604
605static int
606find_nfc_index(PyObject *self, struct reindex* nfc, Py_UNICODE code)
607{
608 int index;
609 for (index = 0; nfc[index].start; index++) {
610 int start = nfc[index].start;
611 if (code < start)
612 return -1;
613 if (code <= start + nfc[index].count) {
614 int delta = code - start;
615 return nfc[index].index + delta;
616 }
617 }
618 return -1;
619}
620
621static PyObject*
622nfc_nfkc(PyObject *self, PyObject *input, int k)
623{
624 PyObject *result;
625 Py_UNICODE *i, *i1, *o, *end;
626 int f,l,index,index1,comb;
627 Py_UNICODE code;
628 Py_UNICODE *skipped[20];
629 int cskipped = 0;
630
631 result = nfd_nfkd(self, input, k);
632 if (!result)
633 return NULL((void*)0);
634
635 /* We are going to modify result in-place.
636 If nfd_nfkd is changed to sometimes return the input,
637 this code needs to be reviewed. */
638 assert(result != input)(__builtin_expect(!(result != input), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 638
, "result != input") : (void)0)
;
639
640 i = PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 640
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->str))
;
641 end = i + PyUnicode_GET_SIZE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 641
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->length))
;
642 o = PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 642
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->str))
;
643
644 again:
645 while (i < end) {
646 for (index = 0; index < cskipped; index++) {
647 if (skipped[index] == i) {
648 /* *i character is skipped.
649 Remove from list. */
650 skipped[index] = skipped[cskipped-1];
651 cskipped--;
652 i++;
653 goto again; /* continue while */
654 }
655 }
656 /* Hangul Composition. We don't need to check for <LV,T>
657 pairs, since we always have decomposed data. */
658 if (LBase0x1100 <= *i && *i < (LBase0x1100+LCount19) &&
659 i + 1 < end &&
660 VBase0x1161 <= i[1] && i[1] <= (VBase0x1161+VCount21)) {
661 int LIndex, VIndex;
662 LIndex = i[0] - LBase0x1100;
663 VIndex = i[1] - VBase0x1161;
664 code = SBase0xAC00 + (LIndex*VCount21+VIndex)*TCount28;
665 i+=2;
666 if (i < end &&
667 TBase0x11A7 <= *i && *i <= (TBase0x11A7+TCount28)) {
668 code += *i-TBase0x11A7;
669 i++;
670 }
671 *o++ = code;
672 continue;
673 }
674
675 f = find_nfc_index(self, nfc_first, *i);
676 if (f == -1) {
677 *o++ = *i++;
678 continue;
679 }
680 /* Find next unblocked character. */
681 i1 = i+1;
682 comb = 0;
683 while (i1 < end) {
684 int comb1 = _getrecord_ex(*i1)->combining;
685 if (comb) {
686 if (comb1 == 0)
687 break;
688 if (comb >= comb1) {
689 /* Character is blocked. */
690 i1++;
691 continue;
692 }
693 }
694 l = find_nfc_index(self, nfc_last, *i1);
695 /* *i1 cannot be combined with *i. If *i1
696 is a starter, we don't need to look further.
697 Otherwise, record the combining class. */
698 if (l == -1) {
699 not_combinable:
700 if (comb1 == 0)
701 break;
702 comb = comb1;
703 i1++;
704 continue;
705 }
706 index = f*TOTAL_LAST55 + l;
707 index1 = comp_index[index >> COMP_SHIFT2];
708 code = comp_data[(index1<<COMP_SHIFT2)+
709 (index&((1<<COMP_SHIFT2)-1))];
710 if (code == 0)
711 goto not_combinable;
712
713 /* Replace the original character. */
714 *i = code;
715 /* Mark the second character unused. */
716 assert(cskipped < 20)(__builtin_expect(!(cskipped < 20), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 716
, "cskipped < 20") : (void)0)
;
717 skipped[cskipped++] = i1;
718 i1++;
719 f = find_nfc_index(self, nfc_first, *i);
720 if (f == -1)
721 break;
722 }
723 *o++ = *i++;
724 }
725 if (o != end)
726 PyUnicode_ResizePyUnicodeUCS2_Resize(&result, o - PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 726
, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject *)
(result))->str))
);
727 return result;
728}
729
730/* Return 1 if the input is certainly normalized, 0 if it might not be. */
731static int
732is_normalized(PyObject *self, PyObject *input, int nfc, int k)
733{
734 Py_UNICODE *i, *end;
735 unsigned char prev_combining = 0, quickcheck_mask;
736
737 /* An older version of the database is requested, quickchecks must be
738 disabled. */
739 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type))
740 return 0;
741
742 /* The two quickcheck bits at this shift mean 0=Yes, 1=Maybe, 2=No,
743 as described in http://unicode.org/reports/tr15/#Annex8. */
744 quickcheck_mask = 3 << ((nfc ? 4 : 0) + (k ? 2 : 0));
745
746 i = PyUnicode_AS_UNICODE(input)((__builtin_expect(!(((((((PyObject*)(input))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 746
, "PyUnicode_Check(input)") : (void)0),(((PyUnicodeObject *)(
input))->str))
;
747 end = i + PyUnicode_GET_SIZE(input)((__builtin_expect(!(((((((PyObject*)(input))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "/Users/brett/Dev/python/3.x/py3k/Modules/unicodedata.c", 747
, "PyUnicode_Check(input)") : (void)0),(((PyUnicodeObject *)(
input))->length))
;
748 while (i < end) {
749 const _PyUnicode_DatabaseRecord *record = _getrecord_ex(*i++);
750 unsigned char combining = record->combining;
751 unsigned char quickcheck = record->normalization_quick_check;
752
753 if (quickcheck & quickcheck_mask)
754 return 0; /* this string might need normalization */
755 if (combining && prev_combining > combining)
756 return 0; /* non-canonical sort order, not normalized */
757 prev_combining = combining;
758 }
759 return 1; /* certainly normalized */
760}
761
762PyDoc_STRVAR(unicodedata_normalize__doc__,static char unicodedata_normalize__doc__[] = "normalize(form, unistr)\n\nReturn the normal form 'form' for the Unicode string unistr. Valid\nvalues for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'."
763"normalize(form, unistr)\n\static char unicodedata_normalize__doc__[] = "normalize(form, unistr)\n\nReturn the normal form 'form' for the Unicode string unistr. Valid\nvalues for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'."
764\n\static char unicodedata_normalize__doc__[] = "normalize(form, unistr)\n\nReturn the normal form 'form' for the Unicode string unistr. Valid\nvalues for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'."
765Return the normal form 'form' for the Unicode string unistr. Valid\n\static char unicodedata_normalize__doc__[] = "normalize(form, unistr)\n\nReturn the normal form 'form' for the Unicode string unistr. Valid\nvalues for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'."
766values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'.")static char unicodedata_normalize__doc__[] = "normalize(form, unistr)\n\nReturn the normal form 'form' for the Unicode string unistr. Valid\nvalues for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'.";
767
768static PyObject*
769unicodedata_normalize(PyObject *self, PyObject *args)
770{
771 char *form;
772 PyObject *input;
773
774 if(!PyArg_ParseTuple(args, "sO!:normalize",
775 &form, &PyUnicode_Type, &input))
776 return NULL((void*)0);
777
778 if (PyUnicode_GetSizePyUnicodeUCS2_GetSize(input) == 0) {
779 /* Special case empty input strings, since resizing
780 them later would cause internal errors. */
781 Py_INCREF(input)( _Py_RefTotal++ , ((PyObject*)(input))->ob_refcnt++);
782 return input;
783 }
784
785 if (strcmp(form, "NFC") == 0) {
786 if (is_normalized(self, input, 1, 0)) {
787 Py_INCREF(input)( _Py_RefTotal++ , ((PyObject*)(input))->ob_refcnt++);
788 return input;
789 }
790 return nfc_nfkc(self, input, 0);
791 }
792 if (strcmp(form, "NFKC") == 0) {
793 if (is_normalized(self, input, 1, 1)) {
794 Py_INCREF(input)( _Py_RefTotal++ , ((PyObject*)(input))->ob_refcnt++);
795 return input;
796 }
797 return nfc_nfkc(self, input, 1);
798 }
799 if (strcmp(form, "NFD") == 0) {
800 if (is_normalized(self, input, 0, 0)) {
801 Py_INCREF(input)( _Py_RefTotal++ , ((PyObject*)(input))->ob_refcnt++);
802 return input;
803 }
804 return nfd_nfkd(self, input, 0);
805 }
806 if (strcmp(form, "NFKD") == 0) {
807 if (is_normalized(self, input, 0, 1)) {
808 Py_INCREF(input)( _Py_RefTotal++ , ((PyObject*)(input))->ob_refcnt++);
809 return input;
810 }
811 return nfd_nfkd(self, input, 1);
812 }
813 PyErr_SetString(PyExc_ValueError, "invalid normalization form");
814 return NULL((void*)0);
815}
816
817/* -------------------------------------------------------------------- */
818/* unicode character name tables */
819
820/* data file generated by Tools/unicode/makeunicodedata.py */
821#include "unicodename_db.h"
822
823/* -------------------------------------------------------------------- */
824/* database code (cut and pasted from the unidb package) */
825
/* Hash the first `len` bytes of `s`, upper-casing each byte first.

   Each step multiplies the running hash by `scale` and adds the byte;
   whenever bits 24..31 become set they are folded back into the low byte
   and the hash is cut down to 24 bits. */
static unsigned long
_gethash(const char *s, int len, int scale)
{
    unsigned long hash = 0;
    int pos = 0;

    while (pos < len) {
        unsigned long high;

        hash = (hash * scale) + (unsigned char) toupper(Py_CHARMASK(s[pos]));
        high = hash & 0xff000000;
        if (high != 0)
            hash = (hash ^ ((high >> 24) & 0xff)) & 0x00ffffff;
        pos++;
    }
    return hash;
}
840
/* Name fragments used to build and parse Hangul syllable names.
   Column 0 is indexed by the leading consonant (19 entries), column 1 by
   the vowel (21 entries) and column 2 by the trailing consonant
   (28 entries, the first being the empty string).  Unused cells are
   NULL. */
static char *hangul_syllables[][3] = {
    { "G",  "A",   ""   },
    { "GG", "AE",  "G"  },
    { "N",  "YA",  "GG" },
    { "D",  "YAE", "GS" },
    { "DD", "EO",  "N"  },
    { "R",  "E",   "NJ" },
    { "M",  "YEO", "NH" },
    { "B",  "YE",  "D"  },
    { "BB", "O",   "L"  },
    { "S",  "WA",  "LG" },
    { "SS", "WAE", "LM" },
    { "",   "OE",  "LB" },
    { "J",  "YO",  "LS" },
    { "JJ", "U",   "LT" },
    { "C",  "WEO", "LP" },
    { "K",  "WE",  "LH" },
    { "T",  "WI",  "M"  },
    { "P",  "YU",  "B"  },
    { "H",  "EU",  "BS" },
    { NULL, "YI",  "S"  },
    { NULL, "I",   "SS" },
    { NULL, NULL,  "NG" },
    { NULL, NULL,  "J"  },
    { NULL, NULL,  "C"  },
    { NULL, NULL,  "K"  },
    { NULL, NULL,  "T"  },
    { NULL, NULL,  "P"  },
    { NULL, NULL,  "H"  }
};
871
872/* These ranges need to match makeunicodedata.py:cjk_ranges. */
873static int
874is_unified_ideograph(Py_UCS4 code)
875{
876 return
877 (0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
878 (0x4E00 <= code && code <= 0x9FCB) || /* CJK Ideograph */
879 (0x20000 <= code && code <= 0x2A6D6) || /* CJK Ideograph Extension B */
880 (0x2A700 <= code && code <= 0x2B734) || /* CJK Ideograph Extension C */
881 (0x2B740 <= code && code <= 0x2B81D); /* CJK Ideograph Extension D */
882}
883
884static int
885_getucname(PyObject *self, Py_UCS4 code, char* buffer, int buflen)
886{
887 int offset;
888 int i;
889 int word;
890 unsigned char* w;
891
892 if (code >= 0x110000)
893 return 0;
894
895 if (self && UCD_Check(self)((((PyObject*)(self))->ob_type)==&UCD_Type)) {
896 const change_record *old = get_old_record(self, code)((((PreviousDBVersion*)self)->getrecord)(code));
897 if (old->category_changed == 0) {
898 /* unassigned */
899 return 0;
900 }
901 }
902
903 if (SBase0xAC00 <= code && code < SBase0xAC00+SCount(19*(21*28))) {
904 /* Hangul syllable. */
905 int SIndex = code - SBase0xAC00;
906 int L = SIndex / NCount(21*28);
907 int V = (SIndex % NCount(21*28)) / TCount28;
908 int T = SIndex % TCount28;
909
910 if (buflen < 27)
911 /* Worst case: HANGUL SYLLABLE <10chars>. */
912 return 0;
913 strcpy(buffer, "HANGUL SYLLABLE ")((__builtin_object_size (buffer, 0) != (size_t) -1) ? __builtin___strcpy_chk
(buffer, "HANGUL SYLLABLE ", __builtin_object_size (buffer, 2
> 1)) : __inline_strcpy_chk (buffer, "HANGUL SYLLABLE "))
;
914 buffer += 16;
915 strcpy(buffer, hangul_syllables[L][0])((__builtin_object_size (buffer, 0) != (size_t) -1) ? __builtin___strcpy_chk
(buffer, hangul_syllables[L][0], __builtin_object_size (buffer
, 2 > 1)) : __inline_strcpy_chk (buffer, hangul_syllables[
L][0]))
;
916 buffer += strlen(hangul_syllables[L][0]);
917 strcpy(buffer, hangul_syllables[V][1])((__builtin_object_size (buffer, 0) != (size_t) -1) ? __builtin___strcpy_chk
(buffer, hangul_syllables[V][1], __builtin_object_size (buffer
, 2 > 1)) : __inline_strcpy_chk (buffer, hangul_syllables[
V][1]))
;
918 buffer += strlen(hangul_syllables[V][1]);
919 strcpy(buffer, hangul_syllables[T][2])((__builtin_object_size (buffer, 0) != (size_t) -1) ? __builtin___strcpy_chk
(buffer, hangul_syllables[T][2], __builtin_object_size (buffer
, 2 > 1)) : __inline_strcpy_chk (buffer, hangul_syllables[
T][2]))
;
920 buffer += strlen(hangul_syllables[T][2]);
921 *buffer = '\0';
922 return 1;
923 }
924
925 if (is_unified_ideograph(code)) {
926 if (buflen < 28)
927 /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
928 return 0;
929 sprintf(buffer, "CJK UNIFIED IDEOGRAPH-%X", code)__builtin___sprintf_chk (buffer, 0, __builtin_object_size (buffer
, 2 > 1), "CJK UNIFIED IDEOGRAPH-%X", code)
;
930 return 1;
931 }
932
933 /* get offset into phrasebook */
934 offset = phrasebook_offset1[(code>>phrasebook_shift7)];
935 offset = phrasebook_offset2[(offset<<phrasebook_shift7) +
936 (code&((1<<phrasebook_shift7)-1))];
937 if (!offset)
938 return 0;
939
940 i = 0;
941
942 for (;;) {
943 /* get word index */
944 word = phrasebook[offset] - phrasebook_short211;
945 if (word >= 0) {
946 word = (word << 8) + phrasebook[offset+1];
947 offset += 2;
948 } else
949 word = phrasebook[offset++];
950 if (i) {
951 if (i > buflen)
952 return 0; /* buffer overflow */
953 buffer[i++] = ' ';
954 }
955 /* copy word string from lexicon. the last character in the
956 word has bit 7 set. the last word in a string ends with
957 0x80 */
958 w = lexicon + lexicon_offset[word];
959 while (*w < 128) {
960 if (i >= buflen)
961 return 0; /* buffer overflow */
962 buffer[i++] = *w++;
963 }
964 if (i >= buflen)
965 return 0; /* buffer overflow */
966 buffer[i++] = *w & 127;
967 if (*w == 128)
968 break; /* end of word */
969 }
970
971 return 1;
972}
973
974static int
975_cmpname(PyObject *self, int code, const char* name, int namelen)
976{
977 /* check if code corresponds to the given name */
978 int i;
979 char buffer[NAME_MAXLEN256];
980 if (!_getucname(self, code, buffer, sizeof(buffer)))
981 return 0;
982 for (i = 0; i < namelen; i++) {
983 if (toupper(Py_CHARMASK(name[i]))towupper(btowc(((unsigned char)((name[i]) & 0xff)))) != buffer[i])
984 return 0;
985 }
986 return buffer[namelen] == '\0';
987}
988
989static void
990find_syllable(const char *str, int *len, int *pos, int count, int column)
991{
992 int i, len1;
993 *len = -1;
994 for (i = 0; i < count; i++) {
995 char *s = hangul_syllables[i][column];
996 len1 = strlen(s);
997 if (len1 <= *len)
998 continue;
999 if (strncmp(str, s, len1) == 0) {
1000 *len = len1;
1001 *pos = i;
1002 }
1003 }
1004 if (*len == -1) {
1005 *len = 0;
1006 }
1007}
1008
1009static int
1010_getcode(PyObject* self, const char* name, int namelen, Py_UCS4* code)
1011{
1012 unsigned int h, v;
1013 unsigned int mask = code_size32768-1;
1014 unsigned int i, incr;
1015
1016 /* Check for hangul syllables. */
1017 if (strncmp(name, "HANGUL SYLLABLE ", 16) == 0) {
1018 int len, L = -1, V = -1, T = -1;
1019 const char *pos = name + 16;
1020 find_syllable(pos, &len, &L, LCount19, 0);
1021 pos += len;
1022 find_syllable(pos, &len, &V, VCount21, 1);
1023 pos += len;
1024 find_syllable(pos, &len, &T, TCount28, 2);
1025 pos += len;
1026 if (L != -1 && V != -1 && T != -1 && pos-name == namelen) {
1027 *code = SBase0xAC00 + (L*VCount21+V)*TCount28 + T;
1028 return 1;
1029 }
1030 /* Otherwise, it's an illegal syllable name. */
1031 return 0;
1032 }
1033
1034 /* Check for unified ideographs. */
1035 if (strncmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
1036 /* Four or five hexdigits must follow. */
1037 v = 0;
1038 name += 22;
1039 namelen -= 22;
1040 if (namelen != 4 && namelen != 5)
1041 return 0;
1042 while (namelen--) {
1043 v *= 16;
1044 if (*name >= '0' && *name <= '9')
1045 v += *name - '0';
1046 else if (*name >= 'A' && *name <= 'F')
1047 v += *name - 'A' + 10;
1048 else
1049 return 0;
1050 name++;
1051 }
1052 if (!is_unified_ideograph(v))
1053 return 0;
1054 *code = v;
1055 return 1;
1056 }
1057
1058 /* the following is the same as python's dictionary lookup, with
1059 only minor changes. see the makeunicodedata script for more
1060 details */
1061
1062 h = (unsigned int) _gethash(name, namelen, code_magic47);
1063 i = (~h) & mask;
1064 v = code_hash[i];
1065 if (!v)
1066 return 0;
1067 if (_cmpname(self, v, name, namelen)) {
1068 *code = v;
1069 return 1;
1070 }
1071 incr = (h ^ (h >> 3)) & mask;
1072 if (!incr)
1073 incr = mask;
1074 for (;;) {
1075 i = (i + incr) & mask;
1076 v = code_hash[i];
1077 if (!v)
1078 return 0;
1079 if (_cmpname(self, v, name, namelen)) {
1080 *code = v;
1081 return 1;
1082 }
1083 incr = incr << 1;
1084 if (incr > mask)
1085 incr = incr ^ code_poly32771;
1086 }
1087}
1088
1089static const _PyUnicode_Name_CAPI hashAPI =
1090{
1091 sizeof(_PyUnicode_Name_CAPI),
1092 _getucname,
1093 _getcode
1094};
1095
1096/* -------------------------------------------------------------------- */
1097/* Python bindings */
1098
1099PyDoc_STRVAR(unicodedata_name__doc__,static char unicodedata_name__doc__[] = "name(unichr[, default])\nReturns the name assigned to the Unicode character unichr as a\nstring. If no name is defined, default is returned, or, if not\ngiven, ValueError is raised."
1100"name(unichr[, default])\n\static char unicodedata_name__doc__[] = "name(unichr[, default])\nReturns the name assigned to the Unicode character unichr as a\nstring. If no name is defined, default is returned, or, if not\ngiven, ValueError is raised."
1101Returns the name assigned to the Unicode character unichr as a\n\static char unicodedata_name__doc__[] = "name(unichr[, default])\nReturns the name assigned to the Unicode character unichr as a\nstring. If no name is defined, default is returned, or, if not\ngiven, ValueError is raised."
1102string. If no name is defined, default is returned, or, if not\n\static char unicodedata_name__doc__[] = "name(unichr[, default])\nReturns the name assigned to the Unicode character unichr as a\nstring. If no name is defined, default is returned, or, if not\ngiven, ValueError is raised."
1103given, ValueError is raised.")static char unicodedata_name__doc__[] = "name(unichr[, default])\nReturns the name assigned to the Unicode character unichr as a\nstring. If no name is defined, default is returned, or, if not\ngiven, ValueError is raised.";
1104
1105static PyObject *
1106unicodedata_name(PyObject* self, PyObject* args)
1107{
1108 char name[NAME_MAXLEN256];
1109 Py_UCS4 c;
1110
1111 PyUnicodeObject* v;
1112 PyObject* defobj = NULL((void*)0);
1113 if (!PyArg_ParseTuple(args, "O!|O:name", &PyUnicode_Type, &v, &defobj))
1114 return NULL((void*)0);
1115
1116 c = getuchar(v);
1117 if (c == (Py_UCS4)-1)
1118 return NULL((void*)0);
1119
1120 if (!_getucname(self, c, name, sizeof(name))) {
1121 if (defobj == NULL((void*)0)) {
1122 PyErr_SetString(PyExc_ValueError, "no such name");
1123 return NULL((void*)0);
1124 }
1125 else {
1126 Py_INCREF(defobj)( _Py_RefTotal++ , ((PyObject*)(defobj))->ob_refcnt++);
1127 return defobj;
1128 }
1129 }
1130
1131 return PyUnicode_FromStringPyUnicodeUCS2_FromString(name);
1132}
1133
1134PyDoc_STRVAR(unicodedata_lookup__doc__,static char unicodedata_lookup__doc__[] = "lookup(name)\n\nLook up character by name. If a character with the\ngiven name is found, return the corresponding Unicode\ncharacter. If not found, KeyError is raised."
1135"lookup(name)\n\static char unicodedata_lookup__doc__[] = "lookup(name)\n\nLook up character by name. If a character with the\ngiven name is found, return the corresponding Unicode\ncharacter. If not found, KeyError is raised."
1136\n\static char unicodedata_lookup__doc__[] = "lookup(name)\n\nLook up character by name. If a character with the\ngiven name is found, return the corresponding Unicode\ncharacter. If not found, KeyError is raised."
1137Look up character by name. If a character with the\n\static char unicodedata_lookup__doc__[] = "lookup(name)\n\nLook up character by name. If a character with the\ngiven name is found, return the corresponding Unicode\ncharacter. If not found, KeyError is raised."
1138given name is found, return the corresponding Unicode\n\static char unicodedata_lookup__doc__[] = "lookup(name)\n\nLook up character by name. If a character with the\ngiven name is found, return the corresponding Unicode\ncharacter. If not found, KeyError is raised."
1139character. If not found, KeyError is raised.")static char unicodedata_lookup__doc__[] = "lookup(name)\n\nLook up character by name. If a character with the\ngiven name is found, return the corresponding Unicode\ncharacter. If not found, KeyError is raised.";
1140
1141static PyObject *
1142unicodedata_lookup(PyObject* self, PyObject* args)
1143{
1144 Py_UCS4 code;
1145 Py_UNICODE str[2];
1146
1147 char* name;
1148 int namelen;
1149 if (!PyArg_ParseTuple(args, "s#:lookup", &name, &namelen))
1150 return NULL((void*)0);
1151
1152 if (!_getcode(self, name, namelen, &code)) {
1153 PyErr_Format(PyExc_KeyError, "undefined character name '%s'",
1154 name);
1155 return NULL((void*)0);
1156 }
1157
1158#ifndef Py_UNICODE_WIDE
1159 if (code >= 0x10000) {
1160 str[0] = 0xd800 + ((code - 0x10000) >> 10);
1161 str[1] = 0xdc00 + ((code - 0x10000) & 0x3ff);
1162 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(str, 2);
1163 }
1164#endif
1165 str[0] = (Py_UNICODE) code;
1166 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(str, 1);
1167}
1168
1169/* XXX Add doc strings. */
1170
1171static PyMethodDef unicodedata_functions[] = {
1172 {"decimal", unicodedata_decimal, METH_VARARGS0x0001, unicodedata_decimal__doc__},
1173 {"digit", unicodedata_digit, METH_VARARGS0x0001, unicodedata_digit__doc__},
1174 {"numeric", unicodedata_numeric, METH_VARARGS0x0001, unicodedata_numeric__doc__},
1175 {"category", unicodedata_category, METH_VARARGS0x0001,
1176 unicodedata_category__doc__},
1177 {"bidirectional", unicodedata_bidirectional, METH_VARARGS0x0001,
1178 unicodedata_bidirectional__doc__},
1179 {"combining", unicodedata_combining, METH_VARARGS0x0001,
1180 unicodedata_combining__doc__},
1181 {"mirrored", unicodedata_mirrored, METH_VARARGS0x0001,
1182 unicodedata_mirrored__doc__},
1183 {"east_asian_width", unicodedata_east_asian_width, METH_VARARGS0x0001,
1184 unicodedata_east_asian_width__doc__},
1185 {"decomposition", unicodedata_decomposition, METH_VARARGS0x0001,
1186 unicodedata_decomposition__doc__},
1187 {"name", unicodedata_name, METH_VARARGS0x0001, unicodedata_name__doc__},
1188 {"lookup", unicodedata_lookup, METH_VARARGS0x0001, unicodedata_lookup__doc__},
1189 {"normalize", unicodedata_normalize, METH_VARARGS0x0001,
1190 unicodedata_normalize__doc__},
1191 {NULL((void*)0), NULL((void*)0)} /* sentinel */
1192};
1193
1194static PyTypeObject UCD_Type = {
1195 /* The ob_type field must be initialized in the module init function
1196 * to be portable to Windows without using C++. */
1197 PyVarObject_HEAD_INIT(NULL, 0){ { 0, 0, 1, ((void*)0) }, 0 },
1198 "unicodedata.UCD", /*tp_name*/
1199 sizeof(PreviousDBVersion), /*tp_basicsize*/
1200 0, /*tp_itemsize*/
1201 /* methods */
1202 (destructor)PyObject_Del_PyObject_DebugFree, /*tp_dealloc*/
1203 0, /*tp_print*/
1204 0, /*tp_getattr*/
1205 0, /*tp_setattr*/
1206 0, /*tp_reserved*/
1207 0, /*tp_repr*/
1208 0, /*tp_as_number*/
1209 0, /*tp_as_sequence*/
1210 0, /*tp_as_mapping*/
1211 0, /*tp_hash*/
1212 0, /*tp_call*/
1213 0, /*tp_str*/
1214 PyObject_GenericGetAttr,/*tp_getattro*/
1215 0, /*tp_setattro*/
1216 0, /*tp_as_buffer*/
1217 Py_TPFLAGS_DEFAULT( 0 | (1L<<18) | 0), /*tp_flags*/
1218 0, /*tp_doc*/
1219 0, /*tp_traverse*/
1220 0, /*tp_clear*/
1221 0, /*tp_richcompare*/
1222 0, /*tp_weaklistoffset*/
1223 0, /*tp_iter*/
1224 0, /*tp_iternext*/
1225 unicodedata_functions, /*tp_methods*/
1226 DB_members, /*tp_members*/
1227 0, /*tp_getset*/
1228 0, /*tp_base*/
1229 0, /*tp_dict*/
1230 0, /*tp_descr_get*/
1231 0, /*tp_descr_set*/
1232 0, /*tp_dictoffset*/
1233 0, /*tp_init*/
1234 0, /*tp_alloc*/
1235 0, /*tp_new*/
1236 0, /*tp_free*/
1237 0, /*tp_is_gc*/
1238};
1239
1240PyDoc_STRVAR(unicodedata_docstring,static char unicodedata_docstring[] = "This module provides access to the Unicode Character Database which\ndefines character properties for all Unicode characters. The data in\nthis database is based on the UnicodeData.txt file version\n5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\nThe module uses the same names and symbols as defined by the\nUnicodeData File Format 5.2.0 (see\nhttp://www.unicode.org/reports/tr44/tr44-4.html)."
1241"This module provides access to the Unicode Character Database which\n\static char unicodedata_docstring[] = "This module provides access to the Unicode Character Database which\ndefines character properties for all Unicode characters. The data in\nthis database is based on the UnicodeData.txt file version\n5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\nThe module uses the same names and symbols as defined by the\nUnicodeData File Format 5.2.0 (see\nhttp://www.unicode.org/reports/tr44/tr44-4.html)."
1242defines character properties for all Unicode characters. The data in\n\static char unicodedata_docstring[] = "This module provides access to the Unicode Character Database which\ndefines character properties for all Unicode characters. The data in\nthis database is based on the UnicodeData.txt file version\n5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\nThe module uses the same names and symbols as defined by the\nUnicodeData File Format 5.2.0 (see\nhttp://www.unicode.org/reports/tr44/tr44-4.html)."
1243this database is based on the UnicodeData.txt file version\n\static char unicodedata_docstring[] = "This module provides access to the Unicode Character Database which\ndefines character properties for all Unicode characters. The data in\nthis database is based on the UnicodeData.txt file version\n5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\nThe module uses the same names and symbols as defined by the\nUnicodeData File Format 5.2.0 (see\nhttp://www.unicode.org/reports/tr44/tr44-4.html)."
12445.2.0 which is publically available from ftp://ftp.unicode.org/.\n\static char unicodedata_docstring[] = "This module provides access to the Unicode Character Database which\ndefines character properties for all Unicode characters. The data in\nthis database is based on the UnicodeData.txt file version\n5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\nThe module uses the same names and symbols as defined by the\nUnicodeData File Format 5.2.0 (see\nhttp://www.unicode.org/reports/tr44/tr44-4.html)."
1245\n\static char unicodedata_docstring[] = "This module provides access to the Unicode Character Database which\ndefines character properties for all Unicode characters. The data in\nthis database is based on the UnicodeData.txt file version\n5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\nThe module uses the same names and symbols as defined by the\nUnicodeData File Format 5.2.0 (see\nhttp://www.unicode.org/reports/tr44/tr44-4.html)."
1246The module uses the same names and symbols as defined by the\n\static char unicodedata_docstring[] = "This module provides access to the Unicode Character Database which\ndefines character properties for all Unicode characters. The data in\nthis database is based on the UnicodeData.txt file version\n5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\nThe module uses the same names and symbols as defined by the\nUnicodeData File Format 5.2.0 (see\nhttp://www.unicode.org/reports/tr44/tr44-4.html)."
1247UnicodeData File Format 5.2.0 (see\n\static char unicodedata_docstring[] = "This module provides access to the Unicode Character Database which\ndefines character properties for all Unicode characters. The data in\nthis database is based on the UnicodeData.txt file version\n5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\nThe module uses the same names and symbols as defined by the\nUnicodeData File Format 5.2.0 (see\nhttp://www.unicode.org/reports/tr44/tr44-4.html)."
1248http://www.unicode.org/reports/tr44/tr44-4.html).")static char unicodedata_docstring[] = "This module provides access to the Unicode Character Database which\ndefines character properties for all Unicode characters. The data in\nthis database is based on the UnicodeData.txt file version\n5.2.0 which is publically available from ftp://ftp.unicode.org/.\n\nThe module uses the same names and symbols as defined by the\nUnicodeData File Format 5.2.0 (see\nhttp://www.unicode.org/reports/tr44/tr44-4.html).";
1249
1250
1251static struct PyModuleDef unicodedatamodule = {
1252 PyModuleDef_HEAD_INIT{ { 0, 0, 1, ((void*)0) }, ((void*)0), 0, ((void*)0), },
1253 "unicodedata",
1254 unicodedata_docstring,
1255 -1,
1256 unicodedata_functions,
1257 NULL((void*)0),
1258 NULL((void*)0),
1259 NULL((void*)0),
1260 NULL((void*)0)
1261};
1262
1263PyMODINIT_FUNCPyObject*
1264PyInit_unicodedata(void)
1265{
1266 PyObject *m, *v;
1267
1268 Py_TYPE(&UCD_Type)(((PyObject*)(&UCD_Type))->ob_type) = &PyType_Type;
1269
1270 m = PyModule_Create(&unicodedatamodule)PyModule_Create2TraceRefs(&unicodedatamodule, 1013);
1271 if (!m)
1272 return NULL((void*)0);
1273
1274 PyModule_AddStringConstant(m, "unidata_version", UNIDATA_VERSION"6.0.0");
1275 Py_INCREF(&UCD_Type)( _Py_RefTotal++ , ((PyObject*)(&UCD_Type))->ob_refcnt
++)
;
1276 PyModule_AddObject(m, "UCD", (PyObject*)&UCD_Type);
1277
1278 /* Previous versions */
1279 v = new_previous_version("3.2.0", get_change_3_2_0, normalization_3_2_0);
1280 if (v != NULL((void*)0))
1281 PyModule_AddObject(m, "ucd_3_2_0", v);
1282
1283 /* Export C API */
1284 v = PyCapsule_New((void *)&hashAPI, PyUnicodeData_CAPSULE_NAME"unicodedata.ucnhash_CAPI", NULL((void*)0));
1285 if (v != NULL((void*)0))
1286 PyModule_AddObject(m, "ucnhash_CAPI", v);
1287 return m;
1288}
1289
1290/*
1291Local variables:
1292c-basic-offset: 4
1293indent-tabs-mode: nil
1294End:
1295*/