Bug Summary

File: Objects/unicodeobject.c
Location: line 6356, column 6
Description: Value stored to 'output' is never read

Annotated Source Code

1/*
2
3Unicode implementation based on original code by Fredrik Lundh,
4modified by Marc-Andre Lemburg <mal@lemburg.com> according to the
5Unicode Integration Proposal (see file Misc/unicode.txt).
6
7Major speed upgrades to the method implementations at the Reykjavik
8NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
9
10Copyright (c) Corporation for National Research Initiatives.
11
12--------------------------------------------------------------------
13The original string type implementation is:
14
15 Copyright (c) 1999 by Secret Labs AB
16 Copyright (c) 1999 by Fredrik Lundh
17
18By obtaining, using, and/or copying this software and/or its
19associated documentation, you agree that you have read, understood,
20and will comply with the following terms and conditions:
21
22Permission to use, copy, modify, and distribute this software and its
23associated documentation for any purpose and without fee is hereby
24granted, provided that the above copyright notice appears in all
25copies, and that both that copyright notice and this permission notice
26appear in supporting documentation, and that the name of Secret Labs
27AB or the author not be used in advertising or publicity pertaining to
28distribution of the software without specific, written prior
29permission.
30
31SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
32THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
33FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
34ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
35WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
36ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
37OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
38--------------------------------------------------------------------
39
40*/
41
42#define PY_SSIZE_T_CLEAN
43#include "Python.h"
44#include "ucnhash.h"
45
46#ifdef MS_WINDOWS
47#include <windows.h>
48#endif
49
50/* Limit for the Unicode object free list */
51
52#define PyUnicode_MAXFREELIST1024 1024
53
54/* Limit for the Unicode object free list stay alive optimization.
55
56 The implementation will keep allocated Unicode memory intact for
57 all objects on the free list having a length (in Py_UNICODE units)
58 less than this limit. This reduces malloc() overhead for small Unicode objects.
59
60 At worst this will result in PyUnicode_MAXFREELIST *
61 (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT * sizeof(Py_UNICODE) +
62 malloc()-overhead) bytes of unused garbage.
63
64 Setting the limit to 0 effectively turns the feature off.
65
66 Note: This is an experimental feature ! If you get core dumps when
67 using Unicode objects, turn this feature off.
68
69*/
70
71#define KEEPALIVE_SIZE_LIMIT9 9
72
73/* Endianness switches; defaults to little endian */
74
75#ifdef WORDS_BIGENDIAN
76# define BYTEORDER_IS_BIG_ENDIAN
77#else
78# define BYTEORDER_IS_LITTLE_ENDIAN
79#endif
80
81/* --- Globals ------------------------------------------------------------
82
83 The globals are initialized by the _PyUnicode_Init() API and should
84 not be used before calling that API.
85
86*/
87
88
89#ifdef __cplusplus
90extern "C" {
91#endif
92
93/* This dictionary holds all interned unicode strings. Note that references
94 to strings in this dictionary are *not* counted in the string's ob_refcnt.
95 When the interned string reaches a refcnt of 0 the string deallocation
96 function will delete the reference from this dictionary.
97
98 Another way to look at this is to say that the actual reference
99 count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
100*/
101static PyObject *interned;
102
103/* Free list for Unicode objects */
104static PyUnicodeObject *free_list;
105static int numfree;
106
107/* The empty Unicode object is shared to improve performance. */
108static PyUnicodeObject *unicode_empty;
109
110/* Single character Unicode strings in the Latin-1 range are being
111 shared as well. */
112static PyUnicodeObject *unicode_latin1[256];
113
114/* Fast detection of the most frequent whitespace characters */
115const unsigned char _Py_ascii_whitespace[] = {
116 0, 0, 0, 0, 0, 0, 0, 0,
117/* case 0x0009: * CHARACTER TABULATION */
118/* case 0x000A: * LINE FEED */
119/* case 0x000B: * LINE TABULATION */
120/* case 0x000C: * FORM FEED */
121/* case 0x000D: * CARRIAGE RETURN */
122 0, 1, 1, 1, 1, 1, 0, 0,
123 0, 0, 0, 0, 0, 0, 0, 0,
124/* case 0x001C: * FILE SEPARATOR */
125/* case 0x001D: * GROUP SEPARATOR */
126/* case 0x001E: * RECORD SEPARATOR */
127/* case 0x001F: * UNIT SEPARATOR */
128 0, 0, 0, 0, 1, 1, 1, 1,
129/* case 0x0020: * SPACE */
130 1, 0, 0, 0, 0, 0, 0, 0,
131 0, 0, 0, 0, 0, 0, 0, 0,
132 0, 0, 0, 0, 0, 0, 0, 0,
133 0, 0, 0, 0, 0, 0, 0, 0,
134
135 0, 0, 0, 0, 0, 0, 0, 0,
136 0, 0, 0, 0, 0, 0, 0, 0,
137 0, 0, 0, 0, 0, 0, 0, 0,
138 0, 0, 0, 0, 0, 0, 0, 0,
139 0, 0, 0, 0, 0, 0, 0, 0,
140 0, 0, 0, 0, 0, 0, 0, 0,
141 0, 0, 0, 0, 0, 0, 0, 0,
142 0, 0, 0, 0, 0, 0, 0, 0
143};
144
145static PyObject *unicode_encode_call_errorhandler(const char *errors,
146 PyObject **errorHandler,const char *encoding, const char *reason,
147 const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
148 Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos);
149
150static void raise_encode_exception(PyObject **exceptionObject,
151 const char *encoding,
152 const Py_UNICODE *unicode, Py_ssize_t size,
153 Py_ssize_t startpos, Py_ssize_t endpos,
154 const char *reason);
155
156/* Same for linebreaks */
157static unsigned char ascii_linebreak[] = {
158 0, 0, 0, 0, 0, 0, 0, 0,
159/* 0x000A, * LINE FEED */
160/* 0x000B, * LINE TABULATION */
161/* 0x000C, * FORM FEED */
162/* 0x000D, * CARRIAGE RETURN */
163 0, 0, 1, 1, 1, 1, 0, 0,
164 0, 0, 0, 0, 0, 0, 0, 0,
165/* 0x001C, * FILE SEPARATOR */
166/* 0x001D, * GROUP SEPARATOR */
167/* 0x001E, * RECORD SEPARATOR */
168 0, 0, 0, 0, 1, 1, 1, 0,
169 0, 0, 0, 0, 0, 0, 0, 0,
170 0, 0, 0, 0, 0, 0, 0, 0,
171 0, 0, 0, 0, 0, 0, 0, 0,
172 0, 0, 0, 0, 0, 0, 0, 0,
173
174 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0,
177 0, 0, 0, 0, 0, 0, 0, 0,
178 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 0, 0, 0
182};
183
184
185Py_UNICODE
186PyUnicode_GetMaxPyUnicodeUCS2_GetMax(void)
187{
188#ifdef Py_UNICODE_WIDE
189 return 0x10FFFF;
190#else
191 /* This is actually an illegal character, so it should
192 not be passed to unichr. */
193 return 0xFFFF;
194#endif
195}
196
197/* --- Bloom Filters ----------------------------------------------------- */
198
199/* Stuff to implement simple "bloom filters" for Unicode characters.
200 To keep things simple, we use a single bitmask, using the low bits
201 (ch & (BLOOM_WIDTH - 1)) of each Unicode character as the bit index. */
202
203/* the linebreak mask is set up by _PyUnicode_Init() below */
204
205#if LONG_BIT64 >= 128
206#define BLOOM_WIDTH64 128
207#elif LONG_BIT64 >= 64
208#define BLOOM_WIDTH64 64
209#elif LONG_BIT64 >= 32
210#define BLOOM_WIDTH64 32
211#else
212#error "LONG_BIT is smaller than 32"
213#endif
214
215#define BLOOM_MASKunsigned long unsigned long
216
217static BLOOM_MASKunsigned long bloom_linebreak;
218
219#define BLOOM_ADD(mask, ch)((mask |= (1UL << ((ch) & (64 - 1))))) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH64 - 1)))))
220#define BLOOM(mask, ch)((mask & (1UL << ((ch) & (64 - 1))))) ((mask & (1UL << ((ch) & (BLOOM_WIDTH64 - 1)))))
221
222#define BLOOM_LINEBREAK(ch)((ch) < 128U ? ascii_linebreak[(ch)] : (((bloom_linebreak &
(1UL << (((ch)) & (64 - 1))))) && _PyUnicode_IsLinebreak
(ch)))
\
223 ((ch) < 128U ? ascii_linebreak[(ch)] : \
224 (BLOOM(bloom_linebreak, (ch))((bloom_linebreak & (1UL << (((ch)) & (64 - 1))
)))
&& Py_UNICODE_ISLINEBREAK(ch)_PyUnicode_IsLinebreak(ch)))
225
226Py_LOCAL_INLINE(BLOOM_MASK)static inline unsigned long make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
227{
228 /* calculate simple bloom-style bitmask for a given unicode string */
229
230 BLOOM_MASKunsigned long mask;
231 Py_ssize_t i;
232
233 mask = 0;
234 for (i = 0; i < len; i++)
235 BLOOM_ADD(mask, ptr[i])((mask |= (1UL << ((ptr[i]) & (64 - 1)))));
236
237 return mask;
238}
239
240Py_LOCAL_INLINE(int)static inline int unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen)
241{
242 Py_ssize_t i;
243
244 for (i = 0; i < setlen; i++)
245 if (set[i] == chr)
246 return 1;
247
248 return 0;
249}
250
251#define BLOOM_MEMBER(mask, chr, set, setlen)((mask & (1UL << ((chr) & (64 - 1))))) &&
unicode_member(chr, set, setlen)
\
252 BLOOM(mask, chr)((mask & (1UL << ((chr) & (64 - 1))))) && unicode_member(chr, set, setlen)
253
254/* --- Unicode Object ----------------------------------------------------- */
255
256static
257int unicode_resize(register PyUnicodeObject *unicode,
258 Py_ssize_t length)
259{
260 void *oldstr;
261
262 /* Shortcut if there's nothing much to do. */
263 if (unicode->length == length)
264 goto reset;
265
266 /* Resizing shared object (unicode_empty or single character
267 objects) in-place is not allowed. Use PyUnicode_Resize()
268 instead ! */
269
270 if (unicode == unicode_empty ||
271 (unicode->length == 1 &&
272 unicode->str[0] < 256U &&
273 unicode_latin1[unicode->str[0]] == unicode)) {
274 PyErr_SetString(PyExc_SystemError,
275 "can't resize shared str objects");
276 return -1;
277 }
278
279 /* We allocate one more Py_UNICODE element (not byte) to make sure the
280 string is U+0000 terminated. The overallocation is also used by
281 fastsearch, which assumes that it's safe to look at str[length]
282 (without making any assumptions about what it contains). */
283
284 oldstr = unicode->str;
285 unicode->str = PyObject_REALLOC_PyObject_DebugRealloc(unicode->str,
286 sizeof(Py_UNICODE) * (length + 1));
287 if (!unicode->str) {
288 unicode->str = (Py_UNICODE *)oldstr;
289 PyErr_NoMemory();
290 return -1;
291 }
292 unicode->str[length] = 0;
293 unicode->length = length;
294
295 reset:
296 /* Reset the object caches */
297 if (unicode->defenc) {
298 Py_CLEAR(unicode->defenc)do { if (unicode->defenc) { PyObject *_py_tmp = (PyObject *
)(unicode->defenc); (unicode->defenc) = ((void *)0); do
{ if (_Py_RefTotal-- , --((PyObject*)(_py_tmp))->ob_refcnt
!= 0) { if (((PyObject*)_py_tmp)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 298, (PyObject *)(_py_tmp)); } else
_Py_Dealloc((PyObject *)(_py_tmp)); } while (0); } } while (
0)
;
299 }
300 unicode->hash = -1;
301
302 return 0;
303}
304
305/* We allocate one more Py_UNICODE element to make sure the string is
306 U+0000 terminated; some code (e.g. new_identifier)
307 relies on that.
308
309 XXX This allocator could further be enhanced by assuring that the
310 free list never reduces its size below 1.
311
312*/
313
314static
315PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
316{
317 register PyUnicodeObject *unicode;
318
319 /* Optimization for empty strings */
320 if (length == 0 && unicode_empty != NULL((void *)0)) {
321 Py_INCREF(unicode_empty)( _Py_RefTotal++ , ((PyObject*)(unicode_empty))->ob_refcnt
++)
;
322 return unicode_empty;
323 }
324
325 /* Ensure we won't overflow the size. */
326 if (length > ((PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) / sizeof(Py_UNICODE)) - 1)) {
327 return (PyUnicodeObject *)PyErr_NoMemory();
328 }
329
330 /* Unicode freelist & memory allocation */
331 if (free_list) {
332 unicode = free_list;
333 free_list = *(PyUnicodeObject **)unicode;
334 numfree--;
335 if (unicode->str) {
336 /* Keep-Alive optimization: we only upsize the buffer,
337 never downsize it. */
338 if ((unicode->length < length) &&
339 unicode_resize(unicode, length) < 0) {
340 PyObject_DEL_PyObject_DebugFree(unicode->str);
341 unicode->str = NULL((void *)0);
342 }
343 }
344 else {
345 size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
346 unicode->str = (Py_UNICODE*) PyObject_MALLOC_PyObject_DebugMalloc(new_size);
347 }
348 PyObject_INIT(unicode, &PyUnicode_Type)( (((PyObject*)(unicode))->ob_type) = (&PyUnicode_Type
), _Py_NewReference((PyObject *)(unicode)), (unicode) )
;
349 }
350 else {
351 size_t new_size;
352 unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type)( (PyUnicodeObject *) _PyObject_New(&PyUnicode_Type) );
353 if (unicode == NULL((void *)0))
354 return NULL((void *)0);
355 new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
356 unicode->str = (Py_UNICODE*) PyObject_MALLOC_PyObject_DebugMalloc(new_size);
357 }
358
359 if (!unicode->str) {
360 PyErr_NoMemory();
361 goto onError;
362 }
363 /* Initialize the first element to guard against cases where
364 * the caller fails before initializing str -- unicode_resize()
365 * reads str[0], and the Keep-Alive optimization can keep memory
366 * allocated for str alive across a call to unicode_dealloc(unicode).
367 * We don't want unicode_resize to read uninitialized memory in
368 * that case.
369 */
370 unicode->str[0] = 0;
371 unicode->str[length] = 0;
372 unicode->length = length;
373 unicode->hash = -1;
374 unicode->state = 0;
375 unicode->defenc = NULL((void *)0);
376 return unicode;
377
378 onError:
379 /* XXX UNREF/NEWREF interface should be more symmetrical */
380 _Py_DEC_REFTOTAL_Py_RefTotal--;
381 _Py_ForgetReference((PyObject *)unicode);
382 PyObject_Del_PyObject_DebugFree(unicode);
383 return NULL((void *)0);
384}
385
386static
387void unicode_dealloc(register PyUnicodeObject *unicode)
388{
389 switch (PyUnicode_CHECK_INTERNED(unicode)(((PyUnicodeObject *)(unicode))->state)) {
390 case SSTATE_NOT_INTERNED0:
391 break;
392
393 case SSTATE_INTERNED_MORTAL1:
394 /* revive dead object temporarily for DelItem */
395 Py_REFCNT(unicode)(((PyObject*)(unicode))->ob_refcnt) = 3;
396 if (PyDict_DelItem(interned, (PyObject *)unicode) != 0)
397 Py_FatalError(
398 "deletion of interned string failed");
399 break;
400
401 case SSTATE_INTERNED_IMMORTAL2:
402 Py_FatalError("Immortal interned string died.");
403
404 default:
405 Py_FatalError("Inconsistent interned string state.");
406 }
407
408 if (PyUnicode_CheckExact(unicode)((((PyObject*)(unicode))->ob_type) == &PyUnicode_Type) &&
409 numfree < PyUnicode_MAXFREELIST1024) {
410 /* Keep-Alive optimization */
411 if (unicode->length >= KEEPALIVE_SIZE_LIMIT9) {
412 PyObject_DEL_PyObject_DebugFree(unicode->str);
413 unicode->str = NULL((void *)0);
414 unicode->length = 0;
415 }
416 if (unicode->defenc) {
417 Py_CLEAR(unicode->defenc)do { if (unicode->defenc) { PyObject *_py_tmp = (PyObject *
)(unicode->defenc); (unicode->defenc) = ((void *)0); do
{ if (_Py_RefTotal-- , --((PyObject*)(_py_tmp))->ob_refcnt
!= 0) { if (((PyObject*)_py_tmp)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 417, (PyObject *)(_py_tmp)); } else
_Py_Dealloc((PyObject *)(_py_tmp)); } while (0); } } while (
0)
;
418 }
419 /* Add to free list */
420 *(PyUnicodeObject **)unicode = free_list;
421 free_list = unicode;
422 numfree++;
423 }
424 else {
425 PyObject_DEL_PyObject_DebugFree(unicode->str);
426 Py_XDECREF(unicode->defenc)do { if ((unicode->defenc) == ((void *)0)) ; else do { if (
_Py_RefTotal-- , --((PyObject*)(unicode->defenc))->ob_refcnt
!= 0) { if (((PyObject*)unicode->defenc)->ob_refcnt <
0) _Py_NegativeRefcount("Objects/unicodeobject.c", 426, (PyObject
*)(unicode->defenc)); } else _Py_Dealloc((PyObject *)(unicode
->defenc)); } while (0); } while (0)
;
427 Py_TYPE(unicode)(((PyObject*)(unicode))->ob_type)->tp_free((PyObject *)unicode);
428 }
429}
430
431static
432int _PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length)
433{
434 register PyUnicodeObject *v;
435
436 /* Argument checks */
437 if (unicode == NULL((void *)0)) {
438 PyErr_BadInternalCall()_PyErr_BadInternalCall("Objects/unicodeobject.c", 438);
439 return -1;
440 }
441 v = *unicode;
442 if (v == NULL((void *)0) || !PyUnicode_Check(v)((((((PyObject*)(v))->ob_type))->tp_flags & ((1L<<
28))) != 0)
|| Py_REFCNT(v)(((PyObject*)(v))->ob_refcnt) != 1 || length < 0) {
443 PyErr_BadInternalCall()_PyErr_BadInternalCall("Objects/unicodeobject.c", 443);
444 return -1;
445 }
446
447 /* Resizing unicode_empty and single character objects is not
448 possible since these are being shared. We simply return a fresh
449 copy with the same Unicode content. */
450 if (v->length != length &&
451 (v == unicode_empty || v->length == 1)) {
452 PyUnicodeObject *w = _PyUnicode_New(length);
453 if (w == NULL((void *)0))
454 return -1;
455 Py_UNICODE_COPY(w->str, v->str,((__builtin_object_size ((w->str), 0) != (size_t) -1) ? __builtin___memcpy_chk
((w->str), (v->str), (length < v->length ? length
: v->length)*sizeof(Py_UNICODE), __builtin_object_size ((
w->str), 0)) : __inline_memcpy_chk ((w->str), (v->str
), (length < v->length ? length : v->length)*sizeof(
Py_UNICODE)))
456 length < v->length ? length : v->length)((__builtin_object_size ((w->str), 0) != (size_t) -1) ? __builtin___memcpy_chk
((w->str), (v->str), (length < v->length ? length
: v->length)*sizeof(Py_UNICODE), __builtin_object_size ((
w->str), 0)) : __inline_memcpy_chk ((w->str), (v->str
), (length < v->length ? length : v->length)*sizeof(
Py_UNICODE)))
;
457 Py_DECREF(*unicode)do { if (_Py_RefTotal-- , --((PyObject*)(*unicode))->ob_refcnt
!= 0) { if (((PyObject*)*unicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 457, (PyObject *)(*unicode)); } else
_Py_Dealloc((PyObject *)(*unicode)); } while (0)
;
458 *unicode = w;
459 return 0;
460 }
461
462 /* Note that we don't have to modify *unicode for unshared Unicode
463 objects, since we can modify them in-place. */
464 return unicode_resize(v, length);
465}
466
467int PyUnicode_ResizePyUnicodeUCS2_Resize(PyObject **unicode, Py_ssize_t length)
468{
469 return _PyUnicode_Resize((PyUnicodeObject **)unicode, length);
470}
471
472PyObject *PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(const Py_UNICODE *u,
473 Py_ssize_t size)
474{
475 PyUnicodeObject *unicode;
476
477 /* If the Unicode data is known at construction time, we can apply
478 some optimizations which share commonly used objects. */
479 if (u != NULL((void *)0)) {
480
481 /* Optimization for empty strings */
482 if (size == 0 && unicode_empty != NULL((void *)0)) {
483 Py_INCREF(unicode_empty)( _Py_RefTotal++ , ((PyObject*)(unicode_empty))->ob_refcnt
++)
;
484 return (PyObject *)unicode_empty;
485 }
486
487 /* Single character Unicode objects in the Latin-1 range are
488 shared when using this constructor */
489 if (size == 1 && *u < 256) {
490 unicode = unicode_latin1[*u];
491 if (!unicode) {
492 unicode = _PyUnicode_New(1);
493 if (!unicode)
494 return NULL((void *)0);
495 unicode->str[0] = *u;
496 unicode_latin1[*u] = unicode;
497 }
498 Py_INCREF(unicode)( _Py_RefTotal++ , ((PyObject*)(unicode))->ob_refcnt++);
499 return (PyObject *)unicode;
500 }
501 }
502
503 unicode = _PyUnicode_New(size);
504 if (!unicode)
505 return NULL((void *)0);
506
507 /* Copy the Unicode data into the new object */
508 if (u != NULL((void *)0))
509 Py_UNICODE_COPY(unicode->str, u, size)((__builtin_object_size ((unicode->str), 0) != (size_t) -1
) ? __builtin___memcpy_chk ((unicode->str), (u), (size)*sizeof
(Py_UNICODE), __builtin_object_size ((unicode->str), 0)) :
__inline_memcpy_chk ((unicode->str), (u), (size)*sizeof(Py_UNICODE
)))
;
510
511 return (PyObject *)unicode;
512}
513
514PyObject *PyUnicode_FromStringAndSizePyUnicodeUCS2_FromStringAndSize(const char *u, Py_ssize_t size)
515{
516 PyUnicodeObject *unicode;
517
518 if (size < 0) {
519 PyErr_SetString(PyExc_SystemError,
520 "Negative size passed to PyUnicode_FromStringAndSize");
521 return NULL((void *)0);
522 }
523
524 /* If the Unicode data is known at construction time, we can apply
525 some optimizations which share commonly used objects.
526 Also, this means the input must be UTF-8, so fall back to the
527 UTF-8 decoder at the end. */
528 if (u != NULL((void *)0)) {
529
530 /* Optimization for empty strings */
531 if (size == 0 && unicode_empty != NULL((void *)0)) {
532 Py_INCREF(unicode_empty)( _Py_RefTotal++ , ((PyObject*)(unicode_empty))->ob_refcnt
++)
;
533 return (PyObject *)unicode_empty;
534 }
535
536 /* Single characters are shared when using this constructor.
537 Restrict to ASCII, since the input must be UTF-8. */
538 if (size == 1 && Py_CHARMASK(*u)((unsigned char)((*u) & 0xff)) < 128) {
539 unicode = unicode_latin1[Py_CHARMASK(*u)((unsigned char)((*u) & 0xff))];
540 if (!unicode) {
541 unicode = _PyUnicode_New(1);
542 if (!unicode)
543 return NULL((void *)0);
544 unicode->str[0] = Py_CHARMASK(*u)((unsigned char)((*u) & 0xff));
545 unicode_latin1[Py_CHARMASK(*u)((unsigned char)((*u) & 0xff))] = unicode;
546 }
547 Py_INCREF(unicode)( _Py_RefTotal++ , ((PyObject*)(unicode))->ob_refcnt++);
548 return (PyObject *)unicode;
549 }
550
551 return PyUnicode_DecodeUTF8PyUnicodeUCS2_DecodeUTF8(u, size, NULL((void *)0));
552 }
553
554 unicode = _PyUnicode_New(size);
555 if (!unicode)
556 return NULL((void *)0);
557
558 return (PyObject *)unicode;
559}
560
561PyObject *PyUnicode_FromStringPyUnicodeUCS2_FromString(const char *u)
562{
563 size_t size = strlen(u);
564 if (size > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1))) {
565 PyErr_SetString(PyExc_OverflowError, "input too long");
566 return NULL((void *)0);
567 }
568
569 return PyUnicode_FromStringAndSizePyUnicodeUCS2_FromStringAndSize(u, size);
570}
571
572#ifdef HAVE_WCHAR_H1
573
574#if (Py_UNICODE_SIZE2 == 2) && defined(SIZEOF_WCHAR_T4) && (SIZEOF_WCHAR_T4 == 4)
575# define CONVERT_WCHAR_TO_SURROGATES
576#endif
577
578#ifdef CONVERT_WCHAR_TO_SURROGATES
579
580/* Here sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2, so we need
581 to convert from UTF32 to UTF16. */
582
583PyObject *PyUnicode_FromWideCharPyUnicodeUCS2_FromWideChar(register const wchar_t *w,
584 Py_ssize_t size)
585{
586 PyUnicodeObject *unicode;
587 register Py_ssize_t i;
588 Py_ssize_t alloc;
589 const wchar_t *orig_w;
590
591 if (w == NULL((void *)0)) {
592 if (size == 0)
593 return PyUnicode_FromStringAndSizePyUnicodeUCS2_FromStringAndSize(NULL((void *)0), 0);
594 PyErr_BadInternalCall()_PyErr_BadInternalCall("Objects/unicodeobject.c", 594);
595 return NULL((void *)0);
596 }
597
598 if (size == -1) {
599 size = wcslen(w);
600 }
601
602 alloc = size;
603 orig_w = w;
604 for (i = size; i > 0; i--) {
605 if (*w > 0xFFFF)
606 alloc++;
607 w++;
608 }
609 w = orig_w;
610 unicode = _PyUnicode_New(alloc);
611 if (!unicode)
612 return NULL((void *)0);
613
614 /* Copy the wchar_t data into the new object */
615 {
616 register Py_UNICODE *u;
617 u = PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 617, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
618 for (i = size; i > 0; i--) {
619 if (*w > 0xFFFF) {
620 wchar_t ordinal = *w++;
621 ordinal -= 0x10000;
622 *u++ = 0xD800 | (ordinal >> 10);
623 *u++ = 0xDC00 | (ordinal & 0x3FF);
624 }
625 else
626 *u++ = *w++;
627 }
628 }
629 return (PyObject *)unicode;
630}
631
632#else
633
634PyObject *PyUnicode_FromWideCharPyUnicodeUCS2_FromWideChar(register const wchar_t *w,
635 Py_ssize_t size)
636{
637 PyUnicodeObject *unicode;
638
639 if (w == NULL((void *)0)) {
640 if (size == 0)
641 return PyUnicode_FromStringAndSizePyUnicodeUCS2_FromStringAndSize(NULL((void *)0), 0);
642 PyErr_BadInternalCall()_PyErr_BadInternalCall("Objects/unicodeobject.c", 642);
643 return NULL((void *)0);
644 }
645
646 if (size == -1) {
647 size = wcslen(w);
648 }
649
650 unicode = _PyUnicode_New(size);
651 if (!unicode)
652 return NULL((void *)0);
653
654 /* Copy the wchar_t data into the new object */
655#if Py_UNICODE_SIZE2 == SIZEOF_WCHAR_T4
656 memcpy(unicode->str, w, size * sizeof(wchar_t))((__builtin_object_size (unicode->str, 0) != (size_t) -1) ?
__builtin___memcpy_chk (unicode->str, w, size * sizeof(wchar_t
), __builtin_object_size (unicode->str, 0)) : __inline_memcpy_chk
(unicode->str, w, size * sizeof(wchar_t)))
;
657#else
658 {
659 register Py_UNICODE *u;
660 register Py_ssize_t i;
661 u = PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 661, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
662 for (i = size; i > 0; i--)
663 *u++ = *w++;
664 }
665#endif
666
667 return (PyObject *)unicode;
668}
669
670#endif /* CONVERT_WCHAR_TO_SURROGATES */
671
672#undef CONVERT_WCHAR_TO_SURROGATES
673
674static void
675makefmt(char *fmt, int longflag, int longlongflag, int size_tflag,
676 int zeropad, int width, int precision, char c)
677{
678 *fmt++ = '%';
679 if (width) {
680 if (zeropad)
681 *fmt++ = '0';
682 fmt += sprintf(fmt, "%d", width)__builtin___sprintf_chk (fmt, 0, __builtin_object_size (fmt, 2
> 1), "%d", width)
;
683 }
684 if (precision)
685 fmt += sprintf(fmt, ".%d", precision)__builtin___sprintf_chk (fmt, 0, __builtin_object_size (fmt, 2
> 1), ".%d", precision)
;
686 if (longflag)
687 *fmt++ = 'l';
688 else if (longlongflag) {
689 /* longlongflag should only ever be nonzero on machines with
690 HAVE_LONG_LONG defined */
691#ifdef HAVE_LONG_LONG1
692 char *f = PY_FORMAT_LONG_LONG"ll";
693 while (*f)
694 *fmt++ = *f++;
695#else
696 /* we shouldn't ever get here */
697 assert(0)(__builtin_expect(!(0), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 697, "0") : (void)0)
;
698 *fmt++ = 'l';
699#endif
700 }
701 else if (size_tflag) {
702 char *f = PY_FORMAT_SIZE_T"l";
703 while (*f)
704 *fmt++ = *f++;
705 }
706 *fmt++ = c;
707 *fmt = '\0';
708}
709
710#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
711
712/* size of fixed-size buffer for formatting single arguments */
713#define ITEM_BUFFER_LEN21 21
714/* maximum number of characters required for output of %ld. 21 characters
715 allows for 64-bit integers (in decimal) and an optional sign. */
716#define MAX_LONG_CHARS21 21
717/* maximum number of characters required for output of %lld.
718 We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
719 plus 1 for the sign. 53/22 is an upper bound for log10(256). */
720#define MAX_LONG_LONG_CHARS(2 + (8*53 -1) / 22) (2 + (SIZEOF_LONG_LONG8*53-1) / 22)
721
722PyObject *
723PyUnicode_FromFormatVPyUnicodeUCS2_FromFormatV(const char *format, va_list vargs)
724{
725 va_list count;
726 Py_ssize_t callcount = 0;
727 PyObject **callresults = NULL((void *)0);
728 PyObject **callresult = NULL((void *)0);
729 Py_ssize_t n = 0;
730 int width = 0;
731 int precision = 0;
732 int zeropad;
733 const char* f;
734 Py_UNICODE *s;
735 PyObject *string;
736 /* used by sprintf */
737 char buffer[ITEM_BUFFER_LEN21+1];
738 /* use abuffer instead of buffer, if we need more space
739 * (which can happen if there's a format specifier with width). */
740 char *abuffer = NULL((void *)0);
741 char *realbuffer;
742 Py_ssize_t abuffersize = 0;
743 char fmt[61]; /* should be enough for %0width.precisionlld */
744 const char *copy;
745
746 Py_VA_COPY(count, vargs)((__builtin_object_size ((count), 0) != (size_t) -1) ? __builtin___memcpy_chk
((count), (vargs), sizeof(va_list), __builtin_object_size ((
count), 0)) : __inline_memcpy_chk ((count), (vargs), sizeof(va_list
)))
;
747 /* step 1: count the number of %S/%R/%A/%s format specifications
748 * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
749 * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
750 * result in an array) */
751 for (f = format; *f; f++) {
752 if (*f == '%') {
753 if (*(f+1)=='%')
754 continue;
755 if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A')
756 ++callcount;
757 while (Py_ISDIGIT((unsigned)*f)(_Py_ctype_table[((unsigned char)(((unsigned)*f) & 0xff))
] & 0x04)
)
758 width = (width*10) + *f++ - '0';
759 while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f)(_Py_ctype_table[((unsigned char)(((unsigned)*f) & 0xff))
] & (0x01|0x02))
)
760 ;
761 if (*f == 's')
762 ++callcount;
763 }
764 else if (128 <= (unsigned char)*f) {
765 PyErr_Format(PyExc_ValueError,
766 "PyUnicode_FromFormatV() expects an ASCII-encoded format "
767 "string, got a non-ASCII byte: 0x%02x",
768 (unsigned char)*f);
769 return NULL((void *)0);
770 }
771 }
772 /* step 2: allocate memory for the results of
773 * PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/PyUnicode_DecodeUTF8() calls */
774 if (callcount) {
775 callresults = PyObject_Malloc_PyObject_DebugMalloc(sizeof(PyObject *)*callcount);
776 if (!callresults) {
777 PyErr_NoMemory();
778 return NULL((void *)0);
779 }
780 callresult = callresults;
781 }
782 /* step 3: figure out how large a buffer we need */
783 for (f = format; *f; f++) {
784 if (*f == '%') {
785#ifdef HAVE_LONG_LONG1
786 int longlongflag = 0;
787#endif
788 const char* p = f;
789 width = 0;
790 while (Py_ISDIGIT((unsigned)*f)(_Py_ctype_table[((unsigned char)(((unsigned)*f) & 0xff))
] & 0x04)
)
791 width = (width*10) + *f++ - '0';
792 while (*++f && *f != '%' && !Py_ISALPHA((unsigned)*f)(_Py_ctype_table[((unsigned char)(((unsigned)*f) & 0xff))
] & (0x01|0x02))
)
793 ;
794
795 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
796 * they don't affect the amount of space we reserve.
797 */
798 if (*f == 'l') {
799 if (f[1] == 'd' || f[1] == 'u') {
800 ++f;
801 }
802#ifdef HAVE_LONG_LONG1
803 else if (f[1] == 'l' &&
804 (f[2] == 'd' || f[2] == 'u')) {
805 longlongflag = 1;
806 f += 2;
807 }
808#endif
809 }
810 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
811 ++f;
812 }
813
814 switch (*f) {
815 case 'c':
816 (void)va_arg(count, int)__builtin_va_arg(count, int);
817 /* fall through... */
818 case '%':
819 n++;
820 break;
821 case 'd': case 'u': case 'i': case 'x':
822 (void) va_arg(count, int)__builtin_va_arg(count, int);
823#ifdef HAVE_LONG_LONG1
824 if (longlongflag) {
825 if (width < MAX_LONG_LONG_CHARS(2 + (8*53 -1) / 22))
826 width = MAX_LONG_LONG_CHARS(2 + (8*53 -1) / 22);
827 }
828 else
829#endif
830 /* MAX_LONG_CHARS is enough to hold a 64-bit integer,
831 including sign. Decimal takes the most space. This
832 isn't enough for octal. If a width is specified we
833 need more (which we allocate later). */
834 if (width < MAX_LONG_CHARS21)
835 width = MAX_LONG_CHARS21;
836 n += width;
837 /* XXX should allow for large precision here too. */
838 if (abuffersize < width)
839 abuffersize = width;
840 break;
841 case 's':
842 {
843 /* UTF-8 */
844 const char *s = va_arg(count, const char*)__builtin_va_arg(count, const char*);
845 PyObject *str = PyUnicode_DecodeUTF8PyUnicodeUCS2_DecodeUTF8(s, strlen(s), "replace");
846 if (!str)
847 goto fail;
848 n += PyUnicode_GET_SIZE(str)((__builtin_expect(!(((((((PyObject*)(str))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 848, "PyUnicode_Check(str)") : (
void)0),(((PyUnicodeObject *)(str))->length))
;
849 /* Remember the str and switch to the next slot */
850 *callresult++ = str;
851 break;
852 }
853 case 'U':
854 {
855 PyObject *obj = va_arg(count, PyObject *)__builtin_va_arg(count, PyObject *);
856 assert(obj && PyUnicode_Check(obj))(__builtin_expect(!(obj && ((((((PyObject*)(obj))->
ob_type))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 856, "obj && PyUnicode_Check(obj)"
) : (void)0)
;
857 n += PyUnicode_GET_SIZE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 857, "PyUnicode_Check(obj)") : (
void)0),(((PyUnicodeObject *)(obj))->length))
;
858 break;
859 }
860 case 'V':
861 {
862 PyObject *obj = va_arg(count, PyObject *)__builtin_va_arg(count, PyObject *);
863 const char *str = va_arg(count, const char *)__builtin_va_arg(count, const char *);
864 assert(obj || str)(__builtin_expect(!(obj || str), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 864, "obj || str") : (void)0)
;
865 assert(!obj || PyUnicode_Check(obj))(__builtin_expect(!(!obj || ((((((PyObject*)(obj))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 865, "!obj || PyUnicode_Check(obj)"
) : (void)0)
;
866 if (obj)
867 n += PyUnicode_GET_SIZE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 867, "PyUnicode_Check(obj)") : (
void)0),(((PyUnicodeObject *)(obj))->length))
;
868 else
869 n += strlen(str);
870 break;
871 }
872 case 'S':
873 {
874 PyObject *obj = va_arg(count, PyObject *)__builtin_va_arg(count, PyObject *);
875 PyObject *str;
876 assert(obj)(__builtin_expect(!(obj), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 876, "obj") : (void)0)
;
877 str = PyObject_Str(obj);
878 if (!str)
879 goto fail;
880 n += PyUnicode_GET_SIZE(str)((__builtin_expect(!(((((((PyObject*)(str))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 880, "PyUnicode_Check(str)") : (
void)0),(((PyUnicodeObject *)(str))->length))
;
881 /* Remember the str and switch to the next slot */
882 *callresult++ = str;
883 break;
884 }
885 case 'R':
886 {
887 PyObject *obj = va_arg(count, PyObject *)__builtin_va_arg(count, PyObject *);
888 PyObject *repr;
889 assert(obj)(__builtin_expect(!(obj), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 889, "obj") : (void)0)
;
890 repr = PyObject_Repr(obj);
891 if (!repr)
892 goto fail;
893 n += PyUnicode_GET_SIZE(repr)((__builtin_expect(!(((((((PyObject*)(repr))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 893, "PyUnicode_Check(repr)") : (
void)0),(((PyUnicodeObject *)(repr))->length))
;
894 /* Remember the repr and switch to the next slot */
895 *callresult++ = repr;
896 break;
897 }
898 case 'A':
899 {
900 PyObject *obj = va_arg(count, PyObject *)__builtin_va_arg(count, PyObject *);
901 PyObject *ascii;
902 assert(obj)(__builtin_expect(!(obj), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 902, "obj") : (void)0)
;
903 ascii = PyObject_ASCII(obj);
904 if (!ascii)
905 goto fail;
906 n += PyUnicode_GET_SIZE(ascii)((__builtin_expect(!(((((((PyObject*)(ascii))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 906, "PyUnicode_Check(ascii)") :
(void)0),(((PyUnicodeObject *)(ascii))->length))
;
907 /* Remember the repr and switch to the next slot */
908 *callresult++ = ascii;
909 break;
910 }
911 case 'p':
912 (void) va_arg(count, int)__builtin_va_arg(count, int);
913 /* maximum 64-bit pointer representation:
914 * 0xffffffffffffffff
915 * so 19 characters is enough.
916 * XXX I count 18 -- what's the extra for?
917 */
918 n += 19;
919 break;
920 default:
921 /* if we stumble upon an unknown
922 formatting code, copy the rest of
923 the format string to the output
924 string. (we cannot just skip the
925 code, since there's no way to know
926 what's in the argument list) */
927 n += strlen(p);
928 goto expand;
929 }
930 } else
931 n++;
932 }
933 expand:
934 if (abuffersize > ITEM_BUFFER_LEN21) {
935 /* add 1 for sprintf's trailing null byte */
936 abuffer = PyObject_Malloc_PyObject_DebugMalloc(abuffersize + 1);
937 if (!abuffer) {
938 PyErr_NoMemory();
939 goto fail;
940 }
941 realbuffer = abuffer;
942 }
943 else
944 realbuffer = buffer;
945 /* step 4: fill the buffer */
946 /* Since we've analyzed how much space we need for the worst case,
947 we don't have to resize the string.
948 There can be no errors beyond this point. */
949 string = PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(NULL((void *)0), n);
950 if (!string)
951 goto fail;
952
953 s = PyUnicode_AS_UNICODE(string)((__builtin_expect(!(((((((PyObject*)(string))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 953, "PyUnicode_Check(string)") :
(void)0),(((PyUnicodeObject *)(string))->str))
;
954 callresult = callresults;
955
956 for (f = format; *f; f++) {
957 if (*f == '%') {
958 const char* p = f++;
959 int longflag = 0;
960 int longlongflag = 0;
961 int size_tflag = 0;
962 zeropad = (*f == '0');
963 /* parse the width.precision part */
964 width = 0;
965 while (Py_ISDIGIT((unsigned)*f)(_Py_ctype_table[((unsigned char)(((unsigned)*f) & 0xff))
] & 0x04)
)
966 width = (width*10) + *f++ - '0';
967 precision = 0;
968 if (*f == '.') {
969 f++;
970 while (Py_ISDIGIT((unsigned)*f)(_Py_ctype_table[((unsigned char)(((unsigned)*f) & 0xff))
] & 0x04)
)
971 precision = (precision*10) + *f++ - '0';
972 }
973 /* Handle %ld, %lu, %lld and %llu. */
974 if (*f == 'l') {
975 if (f[1] == 'd' || f[1] == 'u') {
976 longflag = 1;
977 ++f;
978 }
979#ifdef HAVE_LONG_LONG1
980 else if (f[1] == 'l' &&
981 (f[2] == 'd' || f[2] == 'u')) {
982 longlongflag = 1;
983 f += 2;
984 }
985#endif
986 }
987 /* handle the size_t flag. */
988 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
989 size_tflag = 1;
990 ++f;
991 }
992
993 switch (*f) {
994 case 'c':
995 *s++ = va_arg(vargs, int)__builtin_va_arg(vargs, int);
996 break;
997 case 'd':
998 makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
999 width, precision, 'd');
1000 if (longflag)
1001 sprintf(realbuffer, fmt, va_arg(vargs, long))__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, long))
;
1002#ifdef HAVE_LONG_LONG1
1003 else if (longlongflag)
1004 sprintf(realbuffer, fmt, va_arg(vargs, PY_LONG_LONG))__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, long long
))
;
1005#endif
1006 else if (size_tflag)
1007 sprintf(realbuffer, fmt, va_arg(vargs, Py_ssize_t))__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, Py_ssize_t
))
;
1008 else
1009 sprintf(realbuffer, fmt, va_arg(vargs, int))__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, int))
;
1010 appendstring(realbuffer);
1011 break;
1012 case 'u':
1013 makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
1014 width, precision, 'u');
1015 if (longflag)
1016 sprintf(realbuffer, fmt, va_arg(vargs, unsigned long))__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, unsigned
long))
;
1017#ifdef HAVE_LONG_LONG1
1018 else if (longlongflag)
1019 sprintf(realbuffer, fmt, va_arg(vargs,__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, unsigned
long long))
1020 unsigned PY_LONG_LONG))__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, unsigned
long long))
;
1021#endif
1022 else if (size_tflag)
1023 sprintf(realbuffer, fmt, va_arg(vargs, size_t))__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, size_t)
)
;
1024 else
1025 sprintf(realbuffer, fmt, va_arg(vargs, unsigned int))__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, unsigned
int))
;
1026 appendstring(realbuffer);
1027 break;
1028 case 'i':
1029 makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'i');
1030 sprintf(realbuffer, fmt, va_arg(vargs, int))__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, int))
;
1031 appendstring(realbuffer);
1032 break;
1033 case 'x':
1034 makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x');
1035 sprintf(realbuffer, fmt, va_arg(vargs, int))__builtin___sprintf_chk (realbuffer, 0, __builtin_object_size
(realbuffer, 2 > 1), fmt, __builtin_va_arg(vargs, int))
;
1036 appendstring(realbuffer);
1037 break;
1038 case 's':
1039 {
1040 /* unused, since we already have the result */
1041 (void) va_arg(vargs, char *)__builtin_va_arg(vargs, char *);
1042 Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),((__builtin_object_size ((s), 0) != (size_t) -1) ? __builtin___memcpy_chk
((s), (((__builtin_expect(!(((((((PyObject*)(*callresult))->
ob_type))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1042, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->str))), (
((__builtin_expect(!(((((((PyObject*)(*callresult))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1043, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->length))
)*sizeof(Py_UNICODE), __builtin_object_size ((s), 0)) : __inline_memcpy_chk
((s), (((__builtin_expect(!(((((((PyObject*)(*callresult))->
ob_type))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1042, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->str))), (
((__builtin_expect(!(((((((PyObject*)(*callresult))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1043, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->length))
)*sizeof(Py_UNICODE)))
1043 PyUnicode_GET_SIZE(*callresult))((__builtin_object_size ((s), 0) != (size_t) -1) ? __builtin___memcpy_chk
((s), (((__builtin_expect(!(((((((PyObject*)(*callresult))->
ob_type))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1042, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->str))), (
((__builtin_expect(!(((((((PyObject*)(*callresult))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1043, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->length))
)*sizeof(Py_UNICODE), __builtin_object_size ((s), 0)) : __inline_memcpy_chk
((s), (((__builtin_expect(!(((((((PyObject*)(*callresult))->
ob_type))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1042, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->str))), (
((__builtin_expect(!(((((((PyObject*)(*callresult))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1043, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->length))
)*sizeof(Py_UNICODE)))
;
1044 s += PyUnicode_GET_SIZE(*callresult)((__builtin_expect(!(((((((PyObject*)(*callresult))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1044, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->length))
;
1045 /* We're done with the unicode()/repr() => forget it */
1046 Py_DECREF(*callresult)do { if (_Py_RefTotal-- , --((PyObject*)(*callresult))->ob_refcnt
!= 0) { if (((PyObject*)*callresult)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1046, (PyObject *)(*callresult));
} else _Py_Dealloc((PyObject *)(*callresult)); } while (0)
;
1047 /* switch to next unicode()/repr() result */
1048 ++callresult;
1049 break;
1050 }
1051 case 'U':
1052 {
1053 PyObject *obj = va_arg(vargs, PyObject *)__builtin_va_arg(vargs, PyObject *);
1054 Py_ssize_t size = PyUnicode_GET_SIZE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1054, "PyUnicode_Check(obj)") : (
void)0),(((PyUnicodeObject *)(obj))->length))
;
1055 Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size)((__builtin_object_size ((s), 0) != (size_t) -1) ? __builtin___memcpy_chk
((s), (((__builtin_expect(!(((((((PyObject*)(obj))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1055, "PyUnicode_Check(obj)"
) : (void)0),(((PyUnicodeObject *)(obj))->str))), (size)*sizeof
(Py_UNICODE), __builtin_object_size ((s), 0)) : __inline_memcpy_chk
((s), (((__builtin_expect(!(((((((PyObject*)(obj))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1055, "PyUnicode_Check(obj)"
) : (void)0),(((PyUnicodeObject *)(obj))->str))), (size)*sizeof
(Py_UNICODE)))
;
1056 s += size;
1057 break;
1058 }
1059 case 'V':
1060 {
1061 PyObject *obj = va_arg(vargs, PyObject *)__builtin_va_arg(vargs, PyObject *);
1062 const char *str = va_arg(vargs, const char *)__builtin_va_arg(vargs, const char *);
1063 if (obj) {
1064 Py_ssize_t size = PyUnicode_GET_SIZE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1064, "PyUnicode_Check(obj)") : (
void)0),(((PyUnicodeObject *)(obj))->length))
;
1065 Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size)((__builtin_object_size ((s), 0) != (size_t) -1) ? __builtin___memcpy_chk
((s), (((__builtin_expect(!(((((((PyObject*)(obj))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1065, "PyUnicode_Check(obj)"
) : (void)0),(((PyUnicodeObject *)(obj))->str))), (size)*sizeof
(Py_UNICODE), __builtin_object_size ((s), 0)) : __inline_memcpy_chk
((s), (((__builtin_expect(!(((((((PyObject*)(obj))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1065, "PyUnicode_Check(obj)"
) : (void)0),(((PyUnicodeObject *)(obj))->str))), (size)*sizeof
(Py_UNICODE)))
;
1066 s += size;
1067 } else {
1068 appendstring(str);
1069 }
1070 break;
1071 }
1072 case 'S':
1073 case 'R':
1074 case 'A':
1075 {
1076 Py_UNICODE *ucopy;
1077 Py_ssize_t usize;
1078 Py_ssize_t upos;
1079 /* unused, since we already have the result */
1080 (void) va_arg(vargs, PyObject *)__builtin_va_arg(vargs, PyObject *);
1081 ucopy = PyUnicode_AS_UNICODE(*callresult)((__builtin_expect(!(((((((PyObject*)(*callresult))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1081, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->str))
;
1082 usize = PyUnicode_GET_SIZE(*callresult)((__builtin_expect(!(((((((PyObject*)(*callresult))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1082, "PyUnicode_Check(*callresult)"
) : (void)0),(((PyUnicodeObject *)(*callresult))->length))
;
1083 for (upos = 0; upos<usize;)
1084 *s++ = ucopy[upos++];
1085 /* We're done with the unicode()/repr() => forget it */
1086 Py_DECREF(*callresult)do { if (_Py_RefTotal-- , --((PyObject*)(*callresult))->ob_refcnt
!= 0) { if (((PyObject*)*callresult)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1086, (PyObject *)(*callresult));
} else _Py_Dealloc((PyObject *)(*callresult)); } while (0)
;
1087 /* switch to next unicode()/repr() result */
1088 ++callresult;
1089 break;
1090 }
1091 case 'p':
1092 sprintf(buffer, "%p", va_arg(vargs, void*))__builtin___sprintf_chk (buffer, 0, __builtin_object_size (buffer
, 2 > 1), "%p", __builtin_va_arg(vargs, void*))
;
1093 /* %p is ill-defined: ensure leading 0x. */
1094 if (buffer[1] == 'X')
1095 buffer[1] = 'x';
1096 else if (buffer[1] != 'x') {
1097 memmove(buffer+2, buffer, strlen(buffer)+1)((__builtin_object_size (buffer+2, 0) != (size_t) -1) ? __builtin___memmove_chk
(buffer+2, buffer, strlen(buffer)+1, __builtin_object_size (
buffer+2, 0)) : __inline_memmove_chk (buffer+2, buffer, strlen
(buffer)+1))
;
1098 buffer[0] = '0';
1099 buffer[1] = 'x';
1100 }
1101 appendstring(buffer);
1102 break;
1103 case '%':
1104 *s++ = '%';
1105 break;
1106 default:
1107 appendstring(p);
1108 goto end;
1109 }
1110 }
1111 else
1112 *s++ = *f;
1113 }
1114
1115 end:
1116 if (callresults)
1117 PyObject_Free_PyObject_DebugFree(callresults);
1118 if (abuffer)
1119 PyObject_Free_PyObject_DebugFree(abuffer);
1120 PyUnicode_ResizePyUnicodeUCS2_Resize(&string, s - PyUnicode_AS_UNICODE(string)((__builtin_expect(!(((((((PyObject*)(string))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1120, "PyUnicode_Check(string)")
: (void)0),(((PyUnicodeObject *)(string))->str))
);
1121 return string;
1122 fail:
1123 if (callresults) {
1124 PyObject **callresult2 = callresults;
1125 while (callresult2 < callresult) {
1126 Py_DECREF(*callresult2)do { if (_Py_RefTotal-- , --((PyObject*)(*callresult2))->ob_refcnt
!= 0) { if (((PyObject*)*callresult2)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1126, (PyObject *)(*callresult2))
; } else _Py_Dealloc((PyObject *)(*callresult2)); } while (0)
;
1127 ++callresult2;
1128 }
1129 PyObject_Free_PyObject_DebugFree(callresults);
1130 }
1131 if (abuffer)
1132 PyObject_Free_PyObject_DebugFree(abuffer);
1133 return NULL((void *)0);
1134}
1135
1136#undef appendstring
1137
1138PyObject *
1139PyUnicode_FromFormatPyUnicodeUCS2_FromFormat(const char *format, ...)
1140{
1141 PyObject* ret;
1142 va_list vargs;
1143
1144#ifdef HAVE_STDARG_PROTOTYPES1
1145 va_start(vargs, format)__builtin_va_start(vargs, format);
1146#else
1147 va_start(vargs);
1148#endif
1149 ret = PyUnicode_FromFormatVPyUnicodeUCS2_FromFormatV(format, vargs);
1150 va_end(vargs)__builtin_va_end(vargs);
1151 return ret;
1152}
1153
1154/* Helper function for PyUnicode_AsWideChar() and PyUnicode_AsWideCharString():
1155 convert a Unicode object to a wide character string.
1156
1157 - If w is NULL: return the number of wide characters (including the nul
1158 character) required to convert the unicode object. Ignore size argument.
1159
1160 - Otherwise: return the number of wide characters (excluding the nul
1161 character) written into w. Write at most size wide characters (including
1162 the nul character). */
1163static Py_ssize_t
1164unicode_aswidechar(PyUnicodeObject *unicode,
1165 wchar_t *w,
1166 Py_ssize_t size)
1167{
1168#if Py_UNICODE_SIZE2 == SIZEOF_WCHAR_T4
1169 Py_ssize_t res;
1170 if (w != NULL((void *)0)) {
1171 res = PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1171, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
;
1172 if (size > res)
1173 size = res + 1;
1174 else
1175 res = size;
1176 memcpy(w, unicode->str, size * sizeof(wchar_t))((__builtin_object_size (w, 0) != (size_t) -1) ? __builtin___memcpy_chk
(w, unicode->str, size * sizeof(wchar_t), __builtin_object_size
(w, 0)) : __inline_memcpy_chk (w, unicode->str, size * sizeof
(wchar_t)))
;
1177 return res;
1178 }
1179 else
1180 return PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1180, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
+ 1;
1181#elif Py_UNICODE_SIZE2 == 2 && SIZEOF_WCHAR_T4 == 4
1182 register const Py_UNICODE *u;
1183 const Py_UNICODE *uend;
1184 const wchar_t *worig, *wend;
1185 Py_ssize_t nchar;
1186
1187 u = PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1187, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
1188 uend = u + PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1188, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
;
1189 if (w != NULL((void *)0)) {
1190 worig = w;
1191 wend = w + size;
1192 while (u != uend && w != wend) {
1193 if (0xD800 <= u[0] && u[0] <= 0xDBFF
1194 && 0xDC00 <= u[1] && u[1] <= 0xDFFF)
1195 {
1196 *w = (((u[0] & 0x3FF) << 10) | (u[1] & 0x3FF)) + 0x10000;
1197 u += 2;
1198 }
1199 else {
1200 *w = *u;
1201 u++;
1202 }
1203 w++;
1204 }
1205 if (w != wend)
1206 *w = L'\0';
1207 return w - worig;
1208 }
1209 else {
1210 nchar = 1; /* nul character at the end */
1211 while (u != uend) {
1212 if (0xD800 <= u[0] && u[0] <= 0xDBFF
1213 && 0xDC00 <= u[1] && u[1] <= 0xDFFF)
1214 u += 2;
1215 else
1216 u++;
1217 nchar++;
1218 }
1219 }
1220 return nchar;
1221#elif Py_UNICODE_SIZE2 == 4 && SIZEOF_WCHAR_T4 == 2
1222 register Py_UNICODE *u, *uend, ordinal;
1223 register Py_ssize_t i;
1224 wchar_t *worig, *wend;
1225 Py_ssize_t nchar;
1226
1227 u = PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1227, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
1228 uend = u + PyUnicode_GET_SIZE(u)((__builtin_expect(!(((((((PyObject*)(u))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 1228, "PyUnicode_Check(u)") : (void)0),(((PyUnicodeObject *
)(u))->length))
;
1229 if (w != NULL((void *)0)) {
1230 worig = w;
1231 wend = w + size;
1232 while (u != uend && w != wend) {
1233 ordinal = *u;
1234 if (ordinal > 0xffff) {
1235 ordinal -= 0x10000;
1236 *w++ = 0xD800 | (ordinal >> 10);
1237 *w++ = 0xDC00 | (ordinal & 0x3FF);
1238 }
1239 else
1240 *w++ = ordinal;
1241 u++;
1242 }
1243 if (w != wend)
1244 *w = 0;
1245 return w - worig;
1246 }
1247 else {
1248 nchar = 1; /* nul character */
1249 while (u != uend) {
1250 if (*u > 0xffff)
1251 nchar += 2;
1252 else
1253 nchar++;
1254 u++;
1255 }
1256 return nchar;
1257 }
1258#else
1259# error "unsupported wchar_t and Py_UNICODE sizes, see issue #8670"
1260#endif
1261}
1262
1263Py_ssize_t
1264PyUnicode_AsWideCharPyUnicodeUCS2_AsWideChar(PyObject *unicode,
1265 wchar_t *w,
1266 Py_ssize_t size)
1267{
1268 if (unicode == NULL((void *)0)) {
1269 PyErr_BadInternalCall()_PyErr_BadInternalCall("Objects/unicodeobject.c", 1269);
1270 return -1;
1271 }
1272 return unicode_aswidechar((PyUnicodeObject*)unicode, w, size);
1273}
1274
1275wchar_t*
1276PyUnicode_AsWideCharStringPyUnicodeUCS2_AsWideCharString(PyObject *unicode,
1277 Py_ssize_t *size)
1278{
1279 wchar_t* buffer;
1280 Py_ssize_t buflen;
1281
1282 if (unicode == NULL((void *)0)) {
1283 PyErr_BadInternalCall()_PyErr_BadInternalCall("Objects/unicodeobject.c", 1283);
1284 return NULL((void *)0);
1285 }
1286
1287 buflen = unicode_aswidechar((PyUnicodeObject *)unicode, NULL((void *)0), 0);
1288 if (PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) / sizeof(wchar_t) < buflen) {
1289 PyErr_NoMemory();
1290 return NULL((void *)0);
1291 }
1292
1293 buffer = PyMem_MALLOC_PyMem_DebugMalloc(buflen * sizeof(wchar_t));
1294 if (buffer == NULL((void *)0)) {
1295 PyErr_NoMemory();
1296 return NULL((void *)0);
1297 }
1298 buflen = unicode_aswidechar((PyUnicodeObject *)unicode, buffer, buflen);
1299 if (size != NULL((void *)0))
1300 *size = buflen;
1301 return buffer;
1302}
1303
1304#endif
1305
1306PyObject *PyUnicode_FromOrdinalPyUnicodeUCS2_FromOrdinal(int ordinal)
1307{
1308 Py_UNICODE s[2];
1309
1310 if (ordinal < 0 || ordinal > 0x10ffff) {
1311 PyErr_SetString(PyExc_ValueError,
1312 "chr() arg not in range(0x110000)");
1313 return NULL((void *)0);
1314 }
1315
1316#ifndef Py_UNICODE_WIDE
1317 if (ordinal > 0xffff) {
1318 ordinal -= 0x10000;
1319 s[0] = 0xD800 | (ordinal >> 10);
1320 s[1] = 0xDC00 | (ordinal & 0x3FF);
1321 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(s, 2);
1322 }
1323#endif
1324
1325 s[0] = (Py_UNICODE)ordinal;
1326 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(s, 1);
1327}
1328
1329PyObject *PyUnicode_FromObjectPyUnicodeUCS2_FromObject(register PyObject *obj)
1330{
1331 /* XXX Perhaps we should make this API an alias of
1332 PyObject_Str() instead ?! */
1333 if (PyUnicode_CheckExact(obj)((((PyObject*)(obj))->ob_type) == &PyUnicode_Type)) {
1334 Py_INCREF(obj)( _Py_RefTotal++ , ((PyObject*)(obj))->ob_refcnt++);
1335 return obj;
1336 }
1337 if (PyUnicode_Check(obj)((((((PyObject*)(obj))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
1338 /* For a Unicode subtype that's not a Unicode object,
1339 return a true Unicode object with the same data. */
1340 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(PyUnicode_AS_UNICODE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1340, "PyUnicode_Check(obj)") : (
void)0),(((PyUnicodeObject *)(obj))->str))
,
1341 PyUnicode_GET_SIZE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1341, "PyUnicode_Check(obj)") : (
void)0),(((PyUnicodeObject *)(obj))->length))
);
1342 }
1343 PyErr_Format(PyExc_TypeError,
1344 "Can't convert '%.100s' object to str implicitly",
1345 Py_TYPE(obj)(((PyObject*)(obj))->ob_type)->tp_name);
1346 return NULL((void *)0);
1347}
1348
1349PyObject *PyUnicode_FromEncodedObjectPyUnicodeUCS2_FromEncodedObject(register PyObject *obj,
1350 const char *encoding,
1351 const char *errors)
1352{
1353 Py_buffer buffer;
1354 PyObject *v;
1355
1356 if (obj == NULL((void *)0)) {
1357 PyErr_BadInternalCall()_PyErr_BadInternalCall("Objects/unicodeobject.c", 1357);
1358 return NULL((void *)0);
1359 }
1360
1361 /* Decoding bytes objects is the most common case and should be fast */
1362 if (PyBytes_Check(obj)((((((PyObject*)(obj))->ob_type))->tp_flags & ((1L<<
27))) != 0)
) {
1363 if (PyBytes_GET_SIZE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1363, "PyBytes_Check(obj)") : (void
)0),(((PyVarObject*)(obj))->ob_size))
== 0) {
1364 Py_INCREF(unicode_empty)( _Py_RefTotal++ , ((PyObject*)(unicode_empty))->ob_refcnt
++)
;
1365 v = (PyObject *) unicode_empty;
1366 }
1367 else {
1368 v = PyUnicode_DecodePyUnicodeUCS2_Decode(
1369 PyBytes_AS_STRING(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1369, "PyBytes_Check(obj)") : (void
)0), (((PyBytesObject *)(obj))->ob_sval))
, PyBytes_GET_SIZE(obj)((__builtin_expect(!(((((((PyObject*)(obj))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1369, "PyBytes_Check(obj)") : (void
)0),(((PyVarObject*)(obj))->ob_size))
,
1370 encoding, errors);
1371 }
1372 return v;
1373 }
1374
1375 if (PyUnicode_Check(obj)((((((PyObject*)(obj))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
1376 PyErr_SetString(PyExc_TypeError,
1377 "decoding str is not supported");
1378 return NULL((void *)0);
1379 }
1380
1381 /* Retrieve a bytes buffer view through the PEP 3118 buffer interface */
1382 if (PyObject_GetBuffer(obj, &buffer, PyBUF_SIMPLE0) < 0) {
1383 PyErr_Format(PyExc_TypeError,
1384 "coercing to str: need bytes, bytearray "
1385 "or buffer-like object, %.80s found",
1386 Py_TYPE(obj)(((PyObject*)(obj))->ob_type)->tp_name);
1387 return NULL((void *)0);
1388 }
1389
1390 if (buffer.len == 0) {
1391 Py_INCREF(unicode_empty)( _Py_RefTotal++ , ((PyObject*)(unicode_empty))->ob_refcnt
++)
;
1392 v = (PyObject *) unicode_empty;
1393 }
1394 else
1395 v = PyUnicode_DecodePyUnicodeUCS2_Decode((char*) buffer.buf, buffer.len, encoding, errors);
1396
1397 PyBuffer_Release(&buffer);
1398 return v;
1399}
1400
/* Copy `encoding` into `lower`, converting upper-case letters to lower case
   and replacing '_' with '-', so that spellings such as UTF_8 match the
   canonical names.

   `lower_len` is the capacity of `lower` in bytes, terminator included.
   Returns 1 on success (`lower` is nul-terminated).  Returns 0 if the
   encoding name does not fit in lower_len-1 characters; in that case
   `lower` is left without a terminator. */
static int
normalize_encoding(const char *encoding,
                   char *lower,
                   size_t lower_len)
{
    const char *src;
    char *dst = lower;
    /* Last slot is reserved for the terminating nul. */
    char *limit = lower + (lower_len - 1);

    for (src = encoding; *src; src++) {
        char ch = *src;

        if (dst == limit)
            return 0;

        if (Py_ISUPPER(ch))
            ch = Py_TOLOWER(ch);
        else if (ch == '_')
            ch = '-';

        *dst++ = ch;
    }
    *dst = '\0';
    return 1;
}
1433
1434PyObject *PyUnicode_DecodePyUnicodeUCS2_Decode(const char *s,
1435 Py_ssize_t size,
1436 const char *encoding,
1437 const char *errors)
1438{
1439 PyObject *buffer = NULL((void *)0), *unicode;
1440 Py_buffer info;
1441 char lower[11]; /* Enough for any encoding shortcut */
1442
1443 if (encoding == NULL((void *)0))
1444 encoding = PyUnicode_GetDefaultEncodingPyUnicodeUCS2_GetDefaultEncoding();
1445
1446 /* Shortcuts for common default encodings */
1447 if (normalize_encoding(encoding, lower, sizeof(lower))) {
1448 if (strcmp(lower, "utf-8") == 0)
1449 return PyUnicode_DecodeUTF8PyUnicodeUCS2_DecodeUTF8(s, size, errors);
1450 else if ((strcmp(lower, "latin-1") == 0) ||
1451 (strcmp(lower, "iso-8859-1") == 0))
1452 return PyUnicode_DecodeLatin1PyUnicodeUCS2_DecodeLatin1(s, size, errors);
1453#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1454 else if (strcmp(lower, "mbcs") == 0)
1455 return PyUnicode_DecodeMBCS(s, size, errors);
1456#endif
1457 else if (strcmp(lower, "ascii") == 0)
1458 return PyUnicode_DecodeASCIIPyUnicodeUCS2_DecodeASCII(s, size, errors);
1459 else if (strcmp(lower, "utf-16") == 0)
1460 return PyUnicode_DecodeUTF16PyUnicodeUCS2_DecodeUTF16(s, size, errors, 0);
1461 else if (strcmp(lower, "utf-32") == 0)
1462 return PyUnicode_DecodeUTF32PyUnicodeUCS2_DecodeUTF32(s, size, errors, 0);
1463 }
1464
1465 /* Decode via the codec registry */
1466 buffer = NULL((void *)0);
1467 if (PyBuffer_FillInfo(&info, NULL((void *)0), (void *)s, size, 1, PyBUF_FULL_RO((0x0100 | (0x0010 | 0x0008)) | 0x0004)) < 0)
1468 goto onError;
1469 buffer = PyMemoryView_FromBuffer(&info);
1470 if (buffer == NULL((void *)0))
1471 goto onError;
1472 unicode = PyCodec_Decode(buffer, encoding, errors);
1473 if (unicode == NULL((void *)0))
1474 goto onError;
1475 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
1476 PyErr_Format(PyExc_TypeError,
1477 "decoder did not return a str object (type=%.400s)",
1478 Py_TYPE(unicode)(((PyObject*)(unicode))->ob_type)->tp_name);
1479 Py_DECREF(unicode)do { if (_Py_RefTotal-- , --((PyObject*)(unicode))->ob_refcnt
!= 0) { if (((PyObject*)unicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1479, (PyObject *)(unicode)); } else
_Py_Dealloc((PyObject *)(unicode)); } while (0)
;
1480 goto onError;
1481 }
1482 Py_DECREF(buffer)do { if (_Py_RefTotal-- , --((PyObject*)(buffer))->ob_refcnt
!= 0) { if (((PyObject*)buffer)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1482, (PyObject *)(buffer)); } else
_Py_Dealloc((PyObject *)(buffer)); } while (0)
;
1483 return unicode;
1484
1485 onError:
1486 Py_XDECREF(buffer)do { if ((buffer) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(buffer))->ob_refcnt != 0) { if (((PyObject
*)buffer)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 1486, (PyObject *)(buffer)); } else _Py_Dealloc((PyObject *
)(buffer)); } while (0); } while (0)
;
1487 return NULL((void *)0);
1488}
1489
1490PyObject *PyUnicode_AsDecodedObjectPyUnicodeUCS2_AsDecodedObject(PyObject *unicode,
1491 const char *encoding,
1492 const char *errors)
1493{
1494 PyObject *v;
1495
1496 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
1497 PyErr_BadArgument();
1498 goto onError;
1499 }
1500
1501 if (encoding == NULL((void *)0))
1502 encoding = PyUnicode_GetDefaultEncodingPyUnicodeUCS2_GetDefaultEncoding();
1503
1504 /* Decode via the codec registry */
1505 v = PyCodec_Decode(unicode, encoding, errors);
1506 if (v == NULL((void *)0))
1507 goto onError;
1508 return v;
1509
1510 onError:
1511 return NULL((void *)0);
1512}
1513
1514PyObject *PyUnicode_AsDecodedUnicodePyUnicodeUCS2_AsDecodedUnicode(PyObject *unicode,
1515 const char *encoding,
1516 const char *errors)
1517{
1518 PyObject *v;
1519
1520 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
1521 PyErr_BadArgument();
1522 goto onError;
1523 }
1524
1525 if (encoding == NULL((void *)0))
1526 encoding = PyUnicode_GetDefaultEncodingPyUnicodeUCS2_GetDefaultEncoding();
1527
1528 /* Decode via the codec registry */
1529 v = PyCodec_Decode(unicode, encoding, errors);
1530 if (v == NULL((void *)0))
1531 goto onError;
1532 if (!PyUnicode_Check(v)((((((PyObject*)(v))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
1533 PyErr_Format(PyExc_TypeError,
1534 "decoder did not return a str object (type=%.400s)",
1535 Py_TYPE(v)(((PyObject*)(v))->ob_type)->tp_name);
1536 Py_DECREF(v)do { if (_Py_RefTotal-- , --((PyObject*)(v))->ob_refcnt !=
0) { if (((PyObject*)v)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1536, (PyObject *)(v)); } else _Py_Dealloc
((PyObject *)(v)); } while (0)
;
1537 goto onError;
1538 }
1539 return v;
1540
1541 onError:
1542 return NULL((void *)0);
1543}
1544
1545PyObject *PyUnicode_EncodePyUnicodeUCS2_Encode(const Py_UNICODE *s,
1546 Py_ssize_t size,
1547 const char *encoding,
1548 const char *errors)
1549{
1550 PyObject *v, *unicode;
1551
1552 unicode = PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(s, size);
1553 if (unicode == NULL((void *)0))
1554 return NULL((void *)0);
1555 v = PyUnicode_AsEncodedStringPyUnicodeUCS2_AsEncodedString(unicode, encoding, errors);
1556 Py_DECREF(unicode)do { if (_Py_RefTotal-- , --((PyObject*)(unicode))->ob_refcnt
!= 0) { if (((PyObject*)unicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1556, (PyObject *)(unicode)); } else
_Py_Dealloc((PyObject *)(unicode)); } while (0)
;
1557 return v;
1558}
1559
1560PyObject *PyUnicode_AsEncodedObjectPyUnicodeUCS2_AsEncodedObject(PyObject *unicode,
1561 const char *encoding,
1562 const char *errors)
1563{
1564 PyObject *v;
1565
1566 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
1567 PyErr_BadArgument();
1568 goto onError;
1569 }
1570
1571 if (encoding == NULL((void *)0))
1572 encoding = PyUnicode_GetDefaultEncodingPyUnicodeUCS2_GetDefaultEncoding();
1573
1574 /* Encode via the codec registry */
1575 v = PyCodec_Encode(unicode, encoding, errors);
1576 if (v == NULL((void *)0))
1577 goto onError;
1578 return v;
1579
1580 onError:
1581 return NULL((void *)0);
1582}
1583
1584PyObject *
1585PyUnicode_EncodeFSDefault(PyObject *unicode)
1586{
1587#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1588 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1588, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
1589 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1589, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
1590 NULL((void *)0));
1591#elif defined(__APPLE__1)
1592 return PyUnicode_EncodeUTF8PyUnicodeUCS2_EncodeUTF8(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1592, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
1593 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1593, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
1594 "surrogateescape");
1595#else
1596 if (Py_FileSystemDefaultEncoding) {
1597 return PyUnicode_AsEncodedStringPyUnicodeUCS2_AsEncodedString(unicode,
1598 Py_FileSystemDefaultEncoding,
1599 "surrogateescape");
1600 }
1601 else {
1602 /* locale encoding with surrogateescape */
1603 wchar_t *wchar;
1604 char *bytes;
1605 PyObject *bytes_obj;
1606 size_t error_pos;
1607
1608 wchar = PyUnicode_AsWideCharStringPyUnicodeUCS2_AsWideCharString(unicode, NULL((void *)0));
1609 if (wchar == NULL((void *)0))
1610 return NULL((void *)0);
1611 bytes = _Py_wchar2char(wchar, &error_pos);
1612 if (bytes == NULL((void *)0)) {
1613 if (error_pos != (size_t)-1) {
1614 char *errmsg = strerror(errno(*__error()));
1615 PyObject *exc = NULL((void *)0);
1616 if (errmsg == NULL((void *)0))
1617 errmsg = "Py_wchar2char() failed";
1618 raise_encode_exception(&exc,
1619 "filesystemencoding",
1620 PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1620, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
, PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1620, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
1621 error_pos, error_pos+1,
1622 errmsg);
1623 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 1623, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
1624 }
1625 else
1626 PyErr_NoMemory();
1627 PyMem_Free(wchar);
1628 return NULL((void *)0);
1629 }
1630 PyMem_Free(wchar);
1631
1632 bytes_obj = PyBytes_FromString(bytes);
1633 PyMem_Free(bytes);
1634 return bytes_obj;
1635 }
1636#endif
1637}
1638
1639PyObject *PyUnicode_AsEncodedStringPyUnicodeUCS2_AsEncodedString(PyObject *unicode,
1640 const char *encoding,
1641 const char *errors)
1642{
1643 PyObject *v;
1644 char lower[11]; /* Enough for any encoding shortcut */
1645
1646 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
1647 PyErr_BadArgument();
1648 return NULL((void *)0);
1649 }
1650
1651 if (encoding == NULL((void *)0))
1652 encoding = PyUnicode_GetDefaultEncodingPyUnicodeUCS2_GetDefaultEncoding();
1653
1654 /* Shortcuts for common default encodings */
1655 if (normalize_encoding(encoding, lower, sizeof(lower))) {
1656 if (strcmp(lower, "utf-8") == 0)
1657 return PyUnicode_EncodeUTF8PyUnicodeUCS2_EncodeUTF8(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1657, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
1658 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1658, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
1659 errors);
1660 else if ((strcmp(lower, "latin-1") == 0) ||
1661 (strcmp(lower, "iso-8859-1") == 0))
1662 return PyUnicode_EncodeLatin1PyUnicodeUCS2_EncodeLatin1(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1662, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
1663 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1663, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
1664 errors);
1665#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1666 else if (strcmp(lower, "mbcs") == 0)
1667 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1667, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
1668 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1668, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
1669 errors);
1670#endif
1671 else if (strcmp(lower, "ascii") == 0)
1672 return PyUnicode_EncodeASCIIPyUnicodeUCS2_EncodeASCII(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1672, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
1673 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1673, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
1674 errors);
1675 }
1676 /* During bootstrap, we may need to find the encodings
1677 package, to load the file system encoding, and require the
1678 file system encoding in order to load the encodings
1679 package.
1680
1681 Break out of this dependency by assuming that the path to
1682 the encodings module is ASCII-only. XXX could try wcstombs
1683 instead, if the file system encoding is the locale's
1684 encoding. */
1685 if (Py_FileSystemDefaultEncoding &&
1686 strcmp(encoding, Py_FileSystemDefaultEncoding) == 0 &&
1687 !PyThreadState_GET()PyThreadState_Get()->interp->codecs_initialized)
1688 return PyUnicode_EncodeASCIIPyUnicodeUCS2_EncodeASCII(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1688, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
1689 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1689, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
1690 errors);
1691
1692 /* Encode via the codec registry */
1693 v = PyCodec_Encode(unicode, encoding, errors);
1694 if (v == NULL((void *)0))
1695 return NULL((void *)0);
1696
1697 /* The normal path */
1698 if (PyBytes_Check(v)((((((PyObject*)(v))->ob_type))->tp_flags & ((1L<<
27))) != 0)
)
1699 return v;
1700
1701 /* If the codec returns a buffer, raise a warning and convert to bytes */
1702 if (PyByteArray_Check(v)((((PyObject*)(v))->ob_type) == (&PyByteArray_Type) ||
PyType_IsSubtype((((PyObject*)(v))->ob_type), (&PyByteArray_Type
)))
) {
1703 int error;
1704 PyObject *b;
1705
1706 error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
1707 "encoder %s returned bytearray instead of bytes",
1708 encoding);
1709 if (error) {
1710 Py_DECREF(v)do { if (_Py_RefTotal-- , --((PyObject*)(v))->ob_refcnt !=
0) { if (((PyObject*)v)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1710, (PyObject *)(v)); } else _Py_Dealloc
((PyObject *)(v)); } while (0)
;
1711 return NULL((void *)0);
1712 }
1713
1714 b = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v)((__builtin_expect(!(((((PyObject*)(v))->ob_type) == (&
PyByteArray_Type) || PyType_IsSubtype((((PyObject*)(v))->ob_type
), (&PyByteArray_Type)))), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 1714, "PyByteArray_Check(v)") : (void)0), (((PyVarObject*)(
v))->ob_size) ? ((PyByteArrayObject *)(v))->ob_bytes : _PyByteArray_empty_string
)
, Py_SIZE(v)(((PyVarObject*)(v))->ob_size));
1715 Py_DECREF(v)do { if (_Py_RefTotal-- , --((PyObject*)(v))->ob_refcnt !=
0) { if (((PyObject*)v)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1715, (PyObject *)(v)); } else _Py_Dealloc
((PyObject *)(v)); } while (0)
;
1716 return b;
1717 }
1718
1719 PyErr_Format(PyExc_TypeError,
1720 "encoder did not return a bytes object (type=%.400s)",
1721 Py_TYPE(v)(((PyObject*)(v))->ob_type)->tp_name);
1722 Py_DECREF(v)do { if (_Py_RefTotal-- , --((PyObject*)(v))->ob_refcnt !=
0) { if (((PyObject*)v)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1722, (PyObject *)(v)); } else _Py_Dealloc
((PyObject *)(v)); } while (0)
;
1723 return NULL((void *)0);
1724}
1725
1726PyObject *PyUnicode_AsEncodedUnicodePyUnicodeUCS2_AsEncodedUnicode(PyObject *unicode,
1727 const char *encoding,
1728 const char *errors)
1729{
1730 PyObject *v;
1731
1732 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
1733 PyErr_BadArgument();
1734 goto onError;
1735 }
1736
1737 if (encoding == NULL((void *)0))
1738 encoding = PyUnicode_GetDefaultEncodingPyUnicodeUCS2_GetDefaultEncoding();
1739
1740 /* Encode via the codec registry */
1741 v = PyCodec_Encode(unicode, encoding, errors);
1742 if (v == NULL((void *)0))
1743 goto onError;
1744 if (!PyUnicode_Check(v)((((((PyObject*)(v))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
1745 PyErr_Format(PyExc_TypeError,
1746 "encoder did not return an str object (type=%.400s)",
1747 Py_TYPE(v)(((PyObject*)(v))->ob_type)->tp_name);
1748 Py_DECREF(v)do { if (_Py_RefTotal-- , --((PyObject*)(v))->ob_refcnt !=
0) { if (((PyObject*)v)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1748, (PyObject *)(v)); } else _Py_Dealloc
((PyObject *)(v)); } while (0)
;
1749 goto onError;
1750 }
1751 return v;
1752
1753 onError:
1754 return NULL((void *)0);
1755}
1756
1757PyObject *_PyUnicode_AsDefaultEncodedString_PyUnicodeUCS2_AsDefaultEncodedString(PyObject *unicode,
1758 const char *errors)
1759{
1760 PyObject *v = ((PyUnicodeObject *)unicode)->defenc;
1761 if (v)
1762 return v;
1763 if (errors != NULL((void *)0))
1764 Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString");
1765 v = PyUnicode_EncodeUTF8PyUnicodeUCS2_EncodeUTF8(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1765, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
1766 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1766, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
1767 NULL((void *)0));
1768 if (!v)
1769 return NULL((void *)0);
1770 ((PyUnicodeObject *)unicode)->defenc = v;
1771 return v;
1772}
1773
1774PyObject*
1775PyUnicode_DecodeFSDefaultPyUnicodeUCS2_DecodeFSDefault(const char *s) {
1776 Py_ssize_t size = (Py_ssize_t)strlen(s);
1777 return PyUnicode_DecodeFSDefaultAndSizePyUnicodeUCS2_DecodeFSDefaultAndSize(s, size);
1778}
1779
1780PyObject*
1781PyUnicode_DecodeFSDefaultAndSizePyUnicodeUCS2_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
1782{
1783#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1784 return PyUnicode_DecodeMBCS(s, size, NULL((void *)0));
1785#elif defined(__APPLE__1)
1786 return PyUnicode_DecodeUTF8PyUnicodeUCS2_DecodeUTF8(s, size, "surrogateescape");
1787#else
1788 /* During the early bootstrapping process, Py_FileSystemDefaultEncoding
1789 can be undefined. If it is case, decode using UTF-8. The following assumes
1790 that Py_FileSystemDefaultEncoding is set to a built-in encoding during the
1791 bootstrapping process where the codecs aren't ready yet.
1792 */
1793 if (Py_FileSystemDefaultEncoding) {
1794 return PyUnicode_DecodePyUnicodeUCS2_Decode(s, size,
1795 Py_FileSystemDefaultEncoding,
1796 "surrogateescape");
1797 }
1798 else {
1799 /* locale encoding with surrogateescape */
1800 wchar_t *wchar;
1801 PyObject *unicode;
1802 size_t len;
1803
1804 if (s[size] != '\0' || size != strlen(s)) {
1805 PyErr_SetString(PyExc_TypeError, "embedded NUL character");
1806 return NULL((void *)0);
1807 }
1808
1809 wchar = _Py_char2wchar(s, &len);
1810 if (wchar == NULL((void *)0))
1811 return PyErr_NoMemory();
1812
1813 unicode = PyUnicode_FromWideCharPyUnicodeUCS2_FromWideChar(wchar, len);
1814 PyMem_Free(wchar);
1815 return unicode;
1816 }
1817#endif
1818}
1819
1820
1821int
1822PyUnicode_FSConverterPyUnicodeUCS2_FSConverter(PyObject* arg, void* addr)
1823{
1824 PyObject *output = NULL((void *)0);
1825 Py_ssize_t size;
1826 void *data;
1827 if (arg == NULL((void *)0)) {
1828 Py_DECREF(*(PyObject**)addr)do { if (_Py_RefTotal-- , --((PyObject*)(*(PyObject**)addr))->
ob_refcnt != 0) { if (((PyObject*)*(PyObject**)addr)->ob_refcnt
< 0) _Py_NegativeRefcount("Objects/unicodeobject.c", 1828
, (PyObject *)(*(PyObject**)addr)); } else _Py_Dealloc((PyObject
*)(*(PyObject**)addr)); } while (0)
;
1829 return 1;
1830 }
1831 if (PyBytes_Check(arg)((((((PyObject*)(arg))->ob_type))->tp_flags & ((1L<<
27))) != 0)
) {
1832 output = arg;
1833 Py_INCREF(output)( _Py_RefTotal++ , ((PyObject*)(output))->ob_refcnt++);
1834 }
1835 else {
1836 arg = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(arg);
1837 if (!arg)
1838 return 0;
1839 output = PyUnicode_EncodeFSDefault(arg);
1840 Py_DECREF(arg)do { if (_Py_RefTotal-- , --((PyObject*)(arg))->ob_refcnt !=
0) { if (((PyObject*)arg)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1840, (PyObject *)(arg)); } else _Py_Dealloc
((PyObject *)(arg)); } while (0)
;
1841 if (!output)
1842 return 0;
1843 if (!PyBytes_Check(output)((((((PyObject*)(output))->ob_type))->tp_flags & ((
1L<<27))) != 0)
) {
1844 Py_DECREF(output)do { if (_Py_RefTotal-- , --((PyObject*)(output))->ob_refcnt
!= 0) { if (((PyObject*)output)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1844, (PyObject *)(output)); } else
_Py_Dealloc((PyObject *)(output)); } while (0)
;
1845 PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes");
1846 return 0;
1847 }
1848 }
1849 size = PyBytes_GET_SIZE(output)((__builtin_expect(!(((((((PyObject*)(output))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1849, "PyBytes_Check(output)") :
(void)0),(((PyVarObject*)(output))->ob_size))
;
1850 data = PyBytes_AS_STRING(output)((__builtin_expect(!(((((((PyObject*)(output))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1850, "PyBytes_Check(output)") :
(void)0), (((PyBytesObject *)(output))->ob_sval))
;
1851 if (size != strlen(data)) {
1852 PyErr_SetString(PyExc_TypeError, "embedded NUL character");
1853 Py_DECREF(output)do { if (_Py_RefTotal-- , --((PyObject*)(output))->ob_refcnt
!= 0) { if (((PyObject*)output)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1853, (PyObject *)(output)); } else
_Py_Dealloc((PyObject *)(output)); } while (0)
;
1854 return 0;
1855 }
1856 *(PyObject**)addr = output;
1857 return Py_CLEANUP_SUPPORTED0x20000;
1858}
1859
1860
1861int
1862PyUnicode_FSDecoderPyUnicodeUCS2_FSDecoder(PyObject* arg, void* addr)
1863{
1864 PyObject *output = NULL((void *)0);
1865 Py_ssize_t size;
1866 void *data;
1867 if (arg == NULL((void *)0)) {
1868 Py_DECREF(*(PyObject**)addr)do { if (_Py_RefTotal-- , --((PyObject*)(*(PyObject**)addr))->
ob_refcnt != 0) { if (((PyObject*)*(PyObject**)addr)->ob_refcnt
< 0) _Py_NegativeRefcount("Objects/unicodeobject.c", 1868
, (PyObject *)(*(PyObject**)addr)); } else _Py_Dealloc((PyObject
*)(*(PyObject**)addr)); } while (0)
;
1869 return 1;
1870 }
1871 if (PyUnicode_Check(arg)((((((PyObject*)(arg))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
1872 output = arg;
1873 Py_INCREF(output)( _Py_RefTotal++ , ((PyObject*)(output))->ob_refcnt++);
1874 }
1875 else {
1876 arg = PyBytes_FromObject(arg);
1877 if (!arg)
1878 return 0;
1879 output = PyUnicode_DecodeFSDefaultAndSizePyUnicodeUCS2_DecodeFSDefaultAndSize(PyBytes_AS_STRING(arg)((__builtin_expect(!(((((((PyObject*)(arg))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1879, "PyBytes_Check(arg)") : (void
)0), (((PyBytesObject *)(arg))->ob_sval))
,
1880 PyBytes_GET_SIZE(arg)((__builtin_expect(!(((((((PyObject*)(arg))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1880, "PyBytes_Check(arg)") : (void
)0),(((PyVarObject*)(arg))->ob_size))
);
1881 Py_DECREF(arg)do { if (_Py_RefTotal-- , --((PyObject*)(arg))->ob_refcnt !=
0) { if (((PyObject*)arg)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1881, (PyObject *)(arg)); } else _Py_Dealloc
((PyObject *)(arg)); } while (0)
;
1882 if (!output)
1883 return 0;
1884 if (!PyUnicode_Check(output)((((((PyObject*)(output))->ob_type))->tp_flags & ((
1L<<28))) != 0)
) {
1885 Py_DECREF(output)do { if (_Py_RefTotal-- , --((PyObject*)(output))->ob_refcnt
!= 0) { if (((PyObject*)output)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1885, (PyObject *)(output)); } else
_Py_Dealloc((PyObject *)(output)); } while (0)
;
1886 PyErr_SetString(PyExc_TypeError, "decoder failed to return unicode");
1887 return 0;
1888 }
1889 }
1890 size = PyUnicode_GET_SIZE(output)((__builtin_expect(!(((((((PyObject*)(output))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1890, "PyUnicode_Check(output)")
: (void)0),(((PyUnicodeObject *)(output))->length))
;
1891 data = PyUnicode_AS_UNICODE(output)((__builtin_expect(!(((((((PyObject*)(output))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1891, "PyUnicode_Check(output)")
: (void)0),(((PyUnicodeObject *)(output))->str))
;
1892 if (size != Py_UNICODE_strlen(data)) {
1893 PyErr_SetString(PyExc_TypeError, "embedded NUL character");
1894 Py_DECREF(output)do { if (_Py_RefTotal-- , --((PyObject*)(output))->ob_refcnt
!= 0) { if (((PyObject*)output)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 1894, (PyObject *)(output)); } else
_Py_Dealloc((PyObject *)(output)); } while (0)
;
1895 return 0;
1896 }
1897 *(PyObject**)addr = output;
1898 return Py_CLEANUP_SUPPORTED0x20000;
1899}
1900
1901
1902char*
1903_PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
1904{
1905 PyObject *bytes;
1906 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
1907 PyErr_BadArgument();
1908 return NULL((void *)0);
1909 }
1910 bytes = _PyUnicode_AsDefaultEncodedString_PyUnicodeUCS2_AsDefaultEncodedString(unicode, NULL((void *)0));
1911 if (bytes == NULL((void *)0))
1912 return NULL((void *)0);
1913 if (psize != NULL((void *)0))
1914 *psize = PyBytes_GET_SIZE(bytes)((__builtin_expect(!(((((((PyObject*)(bytes))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1914, "PyBytes_Check(bytes)") : (
void)0),(((PyVarObject*)(bytes))->ob_size))
;
1915 return PyBytes_AS_STRING(bytes)((__builtin_expect(!(((((((PyObject*)(bytes))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 1915, "PyBytes_Check(bytes)") : (
void)0), (((PyBytesObject *)(bytes))->ob_sval))
;
1916}
1917
1918char*
1919_PyUnicode_AsString(PyObject *unicode)
1920{
1921 return _PyUnicode_AsStringAndSize(unicode, NULL((void *)0));
1922}
1923
1924Py_UNICODE *PyUnicode_AsUnicodePyUnicodeUCS2_AsUnicode(PyObject *unicode)
1925{
1926 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
1927 PyErr_BadArgument();
1928 goto onError;
1929 }
1930 return PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1930, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
1931
1932 onError:
1933 return NULL((void *)0);
1934}
1935
1936Py_ssize_t PyUnicode_GetSizePyUnicodeUCS2_GetSize(PyObject *unicode)
1937{
1938 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
1939 PyErr_BadArgument();
1940 goto onError;
1941 }
1942 return PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1942, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
;
1943
1944 onError:
1945 return -1;
1946}
1947
/* Return the name of the default encoding, which is always "utf-8". */
const char *PyUnicode_GetDefaultEncoding(void)
{
    static const char default_encoding[] = "utf-8";

    return default_encoding;
}
1952
1953/* create or adjust a UnicodeDecodeError */
1954static void
1955make_decode_exception(PyObject **exceptionObject,
1956 const char *encoding,
1957 const char *input, Py_ssize_t length,
1958 Py_ssize_t startpos, Py_ssize_t endpos,
1959 const char *reason)
1960{
1961 if (*exceptionObject == NULL((void *)0)) {
1962 *exceptionObject = PyUnicodeDecodeError_Create(
1963 encoding, input, length, startpos, endpos, reason);
1964 }
1965 else {
1966 if (PyUnicodeDecodeError_SetStart(*exceptionObject, startpos))
1967 goto onError;
1968 if (PyUnicodeDecodeError_SetEnd(*exceptionObject, endpos))
1969 goto onError;
1970 if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason))
1971 goto onError;
1972 }
1973 return;
1974
1975onError:
1976 Py_DECREF(*exceptionObject)do { if (_Py_RefTotal-- , --((PyObject*)(*exceptionObject))->
ob_refcnt != 0) { if (((PyObject*)*exceptionObject)->ob_refcnt
< 0) _Py_NegativeRefcount("Objects/unicodeobject.c", 1976
, (PyObject *)(*exceptionObject)); } else _Py_Dealloc((PyObject
*)(*exceptionObject)); } while (0)
;
1977 *exceptionObject = NULL((void *)0);
1978}
1979
1980/* error handling callback helper:
1981 build arguments, call the callback and check the arguments,
1982 if no exception occurred, copy the replacement to the output
1983 and adjust various state variables.
1984 return 0 on success, -1 on error
1985*/
1986
1987static
1988int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
1989 const char *encoding, const char *reason,
1990 const char **input, const char **inend, Py_ssize_t *startinpos,
1991 Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
1992 PyUnicodeObject **output, Py_ssize_t *outpos, Py_UNICODE **outptr)
1993{
1994 static char *argparse = "O!n;decoding error handler must return (str, int) tuple";
1995
1996 PyObject *restuple = NULL((void *)0);
1997 PyObject *repunicode = NULL((void *)0);
1998 Py_ssize_t outsize = PyUnicode_GET_SIZE(*output)((__builtin_expect(!(((((((PyObject*)(*output))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 1998, "PyUnicode_Check(*output)"
) : (void)0),(((PyUnicodeObject *)(*output))->length))
;
1999 Py_ssize_t insize;
2000 Py_ssize_t requiredsize;
2001 Py_ssize_t newpos;
2002 Py_UNICODE *repptr;
2003 PyObject *inputobj = NULL((void *)0);
2004 Py_ssize_t repsize;
2005 int res = -1;
2006
2007 if (*errorHandler == NULL((void *)0)) {
2008 *errorHandler = PyCodec_LookupError(errors);
2009 if (*errorHandler == NULL((void *)0))
2010 goto onError;
2011 }
2012
2013 make_decode_exception(exceptionObject,
2014 encoding,
2015 *input, *inend - *input,
2016 *startinpos, *endinpos,
2017 reason);
2018 if (*exceptionObject == NULL((void *)0))
2019 goto onError;
2020
2021 restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL((void *)0));
2022 if (restuple == NULL((void *)0))
2023 goto onError;
2024 if (!PyTuple_Check(restuple)((((((PyObject*)(restuple))->ob_type))->tp_flags & (
(1L<<26))) != 0)
) {
2025 PyErr_SetString(PyExc_TypeError, &argparse[4]);
2026 goto onError;
2027 }
2028 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
2029 goto onError;
2030
2031 /* Copy back the bytes variables, which might have been modified by the
2032 callback */
2033 inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
2034 if (!inputobj)
2035 goto onError;
2036 if (!PyBytes_Check(inputobj)((((((PyObject*)(inputobj))->ob_type))->tp_flags & (
(1L<<27))) != 0)
) {
2037 PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
2038 }
2039 *input = PyBytes_AS_STRING(inputobj)((__builtin_expect(!(((((((PyObject*)(inputobj))->ob_type)
)->tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2039, "PyBytes_Check(inputobj)"
) : (void)0), (((PyBytesObject *)(inputobj))->ob_sval))
;
2040 insize = PyBytes_GET_SIZE(inputobj)((__builtin_expect(!(((((((PyObject*)(inputobj))->ob_type)
)->tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2040, "PyBytes_Check(inputobj)"
) : (void)0),(((PyVarObject*)(inputobj))->ob_size))
;
2041 *inend = *input + insize;
2042 /* we can DECREF safely, as the exception has another reference,
2043 so the object won't go away. */
2044 Py_DECREF(inputobj)do { if (_Py_RefTotal-- , --((PyObject*)(inputobj))->ob_refcnt
!= 0) { if (((PyObject*)inputobj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 2044, (PyObject *)(inputobj)); } else
_Py_Dealloc((PyObject *)(inputobj)); } while (0)
;
2045
2046 if (newpos<0)
2047 newpos = insize+newpos;
2048 if (newpos<0 || newpos>insize) {
2049 PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
2050 goto onError;
2051 }
2052
2053 /* need more space? (at least enough for what we
2054 have+the replacement+the rest of the string (starting
2055 at the new input position), so we won't have to check space
2056 when there are no errors in the rest of the string) */
2057 repptr = PyUnicode_AS_UNICODE(repunicode)((__builtin_expect(!(((((((PyObject*)(repunicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2057, "PyUnicode_Check(repunicode)"
) : (void)0),(((PyUnicodeObject *)(repunicode))->str))
;
2058 repsize = PyUnicode_GET_SIZE(repunicode)((__builtin_expect(!(((((((PyObject*)(repunicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2058, "PyUnicode_Check(repunicode)"
) : (void)0),(((PyUnicodeObject *)(repunicode))->length))
;
2059 requiredsize = *outpos + repsize + insize-newpos;
2060 if (requiredsize > outsize) {
2061 if (requiredsize<2*outsize)
2062 requiredsize = 2*outsize;
2063 if (_PyUnicode_Resize(output, requiredsize) < 0)
2064 goto onError;
2065 *outptr = PyUnicode_AS_UNICODE(*output)((__builtin_expect(!(((((((PyObject*)(*output))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2065, "PyUnicode_Check(*output)"
) : (void)0),(((PyUnicodeObject *)(*output))->str))
+ *outpos;
2066 }
2067 *endinpos = newpos;
2068 *inptr = *input + newpos;
2069 Py_UNICODE_COPY(*outptr, repptr, repsize)((__builtin_object_size ((*outptr), 0) != (size_t) -1) ? __builtin___memcpy_chk
((*outptr), (repptr), (repsize)*sizeof(Py_UNICODE), __builtin_object_size
((*outptr), 0)) : __inline_memcpy_chk ((*outptr), (repptr), (
repsize)*sizeof(Py_UNICODE)))
;
2070 *outptr += repsize;
2071 *outpos += repsize;
2072
2073 /* we made it! */
2074 res = 0;
2075
2076 onError:
2077 Py_XDECREF(restuple)do { if ((restuple) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(restuple))->ob_refcnt != 0) { if (((PyObject
*)restuple)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 2077, (PyObject *)(restuple)); } else _Py_Dealloc((PyObject
*)(restuple)); } while (0); } while (0)
;
2078 return res;
2079}
2080
2081/* --- UTF-7 Codec -------------------------------------------------------- */
2082
2083/* See RFC2152 for details. We encode conservatively and decode liberally. */
2084
2085/* Three simple macros defining base-64. */
2086
/* Is c a base-64 character?  True for A-Z, a-z, 0-9, '+' and '/'.
   The argument is evaluated more than once. */

#define IS_BASE64(c)                    \
    (('A' <= (c) && (c) <= 'Z') ||      \
     ('a' <= (c) && (c) <= 'z') ||      \
     ('0' <= (c) && (c) <= '9') ||      \
     (c) == '+' || (c) == '/')
2094
/* Given that c is a base-64 character, what is its base-64 value?
   A-Z map to 0-25, a-z to 26-51, 0-9 to 52-61, '+' to 62; anything else
   yields 63, so the result is only meaningful when IS_BASE64(c) holds. */

#define FROM_BASE64(c)                                  \
    (('A' <= (c) && (c) <= 'Z') ? (c) - 'A' :           \
     ('a' <= (c) && (c) <= 'z') ? (c) - 'a' + 26 :      \
     ('0' <= (c) && (c) <= '9') ? (c) - '0' + 52 :      \
     ((c) == '+') ? 62 : 63)
2102
2103/* What is the base-64 character of the bottom 6 bits of n? */
2104
/* TO_BASE64(n): the base-64 alphabet character for the low six bits of
 * n; higher bits of n are masked off. */
#define TO_BASE64(n)  \
    ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f])
2107
2108/* DECODE_DIRECT: this byte encountered in a UTF-7 string should be
2109 * decoded as itself. We are permissive on decoding; the only ASCII
2110 * byte not decoding to itself is the + which begins a base64
2111 * string. */
2112
/* DECODE_DIRECT(c): non-zero when c is at most 127 and is not '+',
 * i.e. the byte is copied to the output unchanged by the decoder. */
#define DECODE_DIRECT(c)                                \
    ((c) <= 127 && (c) != '+')
2115
2116/* The UTF-7 encoder treats ASCII characters differently according to
2117 * whether they are Set D, Set O, Whitespace, or special (i.e. none of
2118 * the above). See RFC2152. This array identifies these different
2119 * sets:
2120 * 0 : "Set D"
2121 * alphanumeric and '(),-./:?
2122 * 1 : "Set O"
2123 * !"#$%&*;<=>@[]^_`{|}
2124 * 2 : "whitespace"
2125 * ht nl cr sp
2126 * 3 : special (must be base64 encoded)
2127 * everything else (i.e. +\~ and non-printing codes 0-8 11-12 14-31 127)
2128 */
2129
/* Category of each 7-bit code, indexed by character value:
 * 0 = Set D, 1 = Set O, 2 = whitespace, 3 = special (must be base-64
 * encoded).  Read through the ENCODE_DIRECT macro. */
static
char utf7_category[128] = {
/* nul soh stx etx eot enq ack bel bs  ht  nl  vt  np  cr  so  si  */
    3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  3,  3,  2,  3,  3,
/* dle dc1 dc2 dc3 dc4 nak syn etb can em  sub esc fs  gs  rs  us  */
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
/* sp   !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /  */
    2,  1,  1,  1,  1,  1,  1,  0,  0,  0,  1,  3,  0,  0,  0,  0,
/*  0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?  */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  0,
/*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O  */
    1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/*  P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _  */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  3,  1,  1,  1,
/*  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o  */
    1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/*  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  del */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  3,  3,
};
2149
2150/* ENCODE_DIRECT: this character should be encoded as itself. The
2151 * answer depends on whether we are encoding set O as itself, and also
2152 * on whether we are encoding whitespace as itself. RFC2152 makes it
2153 * clear that the answers to these questions vary between
2154 * applications, so this code needs to be flexible. */
2155
/* ENCODE_DIRECT(c, directO, directWS): non-zero when c (1..127) may be
 * emitted as itself: always for category 0 (Set D), for category 2
 * (whitespace) only when directWS is true, and for category 1 (Set O)
 * only when directO is true.  NUL and anything >= 128 never qualify.
 * c is evaluated several times, so it must be free of side effects. */
#define ENCODE_DIRECT(c, directO, directWS)             \
    ((c) < 128 && (c) > 0 &&                            \
     ((utf7_category[(c)] == 0) ||                      \
      (directWS && (utf7_category[(c)] == 2)) ||        \
      (directO && (utf7_category[(c)] == 1))))
2161
2162PyObject *PyUnicode_DecodeUTF7(const char *s,
2163 Py_ssize_t size,
2164 const char *errors)
2165{
2166 return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL((void *)0));
2167}
2168
2169/* The decoder. The only state we preserve is our read position,
2170 * i.e. how many characters we have consumed. So if we end in the
2171 * middle of a shift sequence we have to back off the read position
2172 * and the output to the beginning of the sequence, otherwise we lose
2173 * all the shift state (seen bits, number of bits seen, high
2174 * surrogate). */
2175
2176PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
2177 Py_ssize_t size,
2178 const char *errors,
2179 Py_ssize_t *consumed)
2180{
2181 const char *starts = s;
2182 Py_ssize_t startinpos;
2183 Py_ssize_t endinpos;
2184 Py_ssize_t outpos;
2185 const char *e;
2186 PyUnicodeObject *unicode;
2187 Py_UNICODE *p;
2188 const char *errmsg = "";
2189 int inShift = 0;
2190 Py_UNICODE *shiftOutStart;
2191 unsigned int base64bits = 0;
2192 unsigned long base64buffer = 0;
2193 Py_UNICODE surrogate = 0;
2194 PyObject *errorHandler = NULL((void *)0);
2195 PyObject *exc = NULL((void *)0);
2196
2197 unicode = _PyUnicode_New(size);
2198 if (!unicode)
2199 return NULL((void *)0);
2200 if (size == 0) {
2201 if (consumed)
2202 *consumed = 0;
2203 return (PyObject *)unicode;
2204 }
2205
2206 p = unicode->str;
2207 shiftOutStart = p;
2208 e = s + size;
2209
2210 while (s < e) {
2211 Py_UNICODE ch;
2212 restart:
2213 ch = (unsigned char) *s;
2214
2215 if (inShift) { /* in a base-64 section */
2216 if (IS_BASE64(ch)) { /* consume a base-64 character */
2217 base64buffer = (base64buffer << 6) | FROM_BASE64(ch);
2218 base64bits += 6;
2219 s++;
2220 if (base64bits >= 16) {
2221 /* we have enough bits for a UTF-16 value */
2222 Py_UNICODE outCh = (Py_UNICODE)
2223 (base64buffer >> (base64bits-16));
2224 base64bits -= 16;
2225 base64buffer &= (1 << base64bits) - 1; /* clear high bits */
2226 if (surrogate) {
2227 /* expecting a second surrogate */
2228 if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
2229#ifdef Py_UNICODE_WIDE
2230 *p++ = (((surrogate & 0x3FF)<<10)
2231 | (outCh & 0x3FF)) + 0x10000;
2232#else
2233 *p++ = surrogate;
2234 *p++ = outCh;
2235#endif
2236 surrogate = 0;
2237 }
2238 else {
2239 surrogate = 0;
2240 errmsg = "second surrogate missing";
2241 goto utf7Error;
2242 }
2243 }
2244 else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
2245 /* first surrogate */
2246 surrogate = outCh;
2247 }
2248 else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
2249 errmsg = "unexpected second surrogate";
2250 goto utf7Error;
2251 }
2252 else {
2253 *p++ = outCh;
2254 }
2255 }
2256 }
2257 else { /* now leaving a base-64 section */
2258 inShift = 0;
2259 s++;
2260 if (surrogate) {
2261 errmsg = "second surrogate missing at end of shift sequence";
2262 goto utf7Error;
2263 }
2264 if (base64bits > 0) { /* left-over bits */
2265 if (base64bits >= 6) {
2266 /* We've seen at least one base-64 character */
2267 errmsg = "partial character in shift sequence";
2268 goto utf7Error;
2269 }
2270 else {
2271 /* Some bits remain; they should be zero */
2272 if (base64buffer != 0) {
2273 errmsg = "non-zero padding bits in shift sequence";
2274 goto utf7Error;
2275 }
2276 }
2277 }
2278 if (ch != '-') {
2279 /* '-' is absorbed; other terminating
2280 characters are preserved */
2281 *p++ = ch;
2282 }
2283 }
2284 }
2285 else if ( ch == '+' ) {
2286 startinpos = s-starts;
2287 s++; /* consume '+' */
2288 if (s < e && *s == '-') { /* '+-' encodes '+' */
2289 s++;
2290 *p++ = '+';
2291 }
2292 else { /* begin base64-encoded section */
2293 inShift = 1;
2294 shiftOutStart = p;
2295 base64bits = 0;
2296 }
2297 }
2298 else if (DECODE_DIRECT(ch)) { /* character decodes as itself */
2299 *p++ = ch;
2300 s++;
2301 }
2302 else {
2303 startinpos = s-starts;
2304 s++;
2305 errmsg = "unexpected special character";
2306 goto utf7Error;
2307 }
2308 continue;
2309utf7Error:
2310 outpos = p-PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2310, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
2311 endinpos = s-starts;
2312 if (unicode_decode_call_errorhandler(
2313 errors, &errorHandler,
2314 "utf7", errmsg,
2315 &starts, &e, &startinpos, &endinpos, &exc, &s,
2316 &unicode, &outpos, &p))
2317 goto onError;
2318 }
2319
2320 /* end of string */
2321
2322 if (inShift && !consumed) { /* in shift sequence, no more to follow */
2323 /* if we're in an inconsistent state, that's an error */
2324 if (surrogate ||
2325 (base64bits >= 6) ||
2326 (base64bits > 0 && base64buffer != 0)) {
2327 outpos = p-PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2327, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
2328 endinpos = size;
2329 if (unicode_decode_call_errorhandler(
2330 errors, &errorHandler,
2331 "utf7", "unterminated shift sequence",
2332 &starts, &e, &startinpos, &endinpos, &exc, &s,
2333 &unicode, &outpos, &p))
2334 goto onError;
2335 if (s < e)
2336 goto restart;
2337 }
2338 }
2339
2340 /* return state */
2341 if (consumed) {
2342 if (inShift) {
2343 p = shiftOutStart; /* back off output */
2344 *consumed = startinpos;
2345 }
2346 else {
2347 *consumed = s-starts;
2348 }
2349 }
2350
2351 if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2351, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
) < 0)
2352 goto onError;
2353
2354 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 2354, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
2355 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 2355, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
2356 return (PyObject *)unicode;
2357
2358 onError:
2359 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 2359, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
2360 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 2360, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
2361 Py_DECREF(unicode)do { if (_Py_RefTotal-- , --((PyObject*)(unicode))->ob_refcnt
!= 0) { if (((PyObject*)unicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 2361, (PyObject *)(unicode)); } else
_Py_Dealloc((PyObject *)(unicode)); } while (0)
;
2362 return NULL((void *)0);
2363}
2364
2365
2366PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s,
2367 Py_ssize_t size,
2368 int base64SetO,
2369 int base64WhiteSpace,
2370 const char *errors)
2371{
2372 PyObject *v;
2373 /* It might be possible to tighten this worst case */
2374 Py_ssize_t allocated = 8 * size;
2375 int inShift = 0;
2376 Py_ssize_t i = 0;
2377 unsigned int base64bits = 0;
2378 unsigned long base64buffer = 0;
2379 char * out;
2380 char * start;
2381
2382 if (size == 0)
2383 return PyBytes_FromStringAndSize(NULL((void *)0), 0);
2384
2385 if (allocated / 8 != size)
2386 return PyErr_NoMemory();
2387
2388 v = PyBytes_FromStringAndSize(NULL((void *)0), allocated);
2389 if (v == NULL((void *)0))
2390 return NULL((void *)0);
2391
2392 start = out = PyBytes_AS_STRING(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 2392, "PyBytes_Check(v)") : (void)0), (((PyBytesObject *)(v
))->ob_sval))
;
2393 for (;i < size; ++i) {
2394 Py_UNICODE ch = s[i];
2395
2396 if (inShift) {
2397 if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
2398 /* shifting out */
2399 if (base64bits) { /* output remaining bits */
2400 *out++ = TO_BASE64(base64buffer << (6-base64bits));
2401 base64buffer = 0;
2402 base64bits = 0;
2403 }
2404 inShift = 0;
2405 /* Characters not in the BASE64 set implicitly unshift the sequence
2406 so no '-' is required, except if the character is itself a '-' */
2407 if (IS_BASE64(ch) || ch == '-') {
2408 *out++ = '-';
2409 }
2410 *out++ = (char) ch;
2411 }
2412 else {
2413 goto encode_char;
2414 }
2415 }
2416 else { /* not in a shift sequence */
2417 if (ch == '+') {
2418 *out++ = '+';
2419 *out++ = '-';
2420 }
2421 else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) {
2422 *out++ = (char) ch;
2423 }
2424 else {
2425 *out++ = '+';
2426 inShift = 1;
2427 goto encode_char;
2428 }
2429 }
2430 continue;
2431encode_char:
2432#ifdef Py_UNICODE_WIDE
2433 if (ch >= 0x10000) {
2434 /* code first surrogate */
2435 base64bits += 16;
2436 base64buffer = (base64buffer << 16) | 0xd800 | ((ch-0x10000) >> 10);
2437 while (base64bits >= 6) {
2438 *out++ = TO_BASE64(base64buffer >> (base64bits-6));
2439 base64bits -= 6;
2440 }
2441 /* prepare second surrogate */
2442 ch = 0xDC00 | ((ch-0x10000) & 0x3FF);
2443 }
2444#endif
2445 base64bits += 16;
2446 base64buffer = (base64buffer << 16) | ch;
2447 while (base64bits >= 6) {
2448 *out++ = TO_BASE64(base64buffer >> (base64bits-6));
2449 base64bits -= 6;
2450 }
2451 }
2452 if (base64bits)
2453 *out++= TO_BASE64(base64buffer << (6-base64bits) );
2454 if (inShift)
2455 *out++ = '-';
2456 if (_PyBytes_Resize(&v, out - start) < 0)
2457 return NULL((void *)0);
2458 return v;
2459}
2460
2461#undef IS_BASE64
2462#undef FROM_BASE64
2463#undef TO_BASE64
2464#undef DECODE_DIRECT
2465#undef ENCODE_DIRECT
2466
2467/* --- UTF-8 Codec -------------------------------------------------------- */
2468
/* Length in bytes of the UTF-8 sequence introduced by each possible
 * lead byte; 0 marks a byte that cannot start a sequence. */
static
char utf8_code_length[256] = {
    /* Map UTF-8 encoded prefix byte to sequence length.  Zero means
       illegal prefix.  See RFC 3629 for details */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00-0F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 70-7F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0-BF */
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* C0-C1 + C2-CF */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* D0-DF */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* E0-EF */
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /* F0-F4 + F5-FF */
};
2490
2491PyObject *PyUnicode_DecodeUTF8PyUnicodeUCS2_DecodeUTF8(const char *s,
2492 Py_ssize_t size,
2493 const char *errors)
2494{
2495 return PyUnicode_DecodeUTF8StatefulPyUnicodeUCS2_DecodeUTF8Stateful(s, size, errors, NULL((void *)0));
2496}
2497
2498/* Mask to check or force alignment of a pointer to C 'long' boundaries */
/* Low-order address bits that must be zero for a pointer to be aligned
 * on a C 'long' boundary.  Fully parenthesised so it composes safely
 * with any neighbouring operator. */
#define LONG_PTR_MASK ((size_t) (SIZEOF_LONG - 1))
2500
2501/* Mask to quickly check whether a C 'long' contains a
2502 non-ASCII, UTF8-encoded char. */
2503#if (SIZEOF_LONG8 == 8)
2504# define ASCII_CHAR_MASK 0x8080808080808080L
2505#elif (SIZEOF_LONG8 == 4)
2506# define ASCII_CHAR_MASK 0x80808080L
2507#else
2508# error C 'long' size should be either 4 or 8!
2509#endif
2510
2511PyObject *PyUnicode_DecodeUTF8StatefulPyUnicodeUCS2_DecodeUTF8Stateful(const char *s,
2512 Py_ssize_t size,
2513 const char *errors,
2514 Py_ssize_t *consumed)
2515{
2516 const char *starts = s;
2517 int n;
2518 int k;
2519 Py_ssize_t startinpos;
2520 Py_ssize_t endinpos;
2521 Py_ssize_t outpos;
2522 const char *e, *aligned_end;
2523 PyUnicodeObject *unicode;
2524 Py_UNICODE *p;
2525 const char *errmsg = "";
2526 PyObject *errorHandler = NULL((void *)0);
2527 PyObject *exc = NULL((void *)0);
2528
2529 /* Note: size will always be longer than the resulting Unicode
2530 character count */
2531 unicode = _PyUnicode_New(size);
2532 if (!unicode)
2533 return NULL((void *)0);
2534 if (size == 0) {
2535 if (consumed)
2536 *consumed = 0;
2537 return (PyObject *)unicode;
2538 }
2539
2540 /* Unpack UTF-8 encoded data */
2541 p = unicode->str;
2542 e = s + size;
2543 aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK(size_t) (8 - 1));
2544
2545 while (s < e) {
2546 Py_UCS4 ch = (unsigned char)*s;
2547
2548 if (ch < 0x80) {
2549 /* Fast path for runs of ASCII characters. Given that common UTF-8
2550 input will consist of an overwhelming majority of ASCII
2551 characters, we try to optimize for this case by checking
2552 as many characters as a C 'long' can contain.
2553 First, check if we can do an aligned read, as most CPUs have
2554 a penalty for unaligned reads.
2555 */
2556 if (!((size_t) s & LONG_PTR_MASK(size_t) (8 - 1))) {
2557 /* Help register allocation */
2558 register const char *_s = s;
2559 register Py_UNICODE *_p = p;
2560 while (_s < aligned_end) {
2561 /* Read a whole long at a time (either 4 or 8 bytes),
2562 and do a fast unrolled copy if it only contains ASCII
2563 characters. */
2564 unsigned long data = *(unsigned long *) _s;
2565 if (data & ASCII_CHAR_MASK)
2566 break;
2567 _p[0] = (unsigned char) _s[0];
2568 _p[1] = (unsigned char) _s[1];
2569 _p[2] = (unsigned char) _s[2];
2570 _p[3] = (unsigned char) _s[3];
2571#if (SIZEOF_LONG8 == 8)
2572 _p[4] = (unsigned char) _s[4];
2573 _p[5] = (unsigned char) _s[5];
2574 _p[6] = (unsigned char) _s[6];
2575 _p[7] = (unsigned char) _s[7];
2576#endif
2577 _s += SIZEOF_LONG8;
2578 _p += SIZEOF_LONG8;
2579 }
2580 s = _s;
2581 p = _p;
2582 if (s == e)
2583 break;
2584 ch = (unsigned char)*s;
2585 }
2586 }
2587
2588 if (ch < 0x80) {
2589 *p++ = (Py_UNICODE)ch;
2590 s++;
2591 continue;
2592 }
2593
2594 n = utf8_code_length[ch];
2595
2596 if (s + n > e) {
2597 if (consumed)
2598 break;
2599 else {
2600 errmsg = "unexpected end of data";
2601 startinpos = s-starts;
2602 endinpos = startinpos+1;
2603 for (k=1; (k < size-startinpos) && ((s[k]&0xC0) == 0x80); k++)
2604 endinpos++;
2605 goto utf8Error;
2606 }
2607 }
2608
2609 switch (n) {
2610
2611 case 0:
2612 errmsg = "invalid start byte";
2613 startinpos = s-starts;
2614 endinpos = startinpos+1;
2615 goto utf8Error;
2616
2617 case 1:
2618 errmsg = "internal error";
2619 startinpos = s-starts;
2620 endinpos = startinpos+1;
2621 goto utf8Error;
2622
2623 case 2:
2624 if ((s[1] & 0xc0) != 0x80) {
2625 errmsg = "invalid continuation byte";
2626 startinpos = s-starts;
2627 endinpos = startinpos + 1;
2628 goto utf8Error;
2629 }
2630 ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
2631 assert ((ch > 0x007F) && (ch <= 0x07FF))(__builtin_expect(!((ch > 0x007F) && (ch <= 0x07FF
)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c", 2631
, "(ch > 0x007F) && (ch <= 0x07FF)") : (void)0)
;
2632 *p++ = (Py_UNICODE)ch;
2633 break;
2634
2635 case 3:
2636 /* Decoding UTF-8 sequences in range \xed\xa0\x80-\xed\xbf\xbf
2637 will result in surrogates in range d800-dfff. Surrogates are
2638 not valid UTF-8 so they are rejected.
2639 See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
2640 (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
2641 if ((s[1] & 0xc0) != 0x80 ||
2642 (s[2] & 0xc0) != 0x80 ||
2643 ((unsigned char)s[0] == 0xE0 &&
2644 (unsigned char)s[1] < 0xA0) ||
2645 ((unsigned char)s[0] == 0xED &&
2646 (unsigned char)s[1] > 0x9F)) {
2647 errmsg = "invalid continuation byte";
2648 startinpos = s-starts;
2649 endinpos = startinpos + 1;
2650
2651 /* if s[1] first two bits are 1 and 0, then the invalid
2652 continuation byte is s[2], so increment endinpos by 1,
2653 if not, s[1] is invalid and endinpos doesn't need to
2654 be incremented. */
2655 if ((s[1] & 0xC0) == 0x80)
2656 endinpos++;
2657 goto utf8Error;
2658 }
2659 ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
2660 assert ((ch > 0x07FF) && (ch <= 0xFFFF))(__builtin_expect(!((ch > 0x07FF) && (ch <= 0xFFFF
)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c", 2660
, "(ch > 0x07FF) && (ch <= 0xFFFF)") : (void)0)
;
2661 *p++ = (Py_UNICODE)ch;
2662 break;
2663
2664 case 4:
2665 if ((s[1] & 0xc0) != 0x80 ||
2666 (s[2] & 0xc0) != 0x80 ||
2667 (s[3] & 0xc0) != 0x80 ||
2668 ((unsigned char)s[0] == 0xF0 &&
2669 (unsigned char)s[1] < 0x90) ||
2670 ((unsigned char)s[0] == 0xF4 &&
2671 (unsigned char)s[1] > 0x8F)) {
2672 errmsg = "invalid continuation byte";
2673 startinpos = s-starts;
2674 endinpos = startinpos + 1;
2675 if ((s[1] & 0xC0) == 0x80) {
2676 endinpos++;
2677 if ((s[2] & 0xC0) == 0x80)
2678 endinpos++;
2679 }
2680 goto utf8Error;
2681 }
2682 ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
2683 ((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
2684 assert ((ch > 0xFFFF) && (ch <= 0x10ffff))(__builtin_expect(!((ch > 0xFFFF) && (ch <= 0x10ffff
)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c", 2684
, "(ch > 0xFFFF) && (ch <= 0x10ffff)") : (void)
0)
;
2685
2686#ifdef Py_UNICODE_WIDE
2687 *p++ = (Py_UNICODE)ch;
2688#else
2689 /* compute and append the two surrogates: */
2690
2691 /* translate from 10000..10FFFF to 0..FFFF */
2692 ch -= 0x10000;
2693
2694 /* high surrogate = top 10 bits added to D800 */
2695 *p++ = (Py_UNICODE)(0xD800 + (ch >> 10));
2696
2697 /* low surrogate = bottom 10 bits added to DC00 */
2698 *p++ = (Py_UNICODE)(0xDC00 + (ch & 0x03FF));
2699#endif
2700 break;
2701 }
2702 s += n;
2703 continue;
2704
2705 utf8Error:
2706 outpos = p-PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2706, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
2707 if (unicode_decode_call_errorhandler(
2708 errors, &errorHandler,
2709 "utf8", errmsg,
2710 &starts, &e, &startinpos, &endinpos, &exc, &s,
2711 &unicode, &outpos, &p))
2712 goto onError;
2713 aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK(size_t) (8 - 1));
2714 }
2715 if (consumed)
2716 *consumed = s-starts;
2717
2718 /* Adjust length */
2719 if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
2720 goto onError;
2721
2722 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 2722, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
2723 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 2723, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
2724 return (PyObject *)unicode;
2725
2726 onError:
2727 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 2727, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
2728 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 2728, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
2729 Py_DECREF(unicode)do { if (_Py_RefTotal-- , --((PyObject*)(unicode))->ob_refcnt
!= 0) { if (((PyObject*)unicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 2729, (PyObject *)(unicode)); } else
_Py_Dealloc((PyObject *)(unicode)); } while (0)
;
2730 return NULL((void *)0);
2731}
2732
2733#undef ASCII_CHAR_MASK
2734
#ifdef __APPLE__

/* Simplified UTF-8 decoder using surrogateescape error handler,
   used to decode the command line arguments on Mac OS X.

   Decodes the `size` bytes at `s` into a freshly PyMem_Malloc'ed,
   NUL-terminated wchar_t array of size + 1 elements.  A byte that does
   not start a well-formed sequence is stored as 0xDC00 + byte and
   decoding resumes at the following byte.

   Returns NULL if (size + 1) * sizeof(wchar_t) would exceed
   PY_SSIZE_T_MAX (after calling PyErr_NoMemory()) or if the allocation
   fails.
   NOTE(review): the caller presumably releases the result with
   PyMem_Free() -- confirm against the call sites. */

wchar_t*
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
{
    int n;
    const char *e;
    wchar_t *unicode, *p;

    /* Note: size will always be longer than the resulting Unicode
       character count */
    if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) {
        PyErr_NoMemory();
        return NULL;
    }
    unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
    if (!unicode)
        return NULL;

    /* Unpack UTF-8 encoded data */
    p = unicode;
    e = s + size;
    while (s < e) {
        Py_UCS4 ch = (unsigned char)*s;

        if (ch < 0x80) {
            *p++ = (wchar_t)ch;
            s++;
            continue;
        }

        n = utf8_code_length[ch];
        if (s + n > e) {
            goto surrogateescape;
        }

        switch (n) {
        case 0:
        case 1:
            goto surrogateescape;

        case 2:
            if ((s[1] & 0xc0) != 0x80)
                goto surrogateescape;
            ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
            assert ((ch > 0x007F) && (ch <= 0x07FF));
            *p++ = (wchar_t)ch;
            break;

        case 3:
            /* Decoding UTF-8 sequences in range \xed\xa0\x80-\xed\xbf\xbf
               will result in surrogates in range d800-dfff. Surrogates are
               not valid UTF-8 so they are rejected.
               See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
               (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
            if ((s[1] & 0xc0) != 0x80 ||
                (s[2] & 0xc0) != 0x80 ||
                ((unsigned char)s[0] == 0xE0 &&
                 (unsigned char)s[1] < 0xA0) ||
                ((unsigned char)s[0] == 0xED &&
                 (unsigned char)s[1] > 0x9F)) {

                goto surrogateescape;
            }
            ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
            assert ((ch > 0x07FF) && (ch <= 0xFFFF));
            /* the buffer holds wchar_t, so cast to wchar_t like the
               other cases do */
            *p++ = (wchar_t)ch;
            break;

        case 4:
            if ((s[1] & 0xc0) != 0x80 ||
                (s[2] & 0xc0) != 0x80 ||
                (s[3] & 0xc0) != 0x80 ||
                ((unsigned char)s[0] == 0xF0 &&
                 (unsigned char)s[1] < 0x90) ||
                ((unsigned char)s[0] == 0xF4 &&
                 (unsigned char)s[1] > 0x8F)) {
                goto surrogateescape;
            }
            ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
                 ((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
            assert ((ch > 0xFFFF) && (ch <= 0x10ffff));

#if SIZEOF_WCHAR_T == 4
            *p++ = (wchar_t)ch;
#else
            /*  compute and append the two surrogates: */

            /*  translate from 10000..10FFFF to 0..FFFF */
            ch -= 0x10000;

            /*  high surrogate = top 10 bits added to D800 */
            *p++ = (wchar_t)(0xD800 + (ch >> 10));

            /*  low surrogate = bottom 10 bits added to DC00 */
            *p++ = (wchar_t)(0xDC00 + (ch & 0x03FF));
#endif
            break;
        }
        s += n;
        continue;

      surrogateescape:
        /* ch still holds the lead byte here: every jump to this label
           happens before ch is overwritten with a decoded value */
        *p++ = 0xDC00 + ch;
        s++;
    }
    *p = L'\0';
    return unicode;
}

#endif /* __APPLE__ */
2849
2850/* Allocation strategy: if the string is short, convert into a stack buffer
2851 and allocate exactly as much space needed at the end. Else allocate the
2852 maximum possible needed (4 result bytes per Unicode character), and return
2853 the excess memory at the end.
2854*/
2855PyObject *
2856PyUnicode_EncodeUTF8PyUnicodeUCS2_EncodeUTF8(const Py_UNICODE *s,
2857 Py_ssize_t size,
2858 const char *errors)
2859{
2860#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */
2861
2862 Py_ssize_t i; /* index into s of next input byte */
2863 PyObject *result; /* result string object */
2864 char *p; /* next free byte in output buffer */
2865 Py_ssize_t nallocated; /* number of result bytes allocated */
2866 Py_ssize_t nneeded; /* number of result bytes needed */
2867 char stackbuf[MAX_SHORT_UNICHARS * 4];
2868 PyObject *errorHandler = NULL((void *)0);
2869 PyObject *exc = NULL((void *)0);
2870
2871 assert(s != NULL)(__builtin_expect(!(s != ((void *)0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 2871, "s != NULL") : (void)0)
;
2872 assert(size >= 0)(__builtin_expect(!(size >= 0), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 2872, "size >= 0") : (void)0)
;
2873
2874 if (size <= MAX_SHORT_UNICHARS) {
2875 /* Write into the stack buffer; nallocated can't overflow.
2876 * At the end, we'll allocate exactly as much heap space as it
2877 * turns out we need.
2878 */
2879 nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int)((__builtin_expect(!((size_t)(int)(sizeof(stackbuf)) == (sizeof
(stackbuf))), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 2879, "(size_t)(int)(sizeof(stackbuf)) == (sizeof(stackbuf))"
) : (void)0), (int)(sizeof(stackbuf)))
;
2880 result = NULL((void *)0); /* will allocate after we're done */
2881 p = stackbuf;
2882 }
2883 else {
2884 /* Overallocate on the heap, and give the excess back at the end. */
2885 nallocated = size * 4;
2886 if (nallocated / 4 != size) /* overflow! */
2887 return PyErr_NoMemory();
2888 result = PyBytes_FromStringAndSize(NULL((void *)0), nallocated);
2889 if (result == NULL((void *)0))
2890 return NULL((void *)0);
2891 p = PyBytes_AS_STRING(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 2891, "PyBytes_Check(result)") :
(void)0), (((PyBytesObject *)(result))->ob_sval))
;
2892 }
2893
2894 for (i = 0; i < size;) {
2895 Py_UCS4 ch = s[i++];
2896
2897 if (ch < 0x80)
2898 /* Encode ASCII */
2899 *p++ = (char) ch;
2900
2901 else if (ch < 0x0800) {
2902 /* Encode Latin-1 */
2903 *p++ = (char)(0xc0 | (ch >> 6));
2904 *p++ = (char)(0x80 | (ch & 0x3f));
2905 } else if (0xD800 <= ch && ch <= 0xDFFF) {
2906#ifndef Py_UNICODE_WIDE
2907 /* Special case: check for high and low surrogate */
2908 if (ch <= 0xDBFF && i != size && 0xDC00 <= s[i] && s[i] <= 0xDFFF) {
2909 Py_UCS4 ch2 = s[i];
2910 /* Combine the two surrogates to form a UCS4 value */
2911 ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000;
2912 i++;
2913
2914 /* Encode UCS4 Unicode ordinals */
2915 *p++ = (char)(0xf0 | (ch >> 18));
2916 *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
2917 *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
2918 *p++ = (char)(0x80 | (ch & 0x3f));
2919 } else {
2920#endif
2921 Py_ssize_t newpos;
2922 PyObject *rep;
2923 Py_ssize_t repsize, k;
2924 rep = unicode_encode_call_errorhandler
2925 (errors, &errorHandler, "utf-8", "surrogates not allowed",
2926 s, size, &exc, i-1, i, &newpos);
2927 if (!rep)
2928 goto error;
2929
2930 if (PyBytes_Check(rep)((((((PyObject*)(rep))->ob_type))->tp_flags & ((1L<<
27))) != 0)
)
2931 repsize = PyBytes_GET_SIZE(rep)((__builtin_expect(!(((((((PyObject*)(rep))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 2931, "PyBytes_Check(rep)") : (void
)0),(((PyVarObject*)(rep))->ob_size))
;
2932 else
2933 repsize = PyUnicode_GET_SIZE(rep)((__builtin_expect(!(((((((PyObject*)(rep))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 2933, "PyUnicode_Check(rep)") : (
void)0),(((PyUnicodeObject *)(rep))->length))
;
2934
2935 if (repsize > 4) {
2936 Py_ssize_t offset;
2937
2938 if (result == NULL((void *)0))
2939 offset = p - stackbuf;
2940 else
2941 offset = p - PyBytes_AS_STRING(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 2941, "PyBytes_Check(result)") :
(void)0), (((PyBytesObject *)(result))->ob_sval))
;
2942
2943 if (nallocated > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) - repsize + 4) {
2944 /* integer overflow */
2945 PyErr_NoMemory();
2946 goto error;
2947 }
2948 nallocated += repsize - 4;
2949 if (result != NULL((void *)0)) {
2950 if (_PyBytes_Resize(&result, nallocated) < 0)
2951 goto error;
2952 } else {
2953 result = PyBytes_FromStringAndSize(NULL((void *)0), nallocated);
2954 if (result == NULL((void *)0))
2955 goto error;
2956 Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset)((__builtin_object_size (((__builtin_expect(!(((((((PyObject*
)(result))->ob_type))->tp_flags & ((1L<<27)))
!= 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 2956, "PyBytes_Check(result)") : (void)0), (((PyBytesObject
*)(result))->ob_sval)), 0) != (size_t) -1) ? __builtin___memcpy_chk
(((__builtin_expect(!(((((((PyObject*)(result))->ob_type)
)->tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2956, "PyBytes_Check(result)"
) : (void)0), (((PyBytesObject *)(result))->ob_sval)), stackbuf
, offset, __builtin_object_size (((__builtin_expect(!(((((((PyObject
*)(result))->ob_type))->tp_flags & ((1L<<27))
) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 2956, "PyBytes_Check(result)") : (void)0), (((PyBytesObject
*)(result))->ob_sval)), 0)) : __inline_memcpy_chk (((__builtin_expect
(!(((((((PyObject*)(result))->ob_type))->tp_flags &
((1L<<27))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 2956, "PyBytes_Check(result)") : (void)0), (((PyBytesObject
*)(result))->ob_sval)), stackbuf, offset))
;
2957 }
2958 p = PyBytes_AS_STRING(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 2958, "PyBytes_Check(result)") :
(void)0), (((PyBytesObject *)(result))->ob_sval))
+ offset;
2959 }
2960
2961 if (PyBytes_Check(rep)((((((PyObject*)(rep))->ob_type))->tp_flags & ((1L<<
27))) != 0)
) {
2962 char *prep = PyBytes_AS_STRING(rep)((__builtin_expect(!(((((((PyObject*)(rep))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 2962, "PyBytes_Check(rep)") : (void
)0), (((PyBytesObject *)(rep))->ob_sval))
;
2963 for(k = repsize; k > 0; k--)
2964 *p++ = *prep++;
2965 } else /* rep is unicode */ {
2966 Py_UNICODE *prep = PyUnicode_AS_UNICODE(rep)((__builtin_expect(!(((((((PyObject*)(rep))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 2966, "PyUnicode_Check(rep)") : (
void)0),(((PyUnicodeObject *)(rep))->str))
;
2967 Py_UNICODE c;
2968
2969 for(k=0; k<repsize; k++) {
2970 c = prep[k];
2971 if (0x80 <= c) {
2972 raise_encode_exception(&exc, "utf-8", s, size,
2973 i-1, i, "surrogates not allowed");
2974 goto error;
2975 }
2976 *p++ = (char)prep[k];
2977 }
2978 }
2979 Py_DECREF(rep)do { if (_Py_RefTotal-- , --((PyObject*)(rep))->ob_refcnt !=
0) { if (((PyObject*)rep)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 2979, (PyObject *)(rep)); } else _Py_Dealloc
((PyObject *)(rep)); } while (0)
;
2980#ifndef Py_UNICODE_WIDE
2981 }
2982#endif
2983 } else if (ch < 0x10000) {
2984 *p++ = (char)(0xe0 | (ch >> 12));
2985 *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
2986 *p++ = (char)(0x80 | (ch & 0x3f));
2987 } else /* ch >= 0x10000 */ {
2988 /* Encode UCS4 Unicode ordinals */
2989 *p++ = (char)(0xf0 | (ch >> 18));
2990 *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
2991 *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
2992 *p++ = (char)(0x80 | (ch & 0x3f));
2993 }
2994 }
2995
2996 if (result == NULL((void *)0)) {
2997 /* This was stack allocated. */
2998 nneeded = p - stackbuf;
2999 assert(nneeded <= nallocated)(__builtin_expect(!(nneeded <= nallocated), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 2999, "nneeded <= nallocated"
) : (void)0)
;
3000 result = PyBytes_FromStringAndSize(stackbuf, nneeded);
3001 }
3002 else {
3003 /* Cut back to size actually needed. */
3004 nneeded = p - PyBytes_AS_STRING(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 3004, "PyBytes_Check(result)") :
(void)0), (((PyBytesObject *)(result))->ob_sval))
;
3005 assert(nneeded <= nallocated)(__builtin_expect(!(nneeded <= nallocated), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 3005, "nneeded <= nallocated"
) : (void)0)
;
3006 _PyBytes_Resize(&result, nneeded);
3007 }
3008 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3008, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
3009 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3009, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
3010 return result;
3011 error:
3012 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3012, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
3013 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3013, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
3014 Py_XDECREF(result)do { if ((result) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(result))->ob_refcnt != 0) { if (((PyObject
*)result)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3014, (PyObject *)(result)); } else _Py_Dealloc((PyObject *
)(result)); } while (0); } while (0)
;
3015 return NULL((void *)0);
3016
3017#undef MAX_SHORT_UNICHARS
3018}
3019
3020PyObject *PyUnicode_AsUTF8StringPyUnicodeUCS2_AsUTF8String(PyObject *unicode)
3021{
3022 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
3023 PyErr_BadArgument();
3024 return NULL((void *)0);
3025 }
3026 return PyUnicode_EncodeUTF8PyUnicodeUCS2_EncodeUTF8(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 3026, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
3027 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 3027, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
3028 NULL((void *)0));
3029}
3030
3031/* --- UTF-32 Codec ------------------------------------------------------- */
3032
3033PyObject *
3034PyUnicode_DecodeUTF32PyUnicodeUCS2_DecodeUTF32(const char *s,
3035 Py_ssize_t size,
3036 const char *errors,
3037 int *byteorder)
3038{
3039 return PyUnicode_DecodeUTF32StatefulPyUnicodeUCS2_DecodeUTF32Stateful(s, size, errors, byteorder, NULL((void *)0));
3040}
3041
3042PyObject *
3043PyUnicode_DecodeUTF32StatefulPyUnicodeUCS2_DecodeUTF32Stateful(const char *s,
3044 Py_ssize_t size,
3045 const char *errors,
3046 int *byteorder,
3047 Py_ssize_t *consumed)
3048{
3049 const char *starts = s;
3050 Py_ssize_t startinpos;
3051 Py_ssize_t endinpos;
3052 Py_ssize_t outpos;
3053 PyUnicodeObject *unicode;
3054 Py_UNICODE *p;
3055#ifndef Py_UNICODE_WIDE
3056 int pairs = 0;
3057 const unsigned char *qq;
3058#else
3059 const int pairs = 0;
3060#endif
3061 const unsigned char *q, *e;
3062 int bo = 0; /* assume native ordering by default */
3063 const char *errmsg = "";
3064 /* Offsets from q for retrieving bytes in the right order. */
3065#ifdef BYTEORDER_IS_LITTLE_ENDIAN
3066 int iorder[] = {0, 1, 2, 3};
3067#else
3068 int iorder[] = {3, 2, 1, 0};
3069#endif
3070 PyObject *errorHandler = NULL((void *)0);
3071 PyObject *exc = NULL((void *)0);
3072
3073 q = (unsigned char *)s;
3074 e = q + size;
3075
3076 if (byteorder)
3077 bo = *byteorder;
3078
3079 /* Check for BOM marks (U+FEFF) in the input and adjust current
3080 byte order setting accordingly. In native mode, the leading BOM
3081 mark is skipped, in all other modes, it is copied to the output
3082 stream as-is (giving a ZWNBSP character). */
3083 if (bo == 0) {
3084 if (size >= 4) {
3085 const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
3086 (q[iorder[1]] << 8) | q[iorder[0]];
3087#ifdef BYTEORDER_IS_LITTLE_ENDIAN
3088 if (bom == 0x0000FEFF) {
3089 q += 4;
3090 bo = -1;
3091 }
3092 else if (bom == 0xFFFE0000) {
3093 q += 4;
3094 bo = 1;
3095 }
3096#else
3097 if (bom == 0x0000FEFF) {
3098 q += 4;
3099 bo = 1;
3100 }
3101 else if (bom == 0xFFFE0000) {
3102 q += 4;
3103 bo = -1;
3104 }
3105#endif
3106 }
3107 }
3108
3109 if (bo == -1) {
3110 /* force LE */
3111 iorder[0] = 0;
3112 iorder[1] = 1;
3113 iorder[2] = 2;
3114 iorder[3] = 3;
3115 }
3116 else if (bo == 1) {
3117 /* force BE */
3118 iorder[0] = 3;
3119 iorder[1] = 2;
3120 iorder[2] = 1;
3121 iorder[3] = 0;
3122 }
3123
3124 /* On narrow builds we split characters outside the BMP into two
3125 codepoints => count how much extra space we need. */
3126#ifndef Py_UNICODE_WIDE
3127 for (qq = q; qq < e; qq += 4)
3128 if (qq[iorder[2]] != 0 || qq[iorder[3]] != 0)
3129 pairs++;
3130#endif
3131
3132 /* This might be one to much, because of a BOM */
3133 unicode = _PyUnicode_New((size+3)/4+pairs);
3134 if (!unicode)
3135 return NULL((void *)0);
3136 if (size == 0)
3137 return (PyObject *)unicode;
3138
3139 /* Unpack UTF-32 encoded data */
3140 p = unicode->str;
3141
3142 while (q < e) {
3143 Py_UCS4 ch;
3144 /* remaining bytes at the end? (size should be divisible by 4) */
3145 if (e-q<4) {
3146 if (consumed)
3147 break;
3148 errmsg = "truncated data";
3149 startinpos = ((const char *)q)-starts;
3150 endinpos = ((const char *)e)-starts;
3151 goto utf32Error;
3152 /* The remaining input chars are ignored if the callback
3153 chooses to skip the input */
3154 }
3155 ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
3156 (q[iorder[1]] << 8) | q[iorder[0]];
3157
3158 if (ch >= 0x110000)
3159 {
3160 errmsg = "codepoint not in range(0x110000)";
3161 startinpos = ((const char *)q)-starts;
3162 endinpos = startinpos+4;
3163 goto utf32Error;
3164 }
3165#ifndef Py_UNICODE_WIDE
3166 if (ch >= 0x10000)
3167 {
3168 *p++ = 0xD800 | ((ch-0x10000) >> 10);
3169 *p++ = 0xDC00 | ((ch-0x10000) & 0x3FF);
3170 }
3171 else
3172#endif
3173 *p++ = ch;
3174 q += 4;
3175 continue;
3176 utf32Error:
3177 outpos = p-PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 3177, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
3178 if (unicode_decode_call_errorhandler(
3179 errors, &errorHandler,
3180 "utf32", errmsg,
3181 &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
3182 &unicode, &outpos, &p))
3183 goto onError;
3184 }
3185
3186 if (byteorder)
3187 *byteorder = bo;
3188
3189 if (consumed)
3190 *consumed = (const char *)q-starts;
3191
3192 /* Adjust length */
3193 if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
3194 goto onError;
3195
3196 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3196, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
3197 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3197, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
3198 return (PyObject *)unicode;
3199
3200 onError:
3201 Py_DECREF(unicode)do { if (_Py_RefTotal-- , --((PyObject*)(unicode))->ob_refcnt
!= 0) { if (((PyObject*)unicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3201, (PyObject *)(unicode)); } else
_Py_Dealloc((PyObject *)(unicode)); } while (0)
;
3202 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3202, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
3203 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3203, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
3204 return NULL((void *)0);
3205}
3206
3207PyObject *
3208PyUnicode_EncodeUTF32PyUnicodeUCS2_EncodeUTF32(const Py_UNICODE *s,
3209 Py_ssize_t size,
3210 const char *errors,
3211 int byteorder)
3212{
3213 PyObject *v;
3214 unsigned char *p;
3215 Py_ssize_t nsize, bytesize;
3216#ifndef Py_UNICODE_WIDE
3217 Py_ssize_t i, pairs;
3218#else
3219 const int pairs = 0;
3220#endif
3221 /* Offsets from p for storing byte pairs in the right order. */
3222#ifdef BYTEORDER_IS_LITTLE_ENDIAN
3223 int iorder[] = {0, 1, 2, 3};
3224#else
3225 int iorder[] = {3, 2, 1, 0};
3226#endif
3227
3228#define STORECHAR(CH) \
3229 do { \
3230 p[iorder[3]] = ((CH) >> 24) & 0xff; \
3231 p[iorder[2]] = ((CH) >> 16) & 0xff; \
3232 p[iorder[1]] = ((CH) >> 8) & 0xff; \
3233 p[iorder[0]] = (CH) & 0xff; \
3234 p += 4; \
3235 } while(0)
3236
3237 /* In narrow builds we can output surrogate pairs as one codepoint,
3238 so we need less space. */
3239#ifndef Py_UNICODE_WIDE
3240 for (i = pairs = 0; i < size-1; i++)
3241 if (0xD800 <= s[i] && s[i] <= 0xDBFF &&
3242 0xDC00 <= s[i+1] && s[i+1] <= 0xDFFF)
3243 pairs++;
3244#endif
3245 nsize = (size - pairs + (byteorder == 0));
3246 bytesize = nsize * 4;
3247 if (bytesize / 4 != nsize)
3248 return PyErr_NoMemory();
3249 v = PyBytes_FromStringAndSize(NULL((void *)0), bytesize);
3250 if (v == NULL((void *)0))
3251 return NULL((void *)0);
3252
3253 p = (unsigned char *)PyBytes_AS_STRING(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 3253, "PyBytes_Check(v)") : (void)0), (((PyBytesObject *)(v
))->ob_sval))
;
3254 if (byteorder == 0)
3255 STORECHAR(0xFEFF);
3256 if (size == 0)
3257 goto done;
3258
3259 if (byteorder == -1) {
3260 /* force LE */
3261 iorder[0] = 0;
3262 iorder[1] = 1;
3263 iorder[2] = 2;
3264 iorder[3] = 3;
3265 }
3266 else if (byteorder == 1) {
3267 /* force BE */
3268 iorder[0] = 3;
3269 iorder[1] = 2;
3270 iorder[2] = 1;
3271 iorder[3] = 0;
3272 }
3273
3274 while (size-- > 0) {
3275 Py_UCS4 ch = *s++;
3276#ifndef Py_UNICODE_WIDE
3277 if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
3278 Py_UCS4 ch2 = *s;
3279 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
3280 ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
3281 s++;
3282 size--;
3283 }
3284 }
3285#endif
3286 STORECHAR(ch);
3287 }
3288
3289 done:
3290 return v;
3291#undef STORECHAR
3292}
3293
3294PyObject *PyUnicode_AsUTF32StringPyUnicodeUCS2_AsUTF32String(PyObject *unicode)
3295{
3296 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
3297 PyErr_BadArgument();
3298 return NULL((void *)0);
3299 }
3300 return PyUnicode_EncodeUTF32PyUnicodeUCS2_EncodeUTF32(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 3300, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
3301 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 3301, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
3302 NULL((void *)0),
3303 0);
3304}
3305
3306/* --- UTF-16 Codec ------------------------------------------------------- */
3307
3308PyObject *
3309PyUnicode_DecodeUTF16PyUnicodeUCS2_DecodeUTF16(const char *s,
3310 Py_ssize_t size,
3311 const char *errors,
3312 int *byteorder)
3313{
3314 return PyUnicode_DecodeUTF16StatefulPyUnicodeUCS2_DecodeUTF16Stateful(s, size, errors, byteorder, NULL((void *)0));
3315}
3316
3317/* Two masks for fast checking of whether a C 'long' may contain
3318 UTF16-encoded surrogate characters. This is an efficient heuristic,
3319 assuming that non-surrogate characters with a code point >= 0x8000 are
3320 rare in most input.
3321 FAST_CHAR_MASK is used when the input is in native byte ordering,
3322 SWAPPED_FAST_CHAR_MASK when the input is in byteswapped ordering.
3323*/
3324#if (SIZEOF_LONG8 == 8)
3325# define FAST_CHAR_MASK 0x8000800080008000L
3326# define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L
3327#elif (SIZEOF_LONG8 == 4)
3328# define FAST_CHAR_MASK 0x80008000L
3329# define SWAPPED_FAST_CHAR_MASK 0x00800080L
3330#else
3331# error C 'long' size should be either 4 or 8!
3332#endif
3333
3334PyObject *
3335PyUnicode_DecodeUTF16StatefulPyUnicodeUCS2_DecodeUTF16Stateful(const char *s,
3336 Py_ssize_t size,
3337 const char *errors,
3338 int *byteorder,
3339 Py_ssize_t *consumed)
3340{
3341 const char *starts = s;
3342 Py_ssize_t startinpos;
3343 Py_ssize_t endinpos;
3344 Py_ssize_t outpos;
3345 PyUnicodeObject *unicode;
3346 Py_UNICODE *p;
3347 const unsigned char *q, *e, *aligned_end;
3348 int bo = 0; /* assume native ordering by default */
3349 int native_ordering = 0;
3350 const char *errmsg = "";
3351 /* Offsets from q for retrieving byte pairs in the right order. */
3352#ifdef BYTEORDER_IS_LITTLE_ENDIAN
3353 int ihi = 1, ilo = 0;
3354#else
3355 int ihi = 0, ilo = 1;
3356#endif
3357 PyObject *errorHandler = NULL((void *)0);
3358 PyObject *exc = NULL((void *)0);
3359
3360 /* Note: size will always be longer than the resulting Unicode
3361 character count */
3362 unicode = _PyUnicode_New(size);
3363 if (!unicode)
3364 return NULL((void *)0);
3365 if (size == 0)
3366 return (PyObject *)unicode;
3367
3368 /* Unpack UTF-16 encoded data */
3369 p = unicode->str;
3370 q = (unsigned char *)s;
3371 e = q + size - 1;
3372
3373 if (byteorder)
3374 bo = *byteorder;
3375
3376 /* Check for BOM marks (U+FEFF) in the input and adjust current
3377 byte order setting accordingly. In native mode, the leading BOM
3378 mark is skipped, in all other modes, it is copied to the output
3379 stream as-is (giving a ZWNBSP character). */
3380 if (bo == 0) {
3381 if (size >= 2) {
3382 const Py_UNICODE bom = (q[ihi] << 8) | q[ilo];
3383#ifdef BYTEORDER_IS_LITTLE_ENDIAN
3384 if (bom == 0xFEFF) {
3385 q += 2;
3386 bo = -1;
3387 }
3388 else if (bom == 0xFFFE) {
3389 q += 2;
3390 bo = 1;
3391 }
3392#else
3393 if (bom == 0xFEFF) {
3394 q += 2;
3395 bo = 1;
3396 }
3397 else if (bom == 0xFFFE) {
3398 q += 2;
3399 bo = -1;
3400 }
3401#endif
3402 }
3403 }
3404
3405 if (bo == -1) {
3406 /* force LE */
3407 ihi = 1;
3408 ilo = 0;
3409 }
3410 else if (bo == 1) {
3411 /* force BE */
3412 ihi = 0;
3413 ilo = 1;
3414 }
3415#ifdef BYTEORDER_IS_LITTLE_ENDIAN
3416 native_ordering = ilo < ihi;
3417#else
3418 native_ordering = ilo > ihi;
3419#endif
3420
3421 aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK(size_t) (8 - 1));
3422 while (q < e) {
3423 Py_UNICODE ch;
3424 /* First check for possible aligned read of a C 'long'. Unaligned
3425 reads are more expensive, better to defer to another iteration. */
3426 if (!((size_t) q & LONG_PTR_MASK(size_t) (8 - 1))) {
3427 /* Fast path for runs of non-surrogate chars. */
3428 register const unsigned char *_q = q;
3429 Py_UNICODE *_p = p;
3430 if (native_ordering) {
3431 /* Native ordering is simple: as long as the input cannot
3432 possibly contain a surrogate char, do an unrolled copy
3433 of several 16-bit code points to the target object.
3434 The non-surrogate check is done on several input bytes
3435 at a time (as many as a C 'long' can contain). */
3436 while (_q < aligned_end) {
3437 unsigned long data = * (unsigned long *) _q;
3438 if (data & FAST_CHAR_MASK)
3439 break;
3440 _p[0] = ((unsigned short *) _q)[0];
3441 _p[1] = ((unsigned short *) _q)[1];
3442#if (SIZEOF_LONG8 == 8)
3443 _p[2] = ((unsigned short *) _q)[2];
3444 _p[3] = ((unsigned short *) _q)[3];
3445#endif
3446 _q += SIZEOF_LONG8;
3447 _p += SIZEOF_LONG8 / 2;
3448 }
3449 }
3450 else {
3451 /* Byteswapped ordering is similar, but we must decompose
3452 the copy bytewise, and take care of zero'ing out the
3453 upper bytes if the target object is in 32-bit units
3454 (that is, in UCS-4 builds). */
3455 while (_q < aligned_end) {
3456 unsigned long data = * (unsigned long *) _q;
3457 if (data & SWAPPED_FAST_CHAR_MASK)
3458 break;
3459 /* Zero upper bytes in UCS-4 builds */
3460#if (Py_UNICODE_SIZE2 > 2)
3461 _p[0] = 0;
3462 _p[1] = 0;
3463#if (SIZEOF_LONG8 == 8)
3464 _p[2] = 0;
3465 _p[3] = 0;
3466#endif
3467#endif
3468 /* Issue #4916; UCS-4 builds on big endian machines must
3469 fill the two last bytes of each 4-byte unit. */
3470#if (!defined(BYTEORDER_IS_LITTLE_ENDIAN) && Py_UNICODE_SIZE2 > 2)
3471# define OFF 2
3472#else
3473# define OFF 0
3474#endif
3475 ((unsigned char *) _p)[OFF + 1] = _q[0];
3476 ((unsigned char *) _p)[OFF + 0] = _q[1];
3477 ((unsigned char *) _p)[OFF + 1 + Py_UNICODE_SIZE2] = _q[2];
3478 ((unsigned char *) _p)[OFF + 0 + Py_UNICODE_SIZE2] = _q[3];
3479#if (SIZEOF_LONG8 == 8)
3480 ((unsigned char *) _p)[OFF + 1 + 2 * Py_UNICODE_SIZE2] = _q[4];
3481 ((unsigned char *) _p)[OFF + 0 + 2 * Py_UNICODE_SIZE2] = _q[5];
3482 ((unsigned char *) _p)[OFF + 1 + 3 * Py_UNICODE_SIZE2] = _q[6];
3483 ((unsigned char *) _p)[OFF + 0 + 3 * Py_UNICODE_SIZE2] = _q[7];
3484#endif
3485#undef OFF
3486 _q += SIZEOF_LONG8;
3487 _p += SIZEOF_LONG8 / 2;
3488 }
3489 }
3490 p = _p;
3491 q = _q;
3492 if (q >= e)
3493 break;
3494 }
3495 ch = (q[ihi] << 8) | q[ilo];
3496
3497 q += 2;
3498
3499 if (ch < 0xD800 || ch > 0xDFFF) {
3500 *p++ = ch;
3501 continue;
3502 }
3503
3504 /* UTF-16 code pair: */
3505 if (q > e) {
3506 errmsg = "unexpected end of data";
3507 startinpos = (((const char *)q) - 2) - starts;
3508 endinpos = ((const char *)e) + 1 - starts;
3509 goto utf16Error;
3510 }
3511 if (0xD800 <= ch && ch <= 0xDBFF) {
3512 Py_UNICODE ch2 = (q[ihi] << 8) | q[ilo];
3513 q += 2;
3514 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
3515#ifndef Py_UNICODE_WIDE
3516 *p++ = ch;
3517 *p++ = ch2;
3518#else
3519 *p++ = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
3520#endif
3521 continue;
3522 }
3523 else {
3524 errmsg = "illegal UTF-16 surrogate";
3525 startinpos = (((const char *)q)-4)-starts;
3526 endinpos = startinpos+2;
3527 goto utf16Error;
3528 }
3529
3530 }
3531 errmsg = "illegal encoding";
3532 startinpos = (((const char *)q)-2)-starts;
3533 endinpos = startinpos+2;
3534 /* Fall through to report the error */
3535
3536 utf16Error:
3537 outpos = p - PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 3537, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
3538 if (unicode_decode_call_errorhandler(
3539 errors,
3540 &errorHandler,
3541 "utf16", errmsg,
3542 &starts,
3543 (const char **)&e,
3544 &startinpos,
3545 &endinpos,
3546 &exc,
3547 (const char **)&q,
3548 &unicode,
3549 &outpos,
3550 &p))
3551 goto onError;
3552 }
3553 /* remaining byte at the end? (size should be even) */
3554 if (e == q) {
3555 if (!consumed) {
3556 errmsg = "truncated data";
3557 startinpos = ((const char *)q) - starts;
3558 endinpos = ((const char *)e) + 1 - starts;
3559 outpos = p - PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 3559, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
3560 if (unicode_decode_call_errorhandler(
3561 errors,
3562 &errorHandler,
3563 "utf16", errmsg,
3564 &starts,
3565 (const char **)&e,
3566 &startinpos,
3567 &endinpos,
3568 &exc,
3569 (const char **)&q,
3570 &unicode,
3571 &outpos,
3572 &p))
3573 goto onError;
3574 /* The remaining input chars are ignored if the callback
3575 chooses to skip the input */
3576 }
3577 }
3578
3579 if (byteorder)
3580 *byteorder = bo;
3581
3582 if (consumed)
3583 *consumed = (const char *)q-starts;
3584
3585 /* Adjust length */
3586 if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
3587 goto onError;
3588
3589 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3589, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
3590 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3590, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
3591 return (PyObject *)unicode;
3592
3593 onError:
3594 Py_DECREF(unicode)do { if (_Py_RefTotal-- , --((PyObject*)(unicode))->ob_refcnt
!= 0) { if (((PyObject*)unicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3594, (PyObject *)(unicode)); } else
_Py_Dealloc((PyObject *)(unicode)); } while (0)
;
3595 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3595, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
3596 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3596, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
3597 return NULL((void *)0);
3598}
3599
3600#undef FAST_CHAR_MASK
3601#undef SWAPPED_FAST_CHAR_MASK
3602
3603PyObject *
3604PyUnicode_EncodeUTF16PyUnicodeUCS2_EncodeUTF16(const Py_UNICODE *s,
3605 Py_ssize_t size,
3606 const char *errors,
3607 int byteorder)
3608{
3609 PyObject *v;
3610 unsigned char *p;
3611 Py_ssize_t nsize, bytesize;
3612#ifdef Py_UNICODE_WIDE
3613 Py_ssize_t i, pairs;
3614#else
3615 const int pairs = 0;
3616#endif
3617 /* Offsets from p for storing byte pairs in the right order. */
3618#ifdef BYTEORDER_IS_LITTLE_ENDIAN
3619 int ihi = 1, ilo = 0;
3620#else
3621 int ihi = 0, ilo = 1;
3622#endif
3623
3624#define STORECHAR(CH) \
3625 do { \
3626 p[ihi] = ((CH) >> 8) & 0xff; \
3627 p[ilo] = (CH) & 0xff; \
3628 p += 2; \
3629 } while(0)
3630
3631#ifdef Py_UNICODE_WIDE
3632 for (i = pairs = 0; i < size; i++)
3633 if (s[i] >= 0x10000)
3634 pairs++;
3635#endif
3636 /* 2 * (size + pairs + (byteorder == 0)) */
3637 if (size > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) ||
3638 size > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) - pairs - (byteorder == 0))
3639 return PyErr_NoMemory();
3640 nsize = size + pairs + (byteorder == 0);
3641 bytesize = nsize * 2;
3642 if (bytesize / 2 != nsize)
3643 return PyErr_NoMemory();
3644 v = PyBytes_FromStringAndSize(NULL((void *)0), bytesize);
3645 if (v == NULL((void *)0))
3646 return NULL((void *)0);
3647
3648 p = (unsigned char *)PyBytes_AS_STRING(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 3648, "PyBytes_Check(v)") : (void)0), (((PyBytesObject *)(v
))->ob_sval))
;
3649 if (byteorder == 0)
3650 STORECHAR(0xFEFF);
3651 if (size == 0)
3652 goto done;
3653
3654 if (byteorder == -1) {
3655 /* force LE */
3656 ihi = 1;
3657 ilo = 0;
3658 }
3659 else if (byteorder == 1) {
3660 /* force BE */
3661 ihi = 0;
3662 ilo = 1;
3663 }
3664
3665 while (size-- > 0) {
3666 Py_UNICODE ch = *s++;
3667 Py_UNICODE ch2 = 0;
3668#ifdef Py_UNICODE_WIDE
3669 if (ch >= 0x10000) {
3670 ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF);
3671 ch = 0xD800 | ((ch-0x10000) >> 10);
3672 }
3673#endif
3674 STORECHAR(ch);
3675 if (ch2)
3676 STORECHAR(ch2);
3677 }
3678
3679 done:
3680 return v;
3681#undef STORECHAR
3682}
3683
3684PyObject *PyUnicode_AsUTF16StringPyUnicodeUCS2_AsUTF16String(PyObject *unicode)
3685{
3686 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
3687 PyErr_BadArgument();
3688 return NULL((void *)0);
3689 }
3690 return PyUnicode_EncodeUTF16PyUnicodeUCS2_EncodeUTF16(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 3690, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
3691 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 3691, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
3692 NULL((void *)0),
3693 0);
3694}
3695
3696/* --- Unicode Escape Codec ----------------------------------------------- */
3697
3698static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL((void *)0);
3699
3700PyObject *PyUnicode_DecodeUnicodeEscapePyUnicodeUCS2_DecodeUnicodeEscape(const char *s,
3701 Py_ssize_t size,
3702 const char *errors)
3703{
3704 const char *starts = s;
3705 Py_ssize_t startinpos;
3706 Py_ssize_t endinpos;
3707 Py_ssize_t outpos;
3708 int i;
3709 PyUnicodeObject *v;
3710 Py_UNICODE *p;
3711 const char *end;
3712 char* message;
3713 Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
3714 PyObject *errorHandler = NULL((void *)0);
3715 PyObject *exc = NULL((void *)0);
3716
3717 /* Escaped strings will always be longer than the resulting
3718 Unicode string, so we start with size here and then reduce the
3719 length after conversion to the true value.
3720 (but if the error callback returns a long replacement string
3721 we'll have to allocate more space) */
3722 v = _PyUnicode_New(size);
3723 if (v == NULL((void *)0))
3724 goto onError;
3725 if (size == 0)
3726 return (PyObject *)v;
3727
3728 p = PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 3728, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
3729 end = s + size;
3730
3731 while (s < end) {
3732 unsigned char c;
3733 Py_UNICODE x;
3734 int digits;
3735
3736 /* Non-escape characters are interpreted as Unicode ordinals */
3737 if (*s != '\\') {
3738 *p++ = (unsigned char) *s++;
3739 continue;
3740 }
3741
3742 startinpos = s-starts;
3743 /* \ - Escapes */
3744 s++;
3745 c = *s++;
3746 if (s > end)
3747 c = '\0'; /* Invalid after \ */
3748 switch (c) {
3749
3750 /* \x escapes */
3751 case '\n': break;
3752 case '\\': *p++ = '\\'; break;
3753 case '\'': *p++ = '\''; break;
3754 case '\"': *p++ = '\"'; break;
3755 case 'b': *p++ = '\b'; break;
3756 case 'f': *p++ = '\014'; break; /* FF */
3757 case 't': *p++ = '\t'; break;
3758 case 'n': *p++ = '\n'; break;
3759 case 'r': *p++ = '\r'; break;
3760 case 'v': *p++ = '\013'; break; /* VT */
3761 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
3762
3763 /* \OOO (octal) escapes */
3764 case '0': case '1': case '2': case '3':
3765 case '4': case '5': case '6': case '7':
3766 x = s[-1] - '0';
3767 if (s < end && '0' <= *s && *s <= '7') {
3768 x = (x<<3) + *s++ - '0';
3769 if (s < end && '0' <= *s && *s <= '7')
3770 x = (x<<3) + *s++ - '0';
3771 }
3772 *p++ = x;
3773 break;
3774
3775 /* hex escapes */
3776 /* \xXX */
3777 case 'x':
3778 digits = 2;
3779 message = "truncated \\xXX escape";
3780 goto hexescape;
3781
3782 /* \uXXXX */
3783 case 'u':
3784 digits = 4;
3785 message = "truncated \\uXXXX escape";
3786 goto hexescape;
3787
3788 /* \UXXXXXXXX */
3789 case 'U':
3790 digits = 8;
3791 message = "truncated \\UXXXXXXXX escape";
3792 hexescape:
3793 chr = 0;
3794 outpos = p-PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 3794, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
3795 if (s+digits>end) {
3796 endinpos = size;
3797 if (unicode_decode_call_errorhandler(
3798 errors, &errorHandler,
3799 "unicodeescape", "end of string in escape sequence",
3800 &starts, &end, &startinpos, &endinpos, &exc, &s,
3801 &v, &outpos, &p))
3802 goto onError;
3803 goto nextByte;
3804 }
3805 for (i = 0; i < digits; ++i) {
3806 c = (unsigned char) s[i];
3807 if (!Py_ISXDIGIT(c)(_Py_ctype_table[((unsigned char)((c) & 0xff))] & 0x10
)
) {
3808 endinpos = (s+i+1)-starts;
3809 if (unicode_decode_call_errorhandler(
3810 errors, &errorHandler,
3811 "unicodeescape", message,
3812 &starts, &end, &startinpos, &endinpos, &exc, &s,
3813 &v, &outpos, &p))
3814 goto onError;
3815 goto nextByte;
3816 }
3817 chr = (chr<<4) & ~0xF;
3818 if (c >= '0' && c <= '9')
3819 chr += c - '0';
3820 else if (c >= 'a' && c <= 'f')
3821 chr += 10 + c - 'a';
3822 else
3823 chr += 10 + c - 'A';
3824 }
3825 s += i;
3826 if (chr == 0xffffffff && PyErr_Occurred())
3827 /* _decoding_error will have already written into the
3828 target buffer. */
3829 break;
3830 store:
3831 /* when we get here, chr is a 32-bit unicode character */
3832 if (chr <= 0xffff)
3833 /* UCS-2 character */
3834 *p++ = (Py_UNICODE) chr;
3835 else if (chr <= 0x10ffff) {
3836 /* UCS-4 character. Either store directly, or as
3837 surrogate pair. */
3838#ifdef Py_UNICODE_WIDE
3839 *p++ = chr;
3840#else
3841 chr -= 0x10000L;
3842 *p++ = 0xD800 + (Py_UNICODE) (chr >> 10);
3843 *p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
3844#endif
3845 } else {
3846 endinpos = s-starts;
3847 outpos = p-PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 3847, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
3848 if (unicode_decode_call_errorhandler(
3849 errors, &errorHandler,
3850 "unicodeescape", "illegal Unicode character",
3851 &starts, &end, &startinpos, &endinpos, &exc, &s,
3852 &v, &outpos, &p))
3853 goto onError;
3854 }
3855 break;
3856
3857 /* \N{name} */
3858 case 'N':
3859 message = "malformed \\N character escape";
3860 if (ucnhash_CAPI == NULL((void *)0)) {
3861 /* load the unicode data module */
3862 ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(PyUnicodeData_CAPSULE_NAME"unicodedata.ucnhash_CAPI", 1);
3863 if (ucnhash_CAPI == NULL((void *)0))
3864 goto ucnhashError;
3865 }
3866 if (*s == '{') {
3867 const char *start = s+1;
3868 /* look for the closing brace */
3869 while (*s != '}' && s < end)
3870 s++;
3871 if (s > start && s < end && *s == '}') {
3872 /* found a name. look it up in the unicode database */
3873 message = "unknown Unicode character name";
3874 s++;
3875 if (ucnhash_CAPI->getcode(NULL((void *)0), start, (int)(s-start-1), &chr))
3876 goto store;
3877 }
3878 }
3879 endinpos = s-starts;
3880 outpos = p-PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 3880, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
3881 if (unicode_decode_call_errorhandler(
3882 errors, &errorHandler,
3883 "unicodeescape", message,
3884 &starts, &end, &startinpos, &endinpos, &exc, &s,
3885 &v, &outpos, &p))
3886 goto onError;
3887 break;
3888
3889 default:
3890 if (s > end) {
3891 message = "\\ at end of string";
3892 s--;
3893 endinpos = s-starts;
3894 outpos = p-PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 3894, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
3895 if (unicode_decode_call_errorhandler(
3896 errors, &errorHandler,
3897 "unicodeescape", message,
3898 &starts, &end, &startinpos, &endinpos, &exc, &s,
3899 &v, &outpos, &p))
3900 goto onError;
3901 }
3902 else {
3903 *p++ = '\\';
3904 *p++ = (unsigned char)s[-1];
3905 }
3906 break;
3907 }
3908 nextByte:
3909 ;
3910 }
3911 if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 3911, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
) < 0)
3912 goto onError;
3913 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3913, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
3914 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3914, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
3915 return (PyObject *)v;
3916
3917 ucnhashError:
3918 PyErr_SetString(
3919 PyExc_UnicodeError,
3920 "\\N escapes not supported (can't load unicodedata module)"
3921 );
3922 Py_XDECREF(v)do { if ((v) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(v))->ob_refcnt != 0) { if (((PyObject*)v)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3922, (PyObject *)(v)); } else _Py_Dealloc((PyObject *)(v))
; } while (0); } while (0)
;
3923 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3923, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
3924 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3924, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
3925 return NULL((void *)0);
3926
3927 onError:
3928 Py_XDECREF(v)do { if ((v) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(v))->ob_refcnt != 0) { if (((PyObject*)v)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3928, (PyObject *)(v)); } else _Py_Dealloc((PyObject *)(v))
; } while (0); } while (0)
;
3929 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 3929, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
3930 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 3930, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
3931 return NULL((void *)0);
3932}
3933
3934/* Return a Unicode-Escape string version of the Unicode object.
3935
3936 If quotes is true, the string is enclosed in u"" or u'' quotes as
3937 appropriate.
3938
3939*/
3940
3941Py_LOCAL_INLINE(const Py_UNICODE *)static inline const Py_UNICODE * findchar(const Py_UNICODE *s,
3942 Py_ssize_t size,
3943 Py_UNICODE ch)
3944{
3945 /* like wcschr, but doesn't stop at NULL characters */
3946
3947 while (size-- > 0) {
3948 if (*s == ch)
3949 return s;
3950 s++;
3951 }
3952
3953 return NULL((void *)0);
3954}
3955
3956static const char *hexdigits = "0123456789abcdef";
3957
3958PyObject *PyUnicode_EncodeUnicodeEscapePyUnicodeUCS2_EncodeUnicodeEscape(const Py_UNICODE *s,
3959 Py_ssize_t size)
3960{
3961 PyObject *repr;
3962 char *p;
3963
3964#ifdef Py_UNICODE_WIDE
3965 const Py_ssize_t expandsize = 10;
3966#else
3967 const Py_ssize_t expandsize = 6;
3968#endif
3969
3970 /* XXX(nnorwitz): rather than over-allocating, it would be
3971 better to choose a different scheme. Perhaps scan the
3972 first N-chars of the string and allocate based on that size.
3973 */
3974 /* Initial allocation is based on the longest-possible unichr
3975 escape.
3976
3977 In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
3978 unichr, so in this case it's the longest unichr escape. In
3979 narrow (UTF-16) builds this is five chars per source unichr
3980 since there are two unichrs in the surrogate pair, so in narrow
3981 (UTF-16) builds it's not the longest unichr escape.
3982
3983 In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
3984 so in the narrow (UTF-16) build case it's the longest unichr
3985 escape.
3986 */
3987
3988 if (size == 0)
3989 return PyBytes_FromStringAndSize(NULL((void *)0), 0);
3990
3991 if (size > (PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) - 2 - 1) / expandsize)
3992 return PyErr_NoMemory();
3993
3994 repr = PyBytes_FromStringAndSize(NULL((void *)0),
3995 2
3996 + expandsize*size
3997 + 1);
3998 if (repr == NULL((void *)0))
3999 return NULL((void *)0);
4000
4001 p = PyBytes_AS_STRING(repr)((__builtin_expect(!(((((((PyObject*)(repr))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4001, "PyBytes_Check(repr)") : (
void)0), (((PyBytesObject *)(repr))->ob_sval))
;
4002
4003 while (size-- > 0) {
4004 Py_UNICODE ch = *s++;
4005
4006 /* Escape backslashes */
4007 if (ch == '\\') {
4008 *p++ = '\\';
4009 *p++ = (char) ch;
4010 continue;
4011 }
4012
4013#ifdef Py_UNICODE_WIDE
4014 /* Map 21-bit characters to '\U00xxxxxx' */
4015 else if (ch >= 0x10000) {
4016 *p++ = '\\';
4017 *p++ = 'U';
4018 *p++ = hexdigits[(ch >> 28) & 0x0000000F];
4019 *p++ = hexdigits[(ch >> 24) & 0x0000000F];
4020 *p++ = hexdigits[(ch >> 20) & 0x0000000F];
4021 *p++ = hexdigits[(ch >> 16) & 0x0000000F];
4022 *p++ = hexdigits[(ch >> 12) & 0x0000000F];
4023 *p++ = hexdigits[(ch >> 8) & 0x0000000F];
4024 *p++ = hexdigits[(ch >> 4) & 0x0000000F];
4025 *p++ = hexdigits[ch & 0x0000000F];
4026 continue;
4027 }
4028#else
4029 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
4030 else if (ch >= 0xD800 && ch < 0xDC00) {
4031 Py_UNICODE ch2;
4032 Py_UCS4 ucs;
4033
4034 ch2 = *s++;
4035 size--;
4036 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
4037 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
4038 *p++ = '\\';
4039 *p++ = 'U';
4040 *p++ = hexdigits[(ucs >> 28) & 0x0000000F];
4041 *p++ = hexdigits[(ucs >> 24) & 0x0000000F];
4042 *p++ = hexdigits[(ucs >> 20) & 0x0000000F];
4043 *p++ = hexdigits[(ucs >> 16) & 0x0000000F];
4044 *p++ = hexdigits[(ucs >> 12) & 0x0000000F];
4045 *p++ = hexdigits[(ucs >> 8) & 0x0000000F];
4046 *p++ = hexdigits[(ucs >> 4) & 0x0000000F];
4047 *p++ = hexdigits[ucs & 0x0000000F];
4048 continue;
4049 }
4050 /* Fall through: isolated surrogates are copied as-is */
4051 s--;
4052 size++;
4053 }
4054#endif
4055
4056 /* Map 16-bit characters to '\uxxxx' */
4057 if (ch >= 256) {
4058 *p++ = '\\';
4059 *p++ = 'u';
4060 *p++ = hexdigits[(ch >> 12) & 0x000F];
4061 *p++ = hexdigits[(ch >> 8) & 0x000F];
4062 *p++ = hexdigits[(ch >> 4) & 0x000F];
4063 *p++ = hexdigits[ch & 0x000F];
4064 }
4065
4066 /* Map special whitespace to '\t', \n', '\r' */
4067 else if (ch == '\t') {
4068 *p++ = '\\';
4069 *p++ = 't';
4070 }
4071 else if (ch == '\n') {
4072 *p++ = '\\';
4073 *p++ = 'n';
4074 }
4075 else if (ch == '\r') {
4076 *p++ = '\\';
4077 *p++ = 'r';
4078 }
4079
4080 /* Map non-printable US ASCII to '\xhh' */
4081 else if (ch < ' ' || ch >= 0x7F) {
4082 *p++ = '\\';
4083 *p++ = 'x';
4084 *p++ = hexdigits[(ch >> 4) & 0x000F];
4085 *p++ = hexdigits[ch & 0x000F];
4086 }
4087
4088 /* Copy everything else as-is */
4089 else
4090 *p++ = (char) ch;
4091 }
4092
4093 assert(p - PyBytes_AS_STRING(repr) > 0)(__builtin_expect(!(p - ((__builtin_expect(!(((((((PyObject*)
(repr))->ob_type))->tp_flags & ((1L<<27))) !=
0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c", 4093
, "PyBytes_Check(repr)") : (void)0), (((PyBytesObject *)(repr
))->ob_sval)) > 0), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4093, "p - PyBytes_AS_STRING(repr) > 0") : (void)0)
;
4094 if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)((__builtin_expect(!(((((((PyObject*)(repr))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4094, "PyBytes_Check(repr)") : (
void)0), (((PyBytesObject *)(repr))->ob_sval))
) < 0)
4095 return NULL((void *)0);
4096 return repr;
4097}
4098
4099PyObject *PyUnicode_AsUnicodeEscapeStringPyUnicodeUCS2_AsUnicodeEscapeString(PyObject *unicode)
4100{
4101 PyObject *s;
4102 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
4103 PyErr_BadArgument();
4104 return NULL((void *)0);
4105 }
4106 s = PyUnicode_EncodeUnicodeEscapePyUnicodeUCS2_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 4106, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
4107 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 4107, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
);
4108 return s;
4109}
4110
4111/* --- Raw Unicode Escape Codec ------------------------------------------- */
4112
4113PyObject *PyUnicode_DecodeRawUnicodeEscapePyUnicodeUCS2_DecodeRawUnicodeEscape(const char *s,
4114 Py_ssize_t size,
4115 const char *errors)
4116{
4117 const char *starts = s;
4118 Py_ssize_t startinpos;
4119 Py_ssize_t endinpos;
4120 Py_ssize_t outpos;
4121 PyUnicodeObject *v;
4122 Py_UNICODE *p;
4123 const char *end;
4124 const char *bs;
4125 PyObject *errorHandler = NULL((void *)0);
4126 PyObject *exc = NULL((void *)0);
4127
4128 /* Escaped strings will always be longer than the resulting
4129 Unicode string, so we start with size here and then reduce the
4130 length after conversion to the true value. (But decoding error
4131 handler might have to resize the string) */
4132 v = _PyUnicode_New(size);
4133 if (v == NULL((void *)0))
4134 goto onError;
4135 if (size == 0)
4136 return (PyObject *)v;
4137 p = PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4137, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
4138 end = s + size;
4139 while (s < end) {
4140 unsigned char c;
4141 Py_UCS4 x;
4142 int i;
4143 int count;
4144
4145 /* Non-escape characters are interpreted as Unicode ordinals */
4146 if (*s != '\\') {
4147 *p++ = (unsigned char)*s++;
4148 continue;
4149 }
4150 startinpos = s-starts;
4151
4152 /* \u-escapes are only interpreted iff the number of leading
4153 backslashes if odd */
4154 bs = s;
4155 for (;s < end;) {
4156 if (*s != '\\')
4157 break;
4158 *p++ = (unsigned char)*s++;
4159 }
4160 if (((s - bs) & 1) == 0 ||
4161 s >= end ||
4162 (*s != 'u' && *s != 'U')) {
4163 continue;
4164 }
4165 p--;
4166 count = *s=='u' ? 4 : 8;
4167 s++;
4168
4169 /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
4170 outpos = p-PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4170, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
4171 for (x = 0, i = 0; i < count; ++i, ++s) {
4172 c = (unsigned char)*s;
4173 if (!Py_ISXDIGIT(c)(_Py_ctype_table[((unsigned char)((c) & 0xff))] & 0x10
)
) {
4174 endinpos = s-starts;
4175 if (unicode_decode_call_errorhandler(
4176 errors, &errorHandler,
4177 "rawunicodeescape", "truncated \\uXXXX",
4178 &starts, &end, &startinpos, &endinpos, &exc, &s,
4179 &v, &outpos, &p))
4180 goto onError;
4181 goto nextByte;
4182 }
4183 x = (x<<4) & ~0xF;
4184 if (c >= '0' && c <= '9')
4185 x += c - '0';
4186 else if (c >= 'a' && c <= 'f')
4187 x += 10 + c - 'a';
4188 else
4189 x += 10 + c - 'A';
4190 }
4191 if (x <= 0xffff)
4192 /* UCS-2 character */
4193 *p++ = (Py_UNICODE) x;
4194 else if (x <= 0x10ffff) {
4195 /* UCS-4 character. Either store directly, or as
4196 surrogate pair. */
4197#ifdef Py_UNICODE_WIDE
4198 *p++ = (Py_UNICODE) x;
4199#else
4200 x -= 0x10000L;
4201 *p++ = 0xD800 + (Py_UNICODE) (x >> 10);
4202 *p++ = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
4203#endif
4204 } else {
4205 endinpos = s-starts;
4206 outpos = p-PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4206, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
4207 if (unicode_decode_call_errorhandler(
4208 errors, &errorHandler,
4209 "rawunicodeescape", "\\Uxxxxxxxx out of range",
4210 &starts, &end, &startinpos, &endinpos, &exc, &s,
4211 &v, &outpos, &p))
4212 goto onError;
4213 }
4214 nextByte:
4215 ;
4216 }
4217 if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4217, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
) < 0)
4218 goto onError;
4219 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4219, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
4220 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4220, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
4221 return (PyObject *)v;
4222
4223 onError:
4224 Py_XDECREF(v)do { if ((v) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(v))->ob_refcnt != 0) { if (((PyObject*)v)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4224, (PyObject *)(v)); } else _Py_Dealloc((PyObject *)(v))
; } while (0); } while (0)
;
4225 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4225, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
4226 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4226, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
4227 return NULL((void *)0);
4228}
4229
4230PyObject *PyUnicode_EncodeRawUnicodeEscapePyUnicodeUCS2_EncodeRawUnicodeEscape(const Py_UNICODE *s,
4231 Py_ssize_t size)
4232{
4233 PyObject *repr;
4234 char *p;
4235 char *q;
4236
4237#ifdef Py_UNICODE_WIDE
4238 const Py_ssize_t expandsize = 10;
4239#else
4240 const Py_ssize_t expandsize = 6;
4241#endif
4242
4243 if (size > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) / expandsize)
4244 return PyErr_NoMemory();
4245
4246 repr = PyBytes_FromStringAndSize(NULL((void *)0), expandsize * size);
4247 if (repr == NULL((void *)0))
4248 return NULL((void *)0);
4249 if (size == 0)
4250 return repr;
4251
4252 p = q = PyBytes_AS_STRING(repr)((__builtin_expect(!(((((((PyObject*)(repr))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4252, "PyBytes_Check(repr)") : (
void)0), (((PyBytesObject *)(repr))->ob_sval))
;
4253 while (size-- > 0) {
4254 Py_UNICODE ch = *s++;
4255#ifdef Py_UNICODE_WIDE
4256 /* Map 32-bit characters to '\Uxxxxxxxx' */
4257 if (ch >= 0x10000) {
4258 *p++ = '\\';
4259 *p++ = 'U';
4260 *p++ = hexdigits[(ch >> 28) & 0xf];
4261 *p++ = hexdigits[(ch >> 24) & 0xf];
4262 *p++ = hexdigits[(ch >> 20) & 0xf];
4263 *p++ = hexdigits[(ch >> 16) & 0xf];
4264 *p++ = hexdigits[(ch >> 12) & 0xf];
4265 *p++ = hexdigits[(ch >> 8) & 0xf];
4266 *p++ = hexdigits[(ch >> 4) & 0xf];
4267 *p++ = hexdigits[ch & 15];
4268 }
4269 else
4270#else
4271 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
4272 if (ch >= 0xD800 && ch < 0xDC00) {
4273 Py_UNICODE ch2;
4274 Py_UCS4 ucs;
4275
4276 ch2 = *s++;
4277 size--;
4278 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
4279 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
4280 *p++ = '\\';
4281 *p++ = 'U';
4282 *p++ = hexdigits[(ucs >> 28) & 0xf];
4283 *p++ = hexdigits[(ucs >> 24) & 0xf];
4284 *p++ = hexdigits[(ucs >> 20) & 0xf];
4285 *p++ = hexdigits[(ucs >> 16) & 0xf];
4286 *p++ = hexdigits[(ucs >> 12) & 0xf];
4287 *p++ = hexdigits[(ucs >> 8) & 0xf];
4288 *p++ = hexdigits[(ucs >> 4) & 0xf];
4289 *p++ = hexdigits[ucs & 0xf];
4290 continue;
4291 }
4292 /* Fall through: isolated surrogates are copied as-is */
4293 s--;
4294 size++;
4295 }
4296#endif
4297 /* Map 16-bit characters to '\uxxxx' */
4298 if (ch >= 256) {
4299 *p++ = '\\';
4300 *p++ = 'u';
4301 *p++ = hexdigits[(ch >> 12) & 0xf];
4302 *p++ = hexdigits[(ch >> 8) & 0xf];
4303 *p++ = hexdigits[(ch >> 4) & 0xf];
4304 *p++ = hexdigits[ch & 15];
4305 }
4306 /* Copy everything else as-is */
4307 else
4308 *p++ = (char) ch;
4309 }
4310 size = p - q;
4311
4312 assert(size > 0)(__builtin_expect(!(size > 0), 0) ? __assert_rtn(__func__,
"Objects/unicodeobject.c", 4312, "size > 0") : (void)0)
;
4313 if (_PyBytes_Resize(&repr, size) < 0)
4314 return NULL((void *)0);
4315 return repr;
4316}
4317
4318PyObject *PyUnicode_AsRawUnicodeEscapeStringPyUnicodeUCS2_AsRawUnicodeEscapeString(PyObject *unicode)
4319{
4320 PyObject *s;
4321 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
4322 PyErr_BadArgument();
4323 return NULL((void *)0);
4324 }
4325 s = PyUnicode_EncodeRawUnicodeEscapePyUnicodeUCS2_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 4325, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
4326 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 4326, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
);
4327
4328 return s;
4329}
4330
4331/* --- Unicode Internal Codec ------------------------------------------- */
4332
4333PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
4334 Py_ssize_t size,
4335 const char *errors)
4336{
4337 const char *starts = s;
4338 Py_ssize_t startinpos;
4339 Py_ssize_t endinpos;
4340 Py_ssize_t outpos;
4341 PyUnicodeObject *v;
4342 Py_UNICODE *p;
4343 const char *end;
4344 const char *reason;
4345 PyObject *errorHandler = NULL((void *)0);
4346 PyObject *exc = NULL((void *)0);
4347
4348#ifdef Py_UNICODE_WIDE
4349 Py_UNICODE unimax = PyUnicode_GetMaxPyUnicodeUCS2_GetMax();
4350#endif
4351
4352 /* XXX overflow detection missing */
4353 v = _PyUnicode_New((size+Py_UNICODE_SIZE2-1)/ Py_UNICODE_SIZE2);
4354 if (v == NULL((void *)0))
4355 goto onError;
4356 if (PyUnicode_GetSizePyUnicodeUCS2_GetSize((PyObject *)v) == 0)
4357 return (PyObject *)v;
4358 p = PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4358, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
4359 end = s + size;
4360
4361 while (s < end) {
4362 memcpy(p, s, sizeof(Py_UNICODE))((__builtin_object_size (p, 0) != (size_t) -1) ? __builtin___memcpy_chk
(p, s, sizeof(Py_UNICODE), __builtin_object_size (p, 0)) : __inline_memcpy_chk
(p, s, sizeof(Py_UNICODE)))
;
4363 /* We have to sanity check the raw data, otherwise doom looms for
4364 some malformed UCS-4 data. */
4365 if (
4366#ifdef Py_UNICODE_WIDE
4367 *p > unimax || *p < 0 ||
4368#endif
4369 end-s < Py_UNICODE_SIZE2
4370 )
4371 {
4372 startinpos = s - starts;
4373 if (end-s < Py_UNICODE_SIZE2) {
4374 endinpos = end-starts;
4375 reason = "truncated input";
4376 }
4377 else {
4378 endinpos = s - starts + Py_UNICODE_SIZE2;
4379 reason = "illegal code point (> 0x10FFFF)";
4380 }
4381 outpos = p - PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4381, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
4382 if (unicode_decode_call_errorhandler(
4383 errors, &errorHandler,
4384 "unicode_internal", reason,
4385 &starts, &end, &startinpos, &endinpos, &exc, &s,
4386 &v, &outpos, &p)) {
4387 goto onError;
4388 }
4389 }
4390 else {
4391 p++;
4392 s += Py_UNICODE_SIZE2;
4393 }
4394 }
4395
4396 if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4396, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
) < 0)
4397 goto onError;
4398 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4398, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
4399 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4399, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
4400 return (PyObject *)v;
4401
4402 onError:
4403 Py_XDECREF(v)do { if ((v) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(v))->ob_refcnt != 0) { if (((PyObject*)v)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4403, (PyObject *)(v)); } else _Py_Dealloc((PyObject *)(v))
; } while (0); } while (0)
;
4404 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4404, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
4405 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4405, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
4406 return NULL((void *)0);
4407}
4408
4409/* --- Latin-1 Codec ------------------------------------------------------ */
4410
4411PyObject *PyUnicode_DecodeLatin1PyUnicodeUCS2_DecodeLatin1(const char *s,
4412 Py_ssize_t size,
4413 const char *errors)
4414{
4415 PyUnicodeObject *v;
4416 Py_UNICODE *p;
4417 const char *e, *unrolled_end;
4418
4419 /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
4420 if (size == 1) {
4421 Py_UNICODE r = *(unsigned char*)s;
4422 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(&r, 1);
4423 }
4424
4425 v = _PyUnicode_New(size);
4426 if (v == NULL((void *)0))
4427 goto onError;
4428 if (size == 0)
4429 return (PyObject *)v;
4430 p = PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4430, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
4431 e = s + size;
4432 /* Unrolling the copy makes it much faster by reducing the looping
4433 overhead. This is similar to what many memcpy() implementations do. */
4434 unrolled_end = e - 4;
4435 while (s < unrolled_end) {
4436 p[0] = (unsigned char) s[0];
4437 p[1] = (unsigned char) s[1];
4438 p[2] = (unsigned char) s[2];
4439 p[3] = (unsigned char) s[3];
4440 s += 4;
4441 p += 4;
4442 }
4443 while (s < e)
4444 *p++ = (unsigned char) *s++;
4445 return (PyObject *)v;
4446
4447 onError:
4448 Py_XDECREF(v)do { if ((v) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(v))->ob_refcnt != 0) { if (((PyObject*)v)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4448, (PyObject *)(v)); } else _Py_Dealloc((PyObject *)(v))
; } while (0); } while (0)
;
4449 return NULL((void *)0);
4450}
4451
4452/* create or adjust a UnicodeEncodeError */
4453static void make_encode_exception(PyObject **exceptionObject,
4454 const char *encoding,
4455 const Py_UNICODE *unicode, Py_ssize_t size,
4456 Py_ssize_t startpos, Py_ssize_t endpos,
4457 const char *reason)
4458{
4459 if (*exceptionObject == NULL((void *)0)) {
4460 *exceptionObject = PyUnicodeEncodeError_Create(
4461 encoding, unicode, size, startpos, endpos, reason);
4462 }
4463 else {
4464 if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos))
4465 goto onError;
4466 if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos))
4467 goto onError;
4468 if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason))
4469 goto onError;
4470 return;
4471 onError:
4472 Py_DECREF(*exceptionObject)do { if (_Py_RefTotal-- , --((PyObject*)(*exceptionObject))->
ob_refcnt != 0) { if (((PyObject*)*exceptionObject)->ob_refcnt
< 0) _Py_NegativeRefcount("Objects/unicodeobject.c", 4472
, (PyObject *)(*exceptionObject)); } else _Py_Dealloc((PyObject
*)(*exceptionObject)); } while (0)
;
4473 *exceptionObject = NULL((void *)0);
4474 }
4475}
4476
4477/* raises a UnicodeEncodeError */
4478static void raise_encode_exception(PyObject **exceptionObject,
4479 const char *encoding,
4480 const Py_UNICODE *unicode, Py_ssize_t size,
4481 Py_ssize_t startpos, Py_ssize_t endpos,
4482 const char *reason)
4483{
4484 make_encode_exception(exceptionObject,
4485 encoding, unicode, size, startpos, endpos, reason);
4486 if (*exceptionObject != NULL((void *)0))
4487 PyCodec_StrictErrors(*exceptionObject);
4488}
4489
4490/* error handling callback helper:
4491 build arguments, call the callback and check the arguments,
4492 put the result into newpos and return the replacement string, which
4493 has to be freed by the caller

   Steps, in order:
   - if *errorHandler is NULL, look the handler up once with
     PyCodec_LookupError(errors) and cache it in *errorHandler;
   - build/update the exception in *exceptionObject for the range
     startpos..endpos of the input (unicode, size);
   - call the handler with that exception as its only argument;
   - require the result to be a tuple that parses as (object, ssize_t) and
     whose first item is a str or bytes object, otherwise TypeError;
   - a negative *newpos is taken relative to size; a final value outside
     0..size raises IndexError.

   Returns the replacement object with its reference count incremented,
   or NULL on any failure. */
4494static PyObject *unicode_encode_call_errorhandler(const char *errors,
4495 PyObject **errorHandler,
4496 const char *encoding, const char *reason,
4497 const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
4498 Py_ssize_t startpos, Py_ssize_t endpos,
4499 Py_ssize_t *newpos)
4500{
/* "On" is the parse format; the text after ';' doubles as the error message */
4501 static char *argparse = "On;encoding error handler must return (str/bytes, int) tuple";
4502
4503 PyObject *restuple;
4504 PyObject *resunicode;
4505
4506 if (*errorHandler == NULL((void *)0)) {
4507 *errorHandler = PyCodec_LookupError(errors);
4508 if (*errorHandler == NULL((void *)0))
4509 return NULL((void *)0);
4510 }
4511
4512 make_encode_exception(exceptionObject,
4513 encoding, unicode, size, startpos, endpos, reason);
4514 if (*exceptionObject == NULL((void *)0))
4515 return NULL((void *)0);
4516
4517 restuple = PyObject_CallFunctionObjArgs(
4518 *errorHandler, *exceptionObject, NULL((void *)0));
4519 if (restuple == NULL((void *)0))
4520 return NULL((void *)0);
4521 if (!PyTuple_Check(restuple)((((((PyObject*)(restuple))->ob_type))->tp_flags & (
(1L<<26))) != 0)
) {
/* &argparse[3] skips the leading "On;" so only the message text is used */
4522 PyErr_SetString(PyExc_TypeError, &argparse[3]);
4523 Py_DECREF(restuple)do { if (_Py_RefTotal-- , --((PyObject*)(restuple))->ob_refcnt
!= 0) { if (((PyObject*)restuple)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4523, (PyObject *)(restuple)); } else
_Py_Dealloc((PyObject *)(restuple)); } while (0)
;
4524 return NULL((void *)0);
4525 }
4526 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(restuple, argparse,
4527 &resunicode, newpos)) {
4528 Py_DECREF(restuple)do { if (_Py_RefTotal-- , --((PyObject*)(restuple))->ob_refcnt
!= 0) { if (((PyObject*)restuple)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4528, (PyObject *)(restuple)); } else
_Py_Dealloc((PyObject *)(restuple)); } while (0)
;
4529 return NULL((void *)0);
4530 }
/* the replacement must be either str or bytes */
4531 if (!PyUnicode_Check(resunicode)((((((PyObject*)(resunicode))->ob_type))->tp_flags &
((1L<<28))) != 0)
&& !PyBytes_Check(resunicode)((((((PyObject*)(resunicode))->ob_type))->tp_flags &
((1L<<27))) != 0)
) {
4532 PyErr_SetString(PyExc_TypeError, &argparse[3]);
4533 Py_DECREF(restuple)do { if (_Py_RefTotal-- , --((PyObject*)(restuple))->ob_refcnt
!= 0) { if (((PyObject*)restuple)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4533, (PyObject *)(restuple)); } else
_Py_Dealloc((PyObject *)(restuple)); } while (0)
;
4534 return NULL((void *)0);
4535 }
/* negative positions count from the end of the input */
4536 if (*newpos<0)
4537 *newpos = size+*newpos;
4538 if (*newpos<0 || *newpos>size) {
4539 PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
4540 Py_DECREF(restuple)do { if (_Py_RefTotal-- , --((PyObject*)(restuple))->ob_refcnt
!= 0) { if (((PyObject*)restuple)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4540, (PyObject *)(restuple)); } else
_Py_Dealloc((PyObject *)(restuple)); } while (0)
;
4541 return NULL((void *)0);
4542 }
/* take our own reference to the replacement before releasing the tuple */
4543 Py_INCREF(resunicode)( _Py_RefTotal++ , ((PyObject*)(resunicode))->ob_refcnt++);
4544 Py_DECREF(restuple)do { if (_Py_RefTotal-- , --((PyObject*)(restuple))->ob_refcnt
!= 0) { if (((PyObject*)restuple)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4544, (PyObject *)(restuple)); } else
_Py_Dealloc((PyObject *)(restuple)); } while (0)
;
4545 return resunicode;
4546}
4547
/* Shared encoder behind the Latin-1 and ASCII codecs.
   Encodes the size characters starting at p into a bytes object, one byte
   per character whose ordinal is below limit (the callers in this file pass
   256 for Latin-1 and 128 for ASCII).  Runs of characters at or above limit
   are handled according to errors: NULL/"strict", "replace", "ignore" and
   "xmlcharrefreplace" are handled inline, any other name goes through
   unicode_encode_call_errorhandler().
   Returns the bytes object, or NULL on failure; every failure path goes
   through onError, which releases res, the cached handler and the exception
   object. */
4548static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
4549 Py_ssize_t size,
4550 const char *errors,
4551 int limit)
4552{
4553 /* output object */
4554 PyObject *res;
4555 /* pointers to the beginning and end+1 of input */
4556 const Py_UNICODE *startp = p;
4557 const Py_UNICODE *endp = p + size;
4558 /* pointer to the beginning of the unencodable characters */
4559 /* const Py_UNICODE *badp = NULL; */
4560 /* pointer into the output */
4561 char *str;
4562 /* current output position */
4563 Py_ssize_t ressize;
/* codec name and message used in error reports: 256 selects the latin-1
   texts, any other limit the ascii ones */
4564 const char *encoding = (limit == 256) ? "latin-1" : "ascii";
4565 const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
4566 PyObject *errorHandler = NULL((void *)0);
4567 PyObject *exc = NULL((void *)0);
4568 /* the following variable is used for caching string comparisons
4569 * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
4570 int known_errorHandler = -1;
4571
4572 /* allocate enough for a simple encoding without
4573 replacements, if we need more, we'll resize */
4574 if (size == 0)
4575 return PyBytes_FromStringAndSize(NULL((void *)0), 0);
4576 res = PyBytes_FromStringAndSize(NULL((void *)0), size);
4577 if (res == NULL((void *)0))
4578 return NULL((void *)0);
4579 str = PyBytes_AS_STRING(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4579, "PyBytes_Check(res)") : (void
)0), (((PyBytesObject *)(res))->ob_sval))
;
4580 ressize = size;
4581
4582 while (p<endp) {
4583 Py_UNICODE c = *p;
4584
4585 /* can we encode this? */
4586 if (c<limit) {
4587 /* no overflow check, because we know that the space is enough */
4588 *str++ = (char)c;
4589 ++p;
4590 }
4591 else {
4592 Py_ssize_t unicodepos = p-startp;
4593 Py_ssize_t requiredsize;
4594 PyObject *repunicode;
4595 Py_ssize_t repsize;
4596 Py_ssize_t newpos;
4597 Py_ssize_t respos;
4598 Py_UNICODE *uni2;
4599 /* startpos for collecting unencodable chars */
4600 const Py_UNICODE *collstart = p;
4601 const Py_UNICODE *collend = p;
4602 /* find all unencodable characters */
4603 while ((collend < endp) && ((*collend)>=limit))
4604 ++collend;
4605 /* cache callback name lookup (if not done yet, i.e. it's the first error) */
4606 if (known_errorHandler==-1) {
4607 if ((errors==NULL((void *)0)) || (!strcmp(errors, "strict")))
4608 known_errorHandler = 1;
4609 else if (!strcmp(errors, "replace"))
4610 known_errorHandler = 2;
4611 else if (!strcmp(errors, "ignore"))
4612 known_errorHandler = 3;
4613 else if (!strcmp(errors, "xmlcharrefreplace"))
4614 known_errorHandler = 4;
4615 else
4616 known_errorHandler = 0;
4617 }
4618 switch (known_errorHandler) {
4619 case 1: /* strict */
4620 raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason);
4621 goto onError;
4622 case 2: /* replace */
/* one '?' per unencodable character, then share the "ignore" code
   below to skip past the whole run */
4623 while (collstart++<collend)
4624 *str++ = '?'; /* fall through */
4625 case 3: /* ignore */
4626 p = collend;
4627 break;
4628 case 4: /* xmlcharrefreplace */
4629 respos = str - PyBytes_AS_STRING(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4629, "PyBytes_Check(res)") : (void
)0), (((PyBytesObject *)(res))->ob_sval))
;
4630 /* determine replacement size (temporarily (mis)uses p) */
/* each term is 2 ("&#") + number of decimal digits + 1 (";"),
   matching the "&#%d;" format used below */
4631 for (p = collstart, repsize = 0; p < collend; ++p) {
4632 if (*p<10)
4633 repsize += 2+1+1;
4634 else if (*p<100)
4635 repsize += 2+2+1;
4636 else if (*p<1000)
4637 repsize += 2+3+1;
4638 else if (*p<10000)
4639 repsize += 2+4+1;
4640#ifndef Py_UNICODE_WIDE
4641 else
4642 repsize += 2+5+1;
4643#else
4644 else if (*p<100000)
4645 repsize += 2+5+1;
4646 else if (*p<1000000)
4647 repsize += 2+6+1;
4648 else
4649 repsize += 2+7+1;
4650#endif
4651 }
/* space needed: what is already written + the replacement + the
   not yet processed rest of the input */
4652 requiredsize = respos+repsize+(endp-collend);
4653 if (requiredsize > ressize) {
/* grow to at least double the current size */
4654 if (requiredsize<2*ressize)
4655 requiredsize = 2*ressize;
4656 if (_PyBytes_Resize(&res, requiredsize))
4657 goto onError;
/* the resize may have moved the buffer: recompute the write pointer */
4658 str = PyBytes_AS_STRING(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4658, "PyBytes_Check(res)") : (void
)0), (((PyBytesObject *)(res))->ob_sval))
+ respos;
4659 ressize = requiredsize;
4660 }
4661 /* generate replacement (temporarily (mis)uses p) */
4662 for (p = collstart; p < collend; ++p) {
4663 str += sprintf(str, "&#%d;", (int)*p)__builtin___sprintf_chk (str, 0, __builtin_object_size (str, 2
> 1), "&#%d;", (int)*p)
;
4664 }
4665 p = collend;
4666 break;
4667 default:
/* any other handler name: call the registered error callback */
4668 repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
4669 encoding, reason, startp, size, &exc,
4670 collstart-startp, collend-startp, &newpos);
4671 if (repunicode == NULL((void *)0))
4672 goto onError;
4673 if (PyBytes_Check(repunicode)((((((PyObject*)(repunicode))->ob_type))->tp_flags &
((1L<<27))) != 0)
) {
4674 /* Directly copy bytes result to output. */
4675 repsize = PyBytes_Size(repunicode);
4676 if (repsize > 1) {
4677 /* Make room for all additional bytes. */
/* NOTE(review): the buffer grows by repsize-1 only, which appears
   to assume the replacement takes the place of at least one input
   character - confirm this is sufficient when the handler returns
   a newpos at or before the start of the run */
4678 respos = str - PyBytes_AS_STRING(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4678, "PyBytes_Check(res)") : (void
)0), (((PyBytesObject *)(res))->ob_sval))
;
4679 if (_PyBytes_Resize(&res, ressize+repsize-1)) {
4680 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4680, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
4681 goto onError;
4682 }
4683 str = PyBytes_AS_STRING(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4683, "PyBytes_Check(res)") : (void
)0), (((PyBytesObject *)(res))->ob_sval))
+ respos;
4684 ressize += repsize-1;
4685 }
4686 memcpy(str, PyBytes_AsString(repunicode), repsize)((__builtin_object_size (str, 0) != (size_t) -1) ? __builtin___memcpy_chk
(str, PyBytes_AsString(repunicode), repsize, __builtin_object_size
(str, 0)) : __inline_memcpy_chk (str, PyBytes_AsString(repunicode
), repsize))
;
4687 str += repsize;
/* continue at the input position chosen by the handler */
4688 p = startp + newpos;
4689 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4689, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
4690 break;
4691 }
4692 /* need more space? (at least enough for what we
4693 have+the replacement+the rest of the string, so
4694 we won't have to check space for encodable characters) */
4695 respos = str - PyBytes_AS_STRING(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4695, "PyBytes_Check(res)") : (void
)0), (((PyBytesObject *)(res))->ob_sval))
;
4696 repsize = PyUnicode_GET_SIZE(repunicode)((__builtin_expect(!(((((((PyObject*)(repunicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 4696, "PyUnicode_Check(repunicode)"
) : (void)0),(((PyUnicodeObject *)(repunicode))->length))
;
4697 requiredsize = respos+repsize+(endp-collend);
4698 if (requiredsize > ressize) {
4699 if (requiredsize<2*ressize)
4700 requiredsize = 2*ressize;
4701 if (_PyBytes_Resize(&res, requiredsize)) {
4702 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4702, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
4703 goto onError;
4704 }
4705 str = PyBytes_AS_STRING(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4705, "PyBytes_Check(res)") : (void
)0), (((PyBytesObject *)(res))->ob_sval))
+ respos;
4706 ressize = requiredsize;
4707 }
4708 /* check if there is anything unencodable in the replacement
4709 and copy it to the output */
4710 for (uni2 = PyUnicode_AS_UNICODE(repunicode)((__builtin_expect(!(((((((PyObject*)(repunicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 4710, "PyUnicode_Check(repunicode)"
) : (void)0),(((PyUnicodeObject *)(repunicode))->str))
;repsize-->0; ++uni2, ++str) {
4711 c = *uni2;
4712 if (c >= limit) {
/* the replacement itself is not encodable: report the error at the
   position of the first character of the original run */
4713 raise_encode_exception(&exc, encoding, startp, size,
4714 unicodepos, unicodepos+1, reason);
4715 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4715, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
4716 goto onError;
4717 }
4718 *str = (char)c;
4719 }
4720 p = startp + newpos;
4721 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4721, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
4722 }
4723 }
4724 }
4725 /* Resize if we allocated too much */
4726 size = str - PyBytes_AS_STRING(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4726, "PyBytes_Check(res)") : (void
)0), (((PyBytesObject *)(res))->ob_sval))
;
4727 if (size < ressize) { /* If this fails res will be NULL */
4728 assert(size >= 0)(__builtin_expect(!(size >= 0), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4728, "size >= 0") : (void)0)
;
4729 if (_PyBytes_Resize(&res, size) < 0)
4730 goto onError;
4731 }
4732
/* success: drop the helper objects and hand res to the caller */
4733 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4733, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
4734 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4734, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
4735 return res;
4736
/* failure: release everything, including the partial result */
4737 onError:
4738 Py_XDECREF(res)do { if ((res) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(res))->ob_refcnt != 0) { if (((PyObject*
)res)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4738, (PyObject *)(res)); } else _Py_Dealloc((PyObject *)(res
)); } while (0); } while (0)
;
4739 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4739, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
4740 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4740, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
4741 return NULL((void *)0);
4742}
4743
/* Encodes the size characters at p as Latin-1 by calling
   unicode_encode_ucs1() with a limit of 256; errors is passed through
   unchanged.  Returns whatever that helper returns (NULL on failure). */
4744PyObject *PyUnicode_EncodeLatin1PyUnicodeUCS2_EncodeLatin1(const Py_UNICODE *p,
4745 Py_ssize_t size,
4746 const char *errors)
4747{
4748 return unicode_encode_ucs1(p, size, errors, 256);
4749}
4750
/* Latin-1 encodes a unicode object.
   If the argument is not a unicode object, PyErr_BadArgument() is called
   and NULL is returned.  Otherwise the object's buffer and length are
   passed to the Latin-1 encoder with errors == NULL, which
   unicode_encode_ucs1() treats the same as "strict". */
4751PyObject *PyUnicode_AsLatin1StringPyUnicodeUCS2_AsLatin1String(PyObject *unicode)
4752{
4753 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
4754 PyErr_BadArgument();
4755 return NULL((void *)0);
4756 }
4757 return PyUnicode_EncodeLatin1PyUnicodeUCS2_EncodeLatin1(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 4757, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
4758 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 4758, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
4759 NULL((void *)0));
4760}
4761
4762/* --- 7-bit ASCII Codec -------------------------------------------------- */
4763
/* Decodes size bytes at s as 7-bit ASCII into a new unicode object.
   Bytes below 128 are copied one to one.  Every other byte is reported to
   unicode_decode_call_errorhandler() as a one-byte error range with the
   reason "ordinal not in range(128)"; a non-zero result from that call
   aborts the decode.  If fewer characters than size were produced, the
   result is shrunk to the number actually written.
   Returns the unicode object, or NULL on failure (the partial result, the
   cached handler and the exception object are released in onError). */
4764PyObject *PyUnicode_DecodeASCIIPyUnicodeUCS2_DecodeASCII(const char *s,
4765 Py_ssize_t size,
4766 const char *errors)
4767{
4768 const char *starts = s;
4769 PyUnicodeObject *v;
4770 Py_UNICODE *p;
4771 Py_ssize_t startinpos;
4772 Py_ssize_t endinpos;
4773 Py_ssize_t outpos;
4774 const char *e;
4775 PyObject *errorHandler = NULL((void *)0);
4776 PyObject *exc = NULL((void *)0);
4777
4778 /* ASCII is equivalent to the first 128 ordinals in Unicode. */
/* shortcut for a single valid ASCII byte: build the one-character
   string directly */
4779 if (size == 1 && *(unsigned char*)s < 128) {
4780 Py_UNICODE r = *(unsigned char*)s;
4781 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(&r, 1);
4782 }
4783
4784 v = _PyUnicode_New(size);
4785 if (v == NULL((void *)0))
4786 goto onError;
4787 if (size == 0)
4788 return (PyObject *)v;
4789 p = PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4789, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
4790 e = s + size;
4791 while (s < e) {
4792 register unsigned char c = (unsigned char)*s;
4793 if (c < 128) {
4794 *p++ = c;
4795 ++s;
4796 }
4797 else {
/* describe the offending byte (input range and current output
   offset) and let the error handler decide how to continue; s, v
   and p are passed by address - presumably so the helper can
   update them, confirm against its definition */
4798 startinpos = s-starts;
4799 endinpos = startinpos + 1;
4800 outpos = p - (Py_UNICODE *)PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4800, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
4801 if (unicode_decode_call_errorhandler(
4802 errors, &errorHandler,
4803 "ascii", "ordinal not in range(128)",
4804 &starts, &e, &startinpos, &endinpos, &exc, &s,
4805 &v, &outpos, &p))
4806 goto onError;
4807 }
4808 }
/* shrink the result if fewer characters were written than allocated */
4809 if (p - PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4809, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
< PyUnicode_GET_SIZE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4809, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->length))
)
4810 if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 4810, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
) < 0)
4811 goto onError;
4812 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4812, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
4813 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4813, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
4814 return (PyObject *)v;
4815
4816 onError:
4817 Py_XDECREF(v)do { if ((v) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(v))->ob_refcnt != 0) { if (((PyObject*)v)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4817, (PyObject *)(v)); } else _Py_Dealloc((PyObject *)(v))
; } while (0); } while (0)
;
4818 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4818, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
4819 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4819, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
4820 return NULL((void *)0);
4821}
4822
/* Encodes the size characters at p as ASCII by calling
   unicode_encode_ucs1() with a limit of 128; errors is passed through
   unchanged.  Returns whatever that helper returns (NULL on failure). */
4823PyObject *PyUnicode_EncodeASCIIPyUnicodeUCS2_EncodeASCII(const Py_UNICODE *p,
4824 Py_ssize_t size,
4825 const char *errors)
4826{
4827 return unicode_encode_ucs1(p, size, errors, 128);
4828}
4829
/* ASCII encodes a unicode object.
   If the argument is not a unicode object, PyErr_BadArgument() is called
   and NULL is returned.  Otherwise the object's buffer and length are
   passed to the ASCII encoder with errors == NULL, which
   unicode_encode_ucs1() treats the same as "strict". */
4830PyObject *PyUnicode_AsASCIIStringPyUnicodeUCS2_AsASCIIString(PyObject *unicode)
4831{
4832 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
4833 PyErr_BadArgument();
4834 return NULL((void *)0);
4835 }
4836 return PyUnicode_EncodeASCIIPyUnicodeUCS2_EncodeASCII(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 4836, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
4837 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 4837, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
4838 NULL((void *)0));
4839}
4840
4841#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
4842
4843/* --- MBCS codecs for Windows -------------------------------------------- */
4844
4845#if SIZEOF_INT4 < SIZEOF_SIZE_T8
4846#define NEED_RETRY
4847#endif
4848
4849/* XXX This code is limited to "true" double-byte encodings, as
4850 a) it assumes an incomplete character consists of a single byte, and
4851 b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte
4852 encodings, see IsDBCSLeadByteEx documentation. */
4853
/* Tells whether the byte at s[offset] should be treated as a DBCS lead
   byte.  Returns 0 when IsDBCSLeadByte() rejects the byte.  Otherwise the
   previous character position is obtained with CharPrev() and the result
   is non-zero if that position is the byte itself, if the byte found there
   is not a lead byte, or if it lies exactly two bytes back. */
4854static int is_dbcs_lead_byte(const char *s, int offset)
4855{
4856 const char *curr = s + offset;
4857
4858 if (IsDBCSLeadByte(*curr)) {
4859 const char *prev = CharPrev(s, curr);
/* NOTE(review): the three cases presumably rule out curr being the
   trail byte of a preceding two-byte character - confirm against the
   CharPrev documentation */
4860 return (prev == curr) || !IsDBCSLeadByte(*prev) || (curr - prev == 2);
4861 }
4862 return 0;
4863}
4864
4865/*
4866 * Decode MBCS string into unicode object. If 'final' is set, converts
4867 * trailing lead-byte too. Returns consumed size on success, -1 otherwise.
 *
 * *v may be NULL, in which case a new unicode object is created, or an
 * existing object, which is extended and the decoded text appended.
 * errors may be NULL or "strict" (invalid input is an error) or "ignore";
 * any other value raises ValueError.
4868 */
4869static int decode_mbcs(PyUnicodeObject **v,
4870 const char *s, /* MBCS string */
4871 int size, /* sizeof MBCS string */
4872 int final,
4873 const char *errors)
4874{
4875 Py_UNICODE *p;
4876 Py_ssize_t n;
4877 DWORD usize;
4878 DWORD flags;
4879
4880 assert(size >= 0)(__builtin_expect(!(size >= 0), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4880, "size >= 0") : (void)0)
;
4881
4882 /* check and handle 'errors' arg */
4883 if (errors==NULL((void *)0) || strcmp(errors, "strict")==0)
4884 flags = MB_ERR_INVALID_CHARS;
4885 else if (strcmp(errors, "ignore")==0)
4886 flags = 0;
4887 else {
4888 PyErr_Format(PyExc_ValueError,
4889 "mbcs encoding does not support errors='%s'",
4890 errors);
4891 return -1;
4892 }
4893
4894 /* Skip trailing lead-byte unless 'final' is set */
4895 if (!final && size >= 1 && is_dbcs_lead_byte(s, size - 1))
4896 --size;
4897
4898 /* First get the size of the result */
4899 if (size > 0) {
/* a NULL output buffer of length 0 only asks for the required size */
4900 usize = MultiByteToWideChar(CP_ACP, flags, s, size, NULL((void *)0), 0);
4901 if (usize==0)
4902 goto mbcs_decode_error;
4903 } else
4904 usize = 0;
4905
4906 if (*v == NULL((void *)0)) {
4907 /* Create unicode object */
4908 *v = _PyUnicode_New(usize);
4909 if (*v == NULL((void *)0))
4910 return -1;
4911 n = 0;
4912 }
4913 else {
4914 /* Extend unicode object */
4915 n = PyUnicode_GET_SIZE(*v)((__builtin_expect(!(((((((PyObject*)(*v))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4915, "PyUnicode_Check(*v)") : (
void)0),(((PyUnicodeObject *)(*v))->length))
;
4916 if (_PyUnicode_Resize(v, n + usize) < 0)
4917 return -1;
4918 }
4919
4920 /* Do the conversion */
4921 if (usize > 0) {
/* write after the n characters that were already in the object */
4922 p = PyUnicode_AS_UNICODE(*v)((__builtin_expect(!(((((((PyObject*)(*v))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 4922, "PyUnicode_Check(*v)") : (
void)0),(((PyUnicodeObject *)(*v))->str))
+ n;
4923 if (0 == MultiByteToWideChar(CP_ACP, flags, s, size, p, usize)) {
4924 goto mbcs_decode_error;
4925 }
4926 }
/* size may have been reduced by one above if a trailing lead byte
   was skipped */
4927 return size;
4928
4929mbcs_decode_error:
4930 /* If the last error was ERROR_NO_UNICODE_TRANSLATION, then
4931 we raise a UnicodeDecodeError - else it is a 'generic'
4932 windows error
4933 */
4934 if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION) {
4935 /* Ideally, we should get reason from FormatMessage - this
4936 is the Windows 2000 English version of the message
4937 */
4938 PyObject *exc = NULL((void *)0);
4939 const char *reason = "No mapping for the Unicode character exists "
4940 "in the target multi-byte code page.";
/* the exact failing position is not known here, so start and end
   are both passed as 0 */
4941 make_decode_exception(&exc, "mbcs", s, size, 0, 0, reason);
4942 if (exc != NULL((void *)0)) {
4943 PyCodec_StrictErrors(exc);
4944 Py_DECREF(exc)do { if (_Py_RefTotal-- , --((PyObject*)(exc))->ob_refcnt !=
0) { if (((PyObject*)exc)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 4944, (PyObject *)(exc)); } else _Py_Dealloc
((PyObject *)(exc)); } while (0)
;
4945 }
4946 } else {
4947 PyErr_SetFromWindowsErrWithFilename(0, NULL((void *)0));
4948 }
4949 return -1;
4950}
4951
/* Decodes size bytes at s from MBCS into a unicode object.
   If consumed is not NULL it receives the number of input bytes that were
   decoded, and the last chunk is decoded with final == 0, so a trailing
   lead byte may be left unconsumed.  If consumed is NULL the last chunk is
   decoded with final set.
   When NEED_RETRY is defined, input longer than INT_MAX is decoded in
   chunks of at most INT_MAX bytes that are appended to the same object.
   Returns the unicode object, or NULL if decode_mbcs() reported an error. */
4952PyObject *PyUnicode_DecodeMBCSStateful(const char *s,
4953 Py_ssize_t size,
4954 const char *errors,
4955 Py_ssize_t *consumed)
4956{
4957 PyUnicodeObject *v = NULL((void *)0);
4958 int done;
4959
4960 if (consumed)
4961 *consumed = 0;
4962
4963#ifdef NEED_RETRY
4964 retry:
/* non-final chunk: never treat its trailing lead byte as final */
4965 if (size > INT_MAX2147483647)
4966 done = decode_mbcs(&v, s, INT_MAX2147483647, 0, errors);
4967 else
4968#endif
4969 done = decode_mbcs(&v, s, (int)size, !consumed, errors);
4970
4971 if (done < 0) {
4972 Py_XDECREF(v)do { if ((v) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(v))->ob_refcnt != 0) { if (((PyObject*)v)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 4972, (PyObject *)(v)); } else _Py_Dealloc((PyObject *)(v))
; } while (0); } while (0)
;
4973 return NULL((void *)0);
4974 }
4975
4976 if (consumed)
4977 *consumed += done;
4978
4979#ifdef NEED_RETRY
/* advance by what was actually consumed and decode the next chunk */
4980 if (size > INT_MAX2147483647) {
4981 s += done;
4982 size -= done;
4983 goto retry;
4984 }
4985#endif
4986
4987 return (PyObject *)v;
4988}
4989
/* Decodes size bytes at s from MBCS.  Calls PyUnicode_DecodeMBCSStateful()
   with consumed == NULL, so the whole input is decoded as final. */
4990PyObject *PyUnicode_DecodeMBCS(const char *s,
4991 Py_ssize_t size,
4992 const char *errors)
4993{
4994 return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL((void *)0));
4995}
4996
4997/*
4998 * Convert unicode into string object (MBCS).
4999 * Returns 0 on success, -1 otherwise.
 *
 * *repr may be NULL, in which case a new bytes object is created, or an
 * existing bytes object, which is extended and the encoded data appended.
 * errors may be NULL or "strict" (a character that needed the default
 * character is reported as an encode error) or "replace"; any other value
 * raises ValueError.
5000 */
5001static int encode_mbcs(PyObject **repr,
5002 const Py_UNICODE *p, /* unicode */
5003 int size, /* size of unicode */
5004 const char* errors)
5005{
5006 BOOL usedDefaultChar = FALSE;
5007 BOOL *pusedDefaultChar;
5008 int mbcssize;
5009 Py_ssize_t n;
5010 PyObject *exc = NULL((void *)0);
5011 DWORD flags;
5012
5013 assert(size >= 0)(__builtin_expect(!(size >= 0), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 5013, "size >= 0") : (void)0)
;
5014
5015 /* check and handle 'errors' arg */
5016 if (errors==NULL((void *)0) || strcmp(errors, "strict")==0) {
/* strict: forbid best-fit mapping and track use of the default char */
5017 flags = WC_NO_BEST_FIT_CHARS;
5018 pusedDefaultChar = &usedDefaultChar;
5019 } else if (strcmp(errors, "replace")==0) {
5020 flags = 0;
5021 pusedDefaultChar = NULL((void *)0);
5022 } else {
5023 PyErr_Format(PyExc_ValueError,
5024 "mbcs encoding does not support errors='%s'",
5025 errors);
5026 return -1;
5027 }
5028
5029 /* First get the size of the result */
5030 if (size > 0) {
5031 mbcssize = WideCharToMultiByte(CP_ACP, flags, p, size, NULL((void *)0), 0,
5032 NULL((void *)0), pusedDefaultChar);
5033 if (mbcssize == 0) {
5034 PyErr_SetFromWindowsErrWithFilename(0, NULL((void *)0));
5035 return -1;
5036 }
5037 /* If we used a default char, then we failed! */
5038 if (pusedDefaultChar && *pusedDefaultChar)
5039 goto mbcs_encode_error;
5040 } else {
5041 mbcssize = 0;
5042 }
5043
5044 if (*repr == NULL((void *)0)) {
5045 /* Create string object */
5046 *repr = PyBytes_FromStringAndSize(NULL((void *)0), mbcssize);
5047 if (*repr == NULL((void *)0))
5048 return -1;
5049 n = 0;
5050 }
5051 else {
5052 /* Extend string object */
5053 n = PyBytes_Size(*repr);
5054 if (_PyBytes_Resize(repr, n + mbcssize) < 0)
5055 return -1;
5056 }
5057
5058 /* Do the conversion */
5059 if (size > 0) {
/* write after the n bytes that were already in the object */
5060 char *s = PyBytes_AS_STRING(*repr)((__builtin_expect(!(((((((PyObject*)(*repr))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 5060, "PyBytes_Check(*repr)") : (
void)0), (((PyBytesObject *)(*repr))->ob_sval))
+ n;
5061 if (0 == WideCharToMultiByte(CP_ACP, flags, p, size, s, mbcssize,
5062 NULL((void *)0), pusedDefaultChar)) {
5063 PyErr_SetFromWindowsErrWithFilename(0, NULL((void *)0));
5064 return -1;
5065 }
5066 if (pusedDefaultChar && *pusedDefaultChar)
5067 goto mbcs_encode_error;
5068 }
5069 return 0;
5070
5071mbcs_encode_error:
/* the failing position is not known here, so start and end are both
   passed as 0; the local exception object is released right away */
5072 raise_encode_exception(&exc, "mbcs", p, size, 0, 0, "invalid character");
5073 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 5073, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
5074 return -1;
5075}
5076
/* Encodes the size characters at p to MBCS and returns the bytes object.
   When NEED_RETRY is defined, input longer than INT_MAX is encoded in
   chunks of INT_MAX characters that encode_mbcs() appends to the same
   result object.  Returns NULL if encode_mbcs() reports an error; the
   partial result is released in that case. */
5077PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p,
5078 Py_ssize_t size,
5079 const char *errors)
5080{
5081 PyObject *repr = NULL((void *)0);
5082 int ret;
5083
5084#ifdef NEED_RETRY
5085 retry:
5086 if (size > INT_MAX2147483647)
5087 ret = encode_mbcs(&repr, p, INT_MAX2147483647, errors);
5088 else
5089#endif
5090 ret = encode_mbcs(&repr, p, (int)size, errors);
5091
5092 if (ret < 0) {
5093 Py_XDECREF(repr)do { if ((repr) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(repr))->ob_refcnt != 0) { if (((PyObject
*)repr)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 5093, (PyObject *)(repr)); } else _Py_Dealloc((PyObject *)(
repr)); } while (0); } while (0)
;
5094 return NULL((void *)0);
5095 }
5096
5097#ifdef NEED_RETRY
/* a full INT_MAX chunk was just encoded: move on to the remainder */
5098 if (size > INT_MAX2147483647) {
5099 p += INT_MAX2147483647;
5100 size -= INT_MAX2147483647;
5101 goto retry;
5102 }
5103#endif
5104
5105 return repr;
5106}
5107
/* MBCS encodes a unicode object.
   If the argument is not a unicode object, PyErr_BadArgument() is called
   and NULL is returned.  Otherwise the object's buffer and length are
   passed to PyUnicode_EncodeMBCS() with errors == NULL, which
   encode_mbcs() treats the same as "strict". */
5108PyObject *PyUnicode_AsMBCSString(PyObject *unicode)
5109{
5110 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
) {
5111 PyErr_BadArgument();
5112 return NULL((void *)0);
5113 }
5114 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5114, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
5115 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5115, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
5116 NULL((void *)0));
5117}
5118
5119#undef NEED_RETRY
5120
5121#endif /* MS_WINDOWS */
5122
5123/* --- Character Mapping Codec -------------------------------------------- */
5124
5125PyObject *PyUnicode_DecodeCharmapPyUnicodeUCS2_DecodeCharmap(const char *s,
5126 Py_ssize_t size,
5127 PyObject *mapping,
5128 const char *errors)
5129{
5130 const char *starts = s;
5131 Py_ssize_t startinpos;
5132 Py_ssize_t endinpos;
5133 Py_ssize_t outpos;
5134 const char *e;
5135 PyUnicodeObject *v;
5136 Py_UNICODE *p;
5137 Py_ssize_t extrachars = 0;
5138 PyObject *errorHandler = NULL((void *)0);
5139 PyObject *exc = NULL((void *)0);
5140 Py_UNICODE *mapstring = NULL((void *)0);
5141 Py_ssize_t maplen = 0;
5142
5143 /* Default to Latin-1 */
5144 if (mapping == NULL((void *)0))
5145 return PyUnicode_DecodeLatin1PyUnicodeUCS2_DecodeLatin1(s, size, errors);
5146
5147 v = _PyUnicode_New(size);
5148 if (v == NULL((void *)0))
5149 goto onError;
5150 if (size == 0)
5151 return (PyObject *)v;
5152 p = PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5152, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
5153 e = s + size;
5154 if (PyUnicode_CheckExact(mapping)((((PyObject*)(mapping))->ob_type) == &PyUnicode_Type)) {
5155 mapstring = PyUnicode_AS_UNICODE(mapping)((__builtin_expect(!(((((((PyObject*)(mapping))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5155, "PyUnicode_Check(mapping)"
) : (void)0),(((PyUnicodeObject *)(mapping))->str))
;
5156 maplen = PyUnicode_GET_SIZE(mapping)((__builtin_expect(!(((((((PyObject*)(mapping))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5156, "PyUnicode_Check(mapping)"
) : (void)0),(((PyUnicodeObject *)(mapping))->length))
;
5157 while (s < e) {
5158 unsigned char ch = *s;
5159 Py_UNICODE x = 0xfffe; /* illegal value */
5160
5161 if (ch < maplen)
5162 x = mapstring[ch];
5163
5164 if (x == 0xfffe) {
5165 /* undefined mapping */
5166 outpos = p-PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5166, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
5167 startinpos = s-starts;
5168 endinpos = startinpos+1;
5169 if (unicode_decode_call_errorhandler(
5170 errors, &errorHandler,
5171 "charmap", "character maps to <undefined>",
5172 &starts, &e, &startinpos, &endinpos, &exc, &s,
5173 &v, &outpos, &p)) {
5174 goto onError;
5175 }
5176 continue;
5177 }
5178 *p++ = x;
5179 ++s;
5180 }
5181 }
5182 else {
5183 while (s < e) {
5184 unsigned char ch = *s;
5185 PyObject *w, *x;
5186
5187 /* Get mapping (char ordinal -> integer, Unicode char or None) */
5188 w = PyLong_FromLong((long)ch);
5189 if (w == NULL((void *)0))
5190 goto onError;
5191 x = PyObject_GetItem(mapping, w);
5192 Py_DECREF(w)do { if (_Py_RefTotal-- , --((PyObject*)(w))->ob_refcnt !=
0) { if (((PyObject*)w)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5192, (PyObject *)(w)); } else _Py_Dealloc
((PyObject *)(w)); } while (0)
;
5193 if (x == NULL((void *)0)) {
5194 if (PyErr_ExceptionMatches(PyExc_LookupError)) {
5195 /* No mapping found means: mapping is undefined. */
5196 PyErr_Clear();
5197 x = Py_None(&_Py_NoneStruct);
5198 Py_INCREF(x)( _Py_RefTotal++ , ((PyObject*)(x))->ob_refcnt++);
5199 } else
5200 goto onError;
5201 }
5202
5203 /* Apply mapping */
5204 if (PyLong_Check(x)((((((PyObject*)(x))->ob_type))->tp_flags & ((1L<<
24))) != 0)
) {
5205 long value = PyLong_AS_LONG(x)PyLong_AsLong(x);
5206 if (value < 0 || value > 65535) {
5207 PyErr_SetString(PyExc_TypeError,
5208 "character mapping must be in range(65536)");
5209 Py_DECREF(x)do { if (_Py_RefTotal-- , --((PyObject*)(x))->ob_refcnt !=
0) { if (((PyObject*)x)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5209, (PyObject *)(x)); } else _Py_Dealloc
((PyObject *)(x)); } while (0)
;
5210 goto onError;
5211 }
5212 *p++ = (Py_UNICODE)value;
5213 }
5214 else if (x == Py_None(&_Py_NoneStruct)) {
5215 /* undefined mapping */
5216 outpos = p-PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5216, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
5217 startinpos = s-starts;
5218 endinpos = startinpos+1;
5219 if (unicode_decode_call_errorhandler(
5220 errors, &errorHandler,
5221 "charmap", "character maps to <undefined>",
5222 &starts, &e, &startinpos, &endinpos, &exc, &s,
5223 &v, &outpos, &p)) {
5224 Py_DECREF(x)do { if (_Py_RefTotal-- , --((PyObject*)(x))->ob_refcnt !=
0) { if (((PyObject*)x)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5224, (PyObject *)(x)); } else _Py_Dealloc
((PyObject *)(x)); } while (0)
;
5225 goto onError;
5226 }
5227 Py_DECREF(x)do { if (_Py_RefTotal-- , --((PyObject*)(x))->ob_refcnt !=
0) { if (((PyObject*)x)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5227, (PyObject *)(x)); } else _Py_Dealloc
((PyObject *)(x)); } while (0)
;
5228 continue;
5229 }
5230 else if (PyUnicode_Check(x)((((((PyObject*)(x))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
5231 Py_ssize_t targetsize = PyUnicode_GET_SIZE(x)((__builtin_expect(!(((((((PyObject*)(x))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5231, "PyUnicode_Check(x)") : (void)0),(((PyUnicodeObject *
)(x))->length))
;
5232
5233 if (targetsize == 1)
5234 /* 1-1 mapping */
5235 *p++ = *PyUnicode_AS_UNICODE(x)((__builtin_expect(!(((((((PyObject*)(x))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5235, "PyUnicode_Check(x)") : (void)0),(((PyUnicodeObject *
)(x))->str))
;
5236
5237 else if (targetsize > 1) {
5238 /* 1-n mapping */
5239 if (targetsize > extrachars) {
5240 /* resize first */
5241 Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5241, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
;
5242 Py_ssize_t needed = (targetsize - extrachars) + \
5243 (targetsize << 2);
5244 extrachars += needed;
5245 /* XXX overflow detection missing */
5246 if (_PyUnicode_Resize(&v,
5247 PyUnicode_GET_SIZE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5247, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->length))
+ needed) < 0) {
5248 Py_DECREF(x)do { if (_Py_RefTotal-- , --((PyObject*)(x))->ob_refcnt !=
0) { if (((PyObject*)x)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5248, (PyObject *)(x)); } else _Py_Dealloc
((PyObject *)(x)); } while (0)
;
5249 goto onError;
5250 }
5251 p = PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5251, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
+ oldpos;
5252 }
5253 Py_UNICODE_COPY(p,((__builtin_object_size ((p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((p), (((__builtin_expect(!(((((((PyObject*)(x))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5254, "PyUnicode_Check(x)"
) : (void)0),(((PyUnicodeObject *)(x))->str))), (targetsize
)*sizeof(Py_UNICODE), __builtin_object_size ((p), 0)) : __inline_memcpy_chk
((p), (((__builtin_expect(!(((((((PyObject*)(x))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5254, "PyUnicode_Check(x)"
) : (void)0),(((PyUnicodeObject *)(x))->str))), (targetsize
)*sizeof(Py_UNICODE)))
5254 PyUnicode_AS_UNICODE(x),((__builtin_object_size ((p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((p), (((__builtin_expect(!(((((((PyObject*)(x))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5254, "PyUnicode_Check(x)"
) : (void)0),(((PyUnicodeObject *)(x))->str))), (targetsize
)*sizeof(Py_UNICODE), __builtin_object_size ((p), 0)) : __inline_memcpy_chk
((p), (((__builtin_expect(!(((((((PyObject*)(x))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5254, "PyUnicode_Check(x)"
) : (void)0),(((PyUnicodeObject *)(x))->str))), (targetsize
)*sizeof(Py_UNICODE)))
5255 targetsize)((__builtin_object_size ((p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((p), (((__builtin_expect(!(((((((PyObject*)(x))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5254, "PyUnicode_Check(x)"
) : (void)0),(((PyUnicodeObject *)(x))->str))), (targetsize
)*sizeof(Py_UNICODE), __builtin_object_size ((p), 0)) : __inline_memcpy_chk
((p), (((__builtin_expect(!(((((((PyObject*)(x))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5254, "PyUnicode_Check(x)"
) : (void)0),(((PyUnicodeObject *)(x))->str))), (targetsize
)*sizeof(Py_UNICODE)))
;
5256 p += targetsize;
5257 extrachars -= targetsize;
5258 }
5259 /* 1-0 mapping: skip the character */
5260 }
5261 else {
5262 /* wrong return value */
5263 PyErr_SetString(PyExc_TypeError,
5264 "character mapping must return integer, None or str");
5265 Py_DECREF(x)do { if (_Py_RefTotal-- , --((PyObject*)(x))->ob_refcnt !=
0) { if (((PyObject*)x)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5265, (PyObject *)(x)); } else _Py_Dealloc
((PyObject *)(x)); } while (0)
;
5266 goto onError;
5267 }
5268 Py_DECREF(x)do { if (_Py_RefTotal-- , --((PyObject*)(x))->ob_refcnt !=
0) { if (((PyObject*)x)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5268, (PyObject *)(x)); } else _Py_Dealloc
((PyObject *)(x)); } while (0)
;
5269 ++s;
5270 }
5271 }
5272 if (p - PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5272, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
< PyUnicode_GET_SIZE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5272, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->length))
)
5273 if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 5273, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
) < 0)
5274 goto onError;
5275 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5275, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
5276 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 5276, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
5277 return (PyObject *)v;
5278
5279 onError:
5280 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5280, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
5281 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 5281, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
5282 Py_XDECREF(v)do { if ((v) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(v))->ob_refcnt != 0) { if (((PyObject*)v)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 5282, (PyObject *)(v)); } else _Py_Dealloc((PyObject *)(v))
; } while (0); } while (0)
;
5283 return NULL((void *)0);
5284}
5285
5286/* Charmap encoding: the lookup table */
5287
5288struct encoding_map{
5289 PyObject_HEADPyObject ob_base;
5290 unsigned char level1[32];
5291 int count2, count3;
5292 unsigned char level23[1];
5293};
5294
5295static PyObject*
5296encoding_map_size(PyObject *obj, PyObject* args)
5297{
5298 struct encoding_map *map = (struct encoding_map*)obj;
5299 return PyLong_FromLong(sizeof(*map) - 1 + 16*map->count2 +
5300 128*map->count3);
5301}
5302
5303static PyMethodDef encoding_map_methods[] = {
5304 {"size", encoding_map_size, METH_NOARGS0x0004,
5305 PyDoc_STR("Return the size (in bytes) of this object")"Return the size (in bytes) of this object" },
5306 { 0 }
5307};
5308
5309static void
5310encoding_map_dealloc(PyObject* o)
5311{
5312 PyObject_FREE_PyObject_DebugFree(o);
5313}
5314
5315static PyTypeObject EncodingMapType = {
5316 PyVarObject_HEAD_INIT(NULL, 0){ { 0, 0, 1, ((void *)0) }, 0 },
5317 "EncodingMap", /*tp_name*/
5318 sizeof(struct encoding_map), /*tp_basicsize*/
5319 0, /*tp_itemsize*/
5320 /* methods */
5321 encoding_map_dealloc, /*tp_dealloc*/
5322 0, /*tp_print*/
5323 0, /*tp_getattr*/
5324 0, /*tp_setattr*/
5325 0, /*tp_reserved*/
5326 0, /*tp_repr*/
5327 0, /*tp_as_number*/
5328 0, /*tp_as_sequence*/
5329 0, /*tp_as_mapping*/
5330 0, /*tp_hash*/
5331 0, /*tp_call*/
5332 0, /*tp_str*/
5333 0, /*tp_getattro*/
5334 0, /*tp_setattro*/
5335 0, /*tp_as_buffer*/
5336 Py_TPFLAGS_DEFAULT( 0 | (1L<<18) | 0), /*tp_flags*/
5337 0, /*tp_doc*/
5338 0, /*tp_traverse*/
5339 0, /*tp_clear*/
5340 0, /*tp_richcompare*/
5341 0, /*tp_weaklistoffset*/
5342 0, /*tp_iter*/
5343 0, /*tp_iternext*/
5344 encoding_map_methods, /*tp_methods*/
5345 0, /*tp_members*/
5346 0, /*tp_getset*/
5347 0, /*tp_base*/
5348 0, /*tp_dict*/
5349 0, /*tp_descr_get*/
5350 0, /*tp_descr_set*/
5351 0, /*tp_dictoffset*/
5352 0, /*tp_init*/
5353 0, /*tp_alloc*/
5354 0, /*tp_new*/
5355 0, /*tp_free*/
5356 0, /*tp_is_gc*/
5357};
5358
5359PyObject*
5360PyUnicode_BuildEncodingMap(PyObject* string)
5361{
5362 Py_UNICODE *decode;
5363 PyObject *result;
5364 struct encoding_map *mresult;
5365 int i;
5366 int need_dict = 0;
5367 unsigned char level1[32];
5368 unsigned char level2[512];
5369 unsigned char *mlevel1, *mlevel2, *mlevel3;
5370 int count2 = 0, count3 = 0;
5371
5372 if (!PyUnicode_Check(string)((((((PyObject*)(string))->ob_type))->tp_flags & ((
1L<<28))) != 0)
|| PyUnicode_GetSizePyUnicodeUCS2_GetSize(string) != 256) {
5373 PyErr_BadArgument();
5374 return NULL((void *)0);
5375 }
5376 decode = PyUnicode_AS_UNICODE(string)((__builtin_expect(!(((((((PyObject*)(string))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 5376, "PyUnicode_Check(string)")
: (void)0),(((PyUnicodeObject *)(string))->str))
;
5377 memset(level1, 0xFF, sizeof level1)((__builtin_object_size (level1, 0) != (size_t) -1) ? __builtin___memset_chk
(level1, 0xFF, sizeof level1, __builtin_object_size (level1,
0)) : __inline_memset_chk (level1, 0xFF, sizeof level1))
;
5378 memset(level2, 0xFF, sizeof level2)((__builtin_object_size (level2, 0) != (size_t) -1) ? __builtin___memset_chk
(level2, 0xFF, sizeof level2, __builtin_object_size (level2,
0)) : __inline_memset_chk (level2, 0xFF, sizeof level2))
;
5379
5380 /* If there isn't a one-to-one mapping of NULL to \0,
5381 or if there are non-BMP characters, we need to use
5382 a mapping dictionary. */
5383 if (decode[0] != 0)
5384 need_dict = 1;
5385 for (i = 1; i < 256; i++) {
5386 int l1, l2;
5387 if (decode[i] == 0
5388#ifdef Py_UNICODE_WIDE
5389 || decode[i] > 0xFFFF
5390#endif
5391 ) {
5392 need_dict = 1;
5393 break;
5394 }
5395 if (decode[i] == 0xFFFE)
5396 /* unmapped character */
5397 continue;
5398 l1 = decode[i] >> 11;
5399 l2 = decode[i] >> 7;
5400 if (level1[l1] == 0xFF)
5401 level1[l1] = count2++;
5402 if (level2[l2] == 0xFF)
5403 level2[l2] = count3++;
5404 }
5405
5406 if (count2 >= 0xFF || count3 >= 0xFF)
5407 need_dict = 1;
5408
5409 if (need_dict) {
5410 PyObject *result = PyDict_New();
5411 PyObject *key, *value;
5412 if (!result)
5413 return NULL((void *)0);
5414 for (i = 0; i < 256; i++) {
5415 key = PyLong_FromLong(decode[i]);
5416 value = PyLong_FromLong(i);
5417 if (!key || !value)
5418 goto failed1;
5419 if (PyDict_SetItem(result, key, value) == -1)
5420 goto failed1;
5421 Py_DECREF(key)do { if (_Py_RefTotal-- , --((PyObject*)(key))->ob_refcnt !=
0) { if (((PyObject*)key)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5421, (PyObject *)(key)); } else _Py_Dealloc
((PyObject *)(key)); } while (0)
;
5422 Py_DECREF(value)do { if (_Py_RefTotal-- , --((PyObject*)(value))->ob_refcnt
!= 0) { if (((PyObject*)value)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5422, (PyObject *)(value)); } else
_Py_Dealloc((PyObject *)(value)); } while (0)
;
5423 }
5424 return result;
5425 failed1:
5426 Py_XDECREF(key)do { if ((key) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(key))->ob_refcnt != 0) { if (((PyObject*
)key)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 5426, (PyObject *)(key)); } else _Py_Dealloc((PyObject *)(key
)); } while (0); } while (0)
;
5427 Py_XDECREF(value)do { if ((value) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(value))->ob_refcnt != 0) { if (((PyObject
*)value)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 5427, (PyObject *)(value)); } else _Py_Dealloc((PyObject *)
(value)); } while (0); } while (0)
;
5428 Py_DECREF(result)do { if (_Py_RefTotal-- , --((PyObject*)(result))->ob_refcnt
!= 0) { if (((PyObject*)result)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5428, (PyObject *)(result)); } else
_Py_Dealloc((PyObject *)(result)); } while (0)
;
5429 return NULL((void *)0);
5430 }
5431
5432 /* Create a three-level trie */
5433 result = PyObject_MALLOC_PyObject_DebugMalloc(sizeof(struct encoding_map) +
5434 16*count2 + 128*count3 - 1);
5435 if (!result)
5436 return PyErr_NoMemory();
5437 PyObject_Init(result, &EncodingMapType);
5438 mresult = (struct encoding_map*)result;
5439 mresult->count2 = count2;
5440 mresult->count3 = count3;
5441 mlevel1 = mresult->level1;
5442 mlevel2 = mresult->level23;
5443 mlevel3 = mresult->level23 + 16*count2;
5444 memcpy(mlevel1, level1, 32)((__builtin_object_size (mlevel1, 0) != (size_t) -1) ? __builtin___memcpy_chk
(mlevel1, level1, 32, __builtin_object_size (mlevel1, 0)) : __inline_memcpy_chk
(mlevel1, level1, 32))
;
5445 memset(mlevel2, 0xFF, 16*count2)((__builtin_object_size (mlevel2, 0) != (size_t) -1) ? __builtin___memset_chk
(mlevel2, 0xFF, 16*count2, __builtin_object_size (mlevel2, 0
)) : __inline_memset_chk (mlevel2, 0xFF, 16*count2))
;
5446 memset(mlevel3, 0, 128*count3)((__builtin_object_size (mlevel3, 0) != (size_t) -1) ? __builtin___memset_chk
(mlevel3, 0, 128*count3, __builtin_object_size (mlevel3, 0))
: __inline_memset_chk (mlevel3, 0, 128*count3))
;
5447 count3 = 0;
5448 for (i = 1; i < 256; i++) {
5449 int o1, o2, o3, i2, i3;
5450 if (decode[i] == 0xFFFE)
5451 /* unmapped character */
5452 continue;
5453 o1 = decode[i]>>11;
5454 o2 = (decode[i]>>7) & 0xF;
5455 i2 = 16*mlevel1[o1] + o2;
5456 if (mlevel2[i2] == 0xFF)
5457 mlevel2[i2] = count3++;
5458 o3 = decode[i] & 0x7F;
5459 i3 = 128*mlevel2[i2] + o3;
5460 mlevel3[i3] = i;
5461 }
5462 return result;
5463}
5464
5465static int
5466encoding_map_lookup(Py_UNICODE c, PyObject *mapping)
5467{
5468 struct encoding_map *map = (struct encoding_map*)mapping;
5469 int l1 = c>>11;
5470 int l2 = (c>>7) & 0xF;
5471 int l3 = c & 0x7F;
5472 int i;
5473
5474#ifdef Py_UNICODE_WIDE
5475 if (c > 0xFFFF) {
5476 return -1;
5477 }
5478#endif
5479 if (c == 0)
5480 return 0;
5481 /* level 1*/
5482 i = map->level1[l1];
5483 if (i == 0xFF) {
5484 return -1;
5485 }
5486 /* level 2*/
5487 i = map->level23[16*i+l2];
5488 if (i == 0xFF) {
5489 return -1;
5490 }
5491 /* level 3 */
5492 i = map->level23[16*map->count2 + 128*i + l3];
5493 if (i == 0) {
5494 return -1;
5495 }
5496 return i;
5497}
5498
5499/* Lookup the character ch in the mapping. If the character
5500 can't be found, Py_None is returned (or NULL, if another
5501 error occurred). */
5502static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping)
5503{
5504 PyObject *w = PyLong_FromLong((long)c);
5505 PyObject *x;
5506
5507 if (w == NULL((void *)0))
5508 return NULL((void *)0);
5509 x = PyObject_GetItem(mapping, w);
5510 Py_DECREF(w)do { if (_Py_RefTotal-- , --((PyObject*)(w))->ob_refcnt !=
0) { if (((PyObject*)w)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5510, (PyObject *)(w)); } else _Py_Dealloc
((PyObject *)(w)); } while (0)
;
5511 if (x == NULL((void *)0)) {
5512 if (PyErr_ExceptionMatches(PyExc_LookupError)) {
5513 /* No mapping found means: mapping is undefined. */
5514 PyErr_Clear();
5515 x = Py_None(&_Py_NoneStruct);
5516 Py_INCREF(x)( _Py_RefTotal++ , ((PyObject*)(x))->ob_refcnt++);
5517 return x;
5518 } else
5519 return NULL((void *)0);
5520 }
5521 else if (x == Py_None(&_Py_NoneStruct))
5522 return x;
5523 else if (PyLong_Check(x)((((((PyObject*)(x))->ob_type))->tp_flags & ((1L<<
24))) != 0)
) {
5524 long value = PyLong_AS_LONG(x)PyLong_AsLong(x);
5525 if (value < 0 || value > 255) {
5526 PyErr_SetString(PyExc_TypeError,
5527 "character mapping must be in range(256)");
5528 Py_DECREF(x)do { if (_Py_RefTotal-- , --((PyObject*)(x))->ob_refcnt !=
0) { if (((PyObject*)x)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5528, (PyObject *)(x)); } else _Py_Dealloc
((PyObject *)(x)); } while (0)
;
5529 return NULL((void *)0);
5530 }
5531 return x;
5532 }
5533 else if (PyBytes_Check(x)((((((PyObject*)(x))->ob_type))->tp_flags & ((1L<<
27))) != 0)
)
5534 return x;
5535 else {
5536 /* wrong return value */
5537 PyErr_Format(PyExc_TypeError,
5538 "character mapping must return integer, bytes or None, not %.400s",
5539 x->ob_type->tp_name);
5540 Py_DECREF(x)do { if (_Py_RefTotal-- , --((PyObject*)(x))->ob_refcnt !=
0) { if (((PyObject*)x)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5540, (PyObject *)(x)); } else _Py_Dealloc
((PyObject *)(x)); } while (0)
;
5541 return NULL((void *)0);
5542 }
5543}
5544
5545static int
5546charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
5547{
5548 Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj)((__builtin_expect(!(((((((PyObject*)(*outobj))->ob_type))
->tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5548, "PyBytes_Check(*outobj)"
) : (void)0),(((PyVarObject*)(*outobj))->ob_size))
;
5549 /* exponentially overallocate to minimize reallocations */
5550 if (requiredsize < 2*outsize)
5551 requiredsize = 2*outsize;
5552 if (_PyBytes_Resize(outobj, requiredsize))
5553 return -1;
5554 return 0;
5555}
5556
/* Outcome of charmapencode_output(). */
typedef enum charmapencode_result {
    enc_SUCCESS,    /* the character was encoded and written */
    enc_FAILED,     /* the mapping is undefined for the character */
    enc_EXCEPTION   /* a lookup or resize error occurred */
} charmapencode_result;
5560/* lookup the character, put the result in the output string and adjust
5561 various state variables. Resize the output bytes object if not enough
5562 space is available. Return a new reference to the object that
5563 was put in the output buffer, or Py_None, if the mapping was undefined
5564 (in which case no character was written) or NULL, if a
5565 reallocation error occurred. The caller must decref the result */
5566static
5567charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping,
5568 PyObject **outobj, Py_ssize_t *outpos)
5569{
5570 PyObject *rep;
5571 char *outstart;
5572 Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj)((__builtin_expect(!(((((((PyObject*)(*outobj))->ob_type))
->tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5572, "PyBytes_Check(*outobj)"
) : (void)0),(((PyVarObject*)(*outobj))->ob_size))
;
5573
5574 if (Py_TYPE(mapping)(((PyObject*)(mapping))->ob_type) == &EncodingMapType) {
5575 int res = encoding_map_lookup(c, mapping);
5576 Py_ssize_t requiredsize = *outpos+1;
5577 if (res == -1)
5578 return enc_FAILED;
5579 if (outsize<requiredsize)
5580 if (charmapencode_resize(outobj, outpos, requiredsize))
5581 return enc_EXCEPTION;
5582 outstart = PyBytes_AS_STRING(*outobj)((__builtin_expect(!(((((((PyObject*)(*outobj))->ob_type))
->tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5582, "PyBytes_Check(*outobj)"
) : (void)0), (((PyBytesObject *)(*outobj))->ob_sval))
;
5583 outstart[(*outpos)++] = (char)res;
5584 return enc_SUCCESS;
5585 }
5586
5587 rep = charmapencode_lookup(c, mapping);
5588 if (rep==NULL((void *)0))
5589 return enc_EXCEPTION;
5590 else if (rep==Py_None(&_Py_NoneStruct)) {
5591 Py_DECREF(rep)do { if (_Py_RefTotal-- , --((PyObject*)(rep))->ob_refcnt !=
0) { if (((PyObject*)rep)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5591, (PyObject *)(rep)); } else _Py_Dealloc
((PyObject *)(rep)); } while (0)
;
5592 return enc_FAILED;
5593 } else {
5594 if (PyLong_Check(rep)((((((PyObject*)(rep))->ob_type))->tp_flags & ((1L<<
24))) != 0)
) {
5595 Py_ssize_t requiredsize = *outpos+1;
5596 if (outsize<requiredsize)
5597 if (charmapencode_resize(outobj, outpos, requiredsize)) {
5598 Py_DECREF(rep)do { if (_Py_RefTotal-- , --((PyObject*)(rep))->ob_refcnt !=
0) { if (((PyObject*)rep)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5598, (PyObject *)(rep)); } else _Py_Dealloc
((PyObject *)(rep)); } while (0)
;
5599 return enc_EXCEPTION;
5600 }
5601 outstart = PyBytes_AS_STRING(*outobj)((__builtin_expect(!(((((((PyObject*)(*outobj))->ob_type))
->tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5601, "PyBytes_Check(*outobj)"
) : (void)0), (((PyBytesObject *)(*outobj))->ob_sval))
;
5602 outstart[(*outpos)++] = (char)PyLong_AS_LONG(rep)PyLong_AsLong(rep);
5603 }
5604 else {
5605 const char *repchars = PyBytes_AS_STRING(rep)((__builtin_expect(!(((((((PyObject*)(rep))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 5605, "PyBytes_Check(rep)") : (void
)0), (((PyBytesObject *)(rep))->ob_sval))
;
5606 Py_ssize_t repsize = PyBytes_GET_SIZE(rep)((__builtin_expect(!(((((((PyObject*)(rep))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 5606, "PyBytes_Check(rep)") : (void
)0),(((PyVarObject*)(rep))->ob_size))
;
5607 Py_ssize_t requiredsize = *outpos+repsize;
5608 if (outsize<requiredsize)
5609 if (charmapencode_resize(outobj, outpos, requiredsize)) {
5610 Py_DECREF(rep)do { if (_Py_RefTotal-- , --((PyObject*)(rep))->ob_refcnt !=
0) { if (((PyObject*)rep)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5610, (PyObject *)(rep)); } else _Py_Dealloc
((PyObject *)(rep)); } while (0)
;
5611 return enc_EXCEPTION;
5612 }
5613 outstart = PyBytes_AS_STRING(*outobj)((__builtin_expect(!(((((((PyObject*)(*outobj))->ob_type))
->tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5613, "PyBytes_Check(*outobj)"
) : (void)0), (((PyBytesObject *)(*outobj))->ob_sval))
;
5614 memcpy(outstart + *outpos, repchars, repsize)((__builtin_object_size (outstart + *outpos, 0) != (size_t) -
1) ? __builtin___memcpy_chk (outstart + *outpos, repchars, repsize
, __builtin_object_size (outstart + *outpos, 0)) : __inline_memcpy_chk
(outstart + *outpos, repchars, repsize))
;
5615 *outpos += repsize;
5616 }
5617 }
5618 Py_DECREF(rep)do { if (_Py_RefTotal-- , --((PyObject*)(rep))->ob_refcnt !=
0) { if (((PyObject*)rep)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5618, (PyObject *)(rep)); } else _Py_Dealloc
((PyObject *)(rep)); } while (0)
;
5619 return enc_SUCCESS;
5620}
5621
5622/* handle an error in PyUnicode_EncodeCharmap
5623 Return 0 on success, -1 on error */
5624static
5625int charmap_encoding_error(
5626 const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping,
5627 PyObject **exceptionObject,
5628 int *known_errorHandler, PyObject **errorHandler, const char *errors,
5629 PyObject **res, Py_ssize_t *respos)
5630{
5631 PyObject *repunicode = NULL((void *)0); /* initialize to prevent gcc warning */
5632 Py_ssize_t repsize;
5633 Py_ssize_t newpos;
5634 Py_UNICODE *uni2;
5635 /* startpos for collecting unencodable chars */
5636 Py_ssize_t collstartpos = *inpos;
5637 Py_ssize_t collendpos = *inpos+1;
5638 Py_ssize_t collpos;
5639 char *encoding = "charmap";
5640 char *reason = "character maps to <undefined>";
5641 charmapencode_result x;
5642
5643 /* find all unencodable characters */
5644 while (collendpos < size) {
5645 PyObject *rep;
5646 if (Py_TYPE(mapping)(((PyObject*)(mapping))->ob_type) == &EncodingMapType) {
5647 int res = encoding_map_lookup(p[collendpos], mapping);
5648 if (res != -1)
5649 break;
5650 ++collendpos;
5651 continue;
5652 }
5653
5654 rep = charmapencode_lookup(p[collendpos], mapping);
5655 if (rep==NULL((void *)0))
5656 return -1;
5657 else if (rep!=Py_None(&_Py_NoneStruct)) {
5658 Py_DECREF(rep)do { if (_Py_RefTotal-- , --((PyObject*)(rep))->ob_refcnt !=
0) { if (((PyObject*)rep)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5658, (PyObject *)(rep)); } else _Py_Dealloc
((PyObject *)(rep)); } while (0)
;
5659 break;
5660 }
5661 Py_DECREF(rep)do { if (_Py_RefTotal-- , --((PyObject*)(rep))->ob_refcnt !=
0) { if (((PyObject*)rep)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5661, (PyObject *)(rep)); } else _Py_Dealloc
((PyObject *)(rep)); } while (0)
;
5662 ++collendpos;
5663 }
5664 /* cache callback name lookup
5665 * (if not done yet, i.e. it's the first error) */
5666 if (*known_errorHandler==-1) {
5667 if ((errors==NULL((void *)0)) || (!strcmp(errors, "strict")))
5668 *known_errorHandler = 1;
5669 else if (!strcmp(errors, "replace"))
5670 *known_errorHandler = 2;
5671 else if (!strcmp(errors, "ignore"))
5672 *known_errorHandler = 3;
5673 else if (!strcmp(errors, "xmlcharrefreplace"))
5674 *known_errorHandler = 4;
5675 else
5676 *known_errorHandler = 0;
5677 }
5678 switch (*known_errorHandler) {
5679 case 1: /* strict */
5680 raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
5681 return -1;
5682 case 2: /* replace */
5683 for (collpos = collstartpos; collpos<collendpos; ++collpos) {
5684 x = charmapencode_output('?', mapping, res, respos);
5685 if (x==enc_EXCEPTION) {
5686 return -1;
5687 }
5688 else if (x==enc_FAILED) {
5689 raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
5690 return -1;
5691 }
5692 }
5693 /* fall through */
5694 case 3: /* ignore */
5695 *inpos = collendpos;
5696 break;
5697 case 4: /* xmlcharrefreplace */
5698 /* generate replacement (temporarily (mis)uses p) */
5699 for (collpos = collstartpos; collpos < collendpos; ++collpos) {
5700 char buffer[2+29+1+1];
5701 char *cp;
5702 sprintf(buffer, "&#%d;", (int)p[collpos])__builtin___sprintf_chk (buffer, 0, __builtin_object_size (buffer
, 2 > 1), "&#%d;", (int)p[collpos])
;
5703 for (cp = buffer; *cp; ++cp) {
5704 x = charmapencode_output(*cp, mapping, res, respos);
5705 if (x==enc_EXCEPTION)
5706 return -1;
5707 else if (x==enc_FAILED) {
5708 raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
5709 return -1;
5710 }
5711 }
5712 }
5713 *inpos = collendpos;
5714 break;
5715 default:
5716 repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
5717 encoding, reason, p, size, exceptionObject,
5718 collstartpos, collendpos, &newpos);
5719 if (repunicode == NULL((void *)0))
5720 return -1;
5721 if (PyBytes_Check(repunicode)((((((PyObject*)(repunicode))->ob_type))->tp_flags &
((1L<<27))) != 0)
) {
5722 /* Directly copy bytes result to output. */
5723 Py_ssize_t outsize = PyBytes_Size(*res);
5724 Py_ssize_t requiredsize;
5725 repsize = PyBytes_Size(repunicode);
5726 requiredsize = *respos + repsize;
5727 if (requiredsize > outsize)
5728 /* Make room for all additional bytes. */
5729 if (charmapencode_resize(res, respos, requiredsize)) {
5730 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5730, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
5731 return -1;
5732 }
5733 memcpy(PyBytes_AsString(*res) + *respos,((__builtin_object_size (PyBytes_AsString(*res) + *respos, 0)
!= (size_t) -1) ? __builtin___memcpy_chk (PyBytes_AsString(*
res) + *respos, PyBytes_AsString(repunicode), repsize, __builtin_object_size
(PyBytes_AsString(*res) + *respos, 0)) : __inline_memcpy_chk
(PyBytes_AsString(*res) + *respos, PyBytes_AsString(repunicode
), repsize))
5734 PyBytes_AsString(repunicode), repsize)((__builtin_object_size (PyBytes_AsString(*res) + *respos, 0)
!= (size_t) -1) ? __builtin___memcpy_chk (PyBytes_AsString(*
res) + *respos, PyBytes_AsString(repunicode), repsize, __builtin_object_size
(PyBytes_AsString(*res) + *respos, 0)) : __inline_memcpy_chk
(PyBytes_AsString(*res) + *respos, PyBytes_AsString(repunicode
), repsize))
;
5735 *respos += repsize;
5736 *inpos = newpos;
5737 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5737, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
5738 break;
5739 }
5740 /* generate replacement */
5741 repsize = PyUnicode_GET_SIZE(repunicode)((__builtin_expect(!(((((((PyObject*)(repunicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5741, "PyUnicode_Check(repunicode)"
) : (void)0),(((PyUnicodeObject *)(repunicode))->length))
;
5742 for (uni2 = PyUnicode_AS_UNICODE(repunicode)((__builtin_expect(!(((((((PyObject*)(repunicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5742, "PyUnicode_Check(repunicode)"
) : (void)0),(((PyUnicodeObject *)(repunicode))->str))
; repsize-->0; ++uni2) {
5743 x = charmapencode_output(*uni2, mapping, res, respos);
5744 if (x==enc_EXCEPTION) {
5745 return -1;
5746 }
5747 else if (x==enc_FAILED) {
5748 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5748, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
5749 raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
5750 return -1;
5751 }
5752 }
5753 *inpos = newpos;
5754 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5754, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
5755 }
5756 return 0;
5757}
5758
5759PyObject *PyUnicode_EncodeCharmapPyUnicodeUCS2_EncodeCharmap(const Py_UNICODE *p,
5760 Py_ssize_t size,
5761 PyObject *mapping,
5762 const char *errors)
5763{
5764 /* output object */
5765 PyObject *res = NULL((void *)0);
5766 /* current input position */
5767 Py_ssize_t inpos = 0;
5768 /* current output position */
5769 Py_ssize_t respos = 0;
5770 PyObject *errorHandler = NULL((void *)0);
5771 PyObject *exc = NULL((void *)0);
5772 /* the following variable is used for caching string comparisons
5773 * -1=not initialized, 0=unknown, 1=strict, 2=replace,
5774 * 3=ignore, 4=xmlcharrefreplace */
5775 int known_errorHandler = -1;
5776
5777 /* Default to Latin-1 */
5778 if (mapping == NULL((void *)0))
5779 return PyUnicode_EncodeLatin1PyUnicodeUCS2_EncodeLatin1(p, size, errors);
5780
5781 /* allocate enough for a simple encoding without
5782 replacements, if we need more, we'll resize */
5783 res = PyBytes_FromStringAndSize(NULL((void *)0), size);
5784 if (res == NULL((void *)0))
5785 goto onError;
5786 if (size == 0)
5787 return res;
5788
5789 while (inpos<size) {
5790 /* try to encode it */
5791 charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos);
5792 if (x==enc_EXCEPTION) /* error */
5793 goto onError;
5794 if (x==enc_FAILED) { /* unencodable character */
5795 if (charmap_encoding_error(p, size, &inpos, mapping,
5796 &exc,
5797 &known_errorHandler, &errorHandler, errors,
5798 &res, &respos)) {
5799 goto onError;
5800 }
5801 }
5802 else
5803 /* done with this character => adjust input position */
5804 ++inpos;
5805 }
5806
5807 /* Resize if we allocated to much */
5808 if (respos<PyBytes_GET_SIZE(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 5808, "PyBytes_Check(res)") : (void
)0),(((PyVarObject*)(res))->ob_size))
)
5809 if (_PyBytes_Resize(&res, respos) < 0)
5810 goto onError;
5811
5812 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 5812, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
5813 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5813, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
5814 return res;
5815
5816 onError:
5817 Py_XDECREF(res)do { if ((res) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(res))->ob_refcnt != 0) { if (((PyObject*
)res)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 5817, (PyObject *)(res)); } else _Py_Dealloc((PyObject *)(res
)); } while (0); } while (0)
;
5818 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 5818, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
5819 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5819, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
5820 return NULL((void *)0);
5821}
5822
5823PyObject *PyUnicode_AsCharmapStringPyUnicodeUCS2_AsCharmapString(PyObject *unicode,
5824 PyObject *mapping)
5825{
5826 if (!PyUnicode_Check(unicode)((((((PyObject*)(unicode))->ob_type))->tp_flags & (
(1L<<28))) != 0)
|| mapping == NULL((void *)0)) {
5827 PyErr_BadArgument();
5828 return NULL((void *)0);
5829 }
5830 return PyUnicode_EncodeCharmapPyUnicodeUCS2_EncodeCharmap(PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5830, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
,
5831 PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5831, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
,
5832 mapping,
5833 NULL((void *)0));
5834}
5835
5836/* create or adjust a UnicodeTranslateError */
5837static void make_translate_exception(PyObject **exceptionObject,
5838 const Py_UNICODE *unicode, Py_ssize_t size,
5839 Py_ssize_t startpos, Py_ssize_t endpos,
5840 const char *reason)
5841{
5842 if (*exceptionObject == NULL((void *)0)) {
5843 *exceptionObject = PyUnicodeTranslateError_Create(
5844 unicode, size, startpos, endpos, reason);
5845 }
5846 else {
5847 if (PyUnicodeTranslateError_SetStart(*exceptionObject, startpos))
5848 goto onError;
5849 if (PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos))
5850 goto onError;
5851 if (PyUnicodeTranslateError_SetReason(*exceptionObject, reason))
5852 goto onError;
5853 return;
5854 onError:
5855 Py_DECREF(*exceptionObject)do { if (_Py_RefTotal-- , --((PyObject*)(*exceptionObject))->
ob_refcnt != 0) { if (((PyObject*)*exceptionObject)->ob_refcnt
< 0) _Py_NegativeRefcount("Objects/unicodeobject.c", 5855
, (PyObject *)(*exceptionObject)); } else _Py_Dealloc((PyObject
*)(*exceptionObject)); } while (0)
;
5856 *exceptionObject = NULL((void *)0);
5857 }
5858}
5859
5860/* raises a UnicodeTranslateError */
5861static void raise_translate_exception(PyObject **exceptionObject,
5862 const Py_UNICODE *unicode, Py_ssize_t size,
5863 Py_ssize_t startpos, Py_ssize_t endpos,
5864 const char *reason)
5865{
5866 make_translate_exception(exceptionObject,
5867 unicode, size, startpos, endpos, reason);
5868 if (*exceptionObject != NULL((void *)0))
5869 PyCodec_StrictErrors(*exceptionObject);
5870}
5871
5872/* error handling callback helper:
5873 build arguments, call the callback and check the arguments,
5874 put the result into newpos and return the replacement string, which
5875 has to be freed by the caller */
5876static PyObject *unicode_translate_call_errorhandler(const char *errors,
5877 PyObject **errorHandler,
5878 const char *reason,
5879 const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject,
5880 Py_ssize_t startpos, Py_ssize_t endpos,
5881 Py_ssize_t *newpos)
5882{
5883 static char *argparse = "O!n;translating error handler must return (str, int) tuple";
5884
5885 Py_ssize_t i_newpos;
5886 PyObject *restuple;
5887 PyObject *resunicode;
5888
5889 if (*errorHandler == NULL((void *)0)) {
5890 *errorHandler = PyCodec_LookupError(errors);
5891 if (*errorHandler == NULL((void *)0))
5892 return NULL((void *)0);
5893 }
5894
5895 make_translate_exception(exceptionObject,
5896 unicode, size, startpos, endpos, reason);
5897 if (*exceptionObject == NULL((void *)0))
5898 return NULL((void *)0);
5899
5900 restuple = PyObject_CallFunctionObjArgs(
5901 *errorHandler, *exceptionObject, NULL((void *)0));
5902 if (restuple == NULL((void *)0))
5903 return NULL((void *)0);
5904 if (!PyTuple_Check(restuple)((((((PyObject*)(restuple))->ob_type))->tp_flags & (
(1L<<26))) != 0)
) {
5905 PyErr_SetString(PyExc_TypeError, &argparse[4]);
5906 Py_DECREF(restuple)do { if (_Py_RefTotal-- , --((PyObject*)(restuple))->ob_refcnt
!= 0) { if (((PyObject*)restuple)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5906, (PyObject *)(restuple)); } else
_Py_Dealloc((PyObject *)(restuple)); } while (0)
;
5907 return NULL((void *)0);
5908 }
5909 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(restuple, argparse, &PyUnicode_Type,
5910 &resunicode, &i_newpos)) {
5911 Py_DECREF(restuple)do { if (_Py_RefTotal-- , --((PyObject*)(restuple))->ob_refcnt
!= 0) { if (((PyObject*)restuple)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5911, (PyObject *)(restuple)); } else
_Py_Dealloc((PyObject *)(restuple)); } while (0)
;
5912 return NULL((void *)0);
5913 }
5914 if (i_newpos<0)
5915 *newpos = size+i_newpos;
5916 else
5917 *newpos = i_newpos;
5918 if (*newpos<0 || *newpos>size) {
5919 PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
5920 Py_DECREF(restuple)do { if (_Py_RefTotal-- , --((PyObject*)(restuple))->ob_refcnt
!= 0) { if (((PyObject*)restuple)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5920, (PyObject *)(restuple)); } else
_Py_Dealloc((PyObject *)(restuple)); } while (0)
;
5921 return NULL((void *)0);
5922 }
5923 Py_INCREF(resunicode)( _Py_RefTotal++ , ((PyObject*)(resunicode))->ob_refcnt++);
5924 Py_DECREF(restuple)do { if (_Py_RefTotal-- , --((PyObject*)(restuple))->ob_refcnt
!= 0) { if (((PyObject*)restuple)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5924, (PyObject *)(restuple)); } else
_Py_Dealloc((PyObject *)(restuple)); } while (0)
;
5925 return resunicode;
5926}
5927
5928/* Lookup the character ch in the mapping and put the result in result,
5929 which must be decrefed by the caller.
5930 Return 0 on success, -1 on error */
5931static
5932int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
5933{
5934 PyObject *w = PyLong_FromLong((long)c);
5935 PyObject *x;
5936
5937 if (w == NULL((void *)0))
5938 return -1;
5939 x = PyObject_GetItem(mapping, w);
5940 Py_DECREF(w)do { if (_Py_RefTotal-- , --((PyObject*)(w))->ob_refcnt !=
0) { if (((PyObject*)w)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5940, (PyObject *)(w)); } else _Py_Dealloc
((PyObject *)(w)); } while (0)
;
5941 if (x == NULL((void *)0)) {
5942 if (PyErr_ExceptionMatches(PyExc_LookupError)) {
5943 /* No mapping found means: use 1:1 mapping. */
5944 PyErr_Clear();
5945 *result = NULL((void *)0);
5946 return 0;
5947 } else
5948 return -1;
5949 }
5950 else if (x == Py_None(&_Py_NoneStruct)) {
5951 *result = x;
5952 return 0;
5953 }
5954 else if (PyLong_Check(x)((((((PyObject*)(x))->ob_type))->tp_flags & ((1L<<
24))) != 0)
) {
5955 long value = PyLong_AS_LONG(x)PyLong_AsLong(x);
5956 long max = PyUnicode_GetMaxPyUnicodeUCS2_GetMax();
5957 if (value < 0 || value > max) {
5958 PyErr_Format(PyExc_TypeError,
5959 "character mapping must be in range(0x%x)", max+1);
5960 Py_DECREF(x)do { if (_Py_RefTotal-- , --((PyObject*)(x))->ob_refcnt !=
0) { if (((PyObject*)x)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5960, (PyObject *)(x)); } else _Py_Dealloc
((PyObject *)(x)); } while (0)
;
5961 return -1;
5962 }
5963 *result = x;
5964 return 0;
5965 }
5966 else if (PyUnicode_Check(x)((((((PyObject*)(x))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
5967 *result = x;
5968 return 0;
5969 }
5970 else {
5971 /* wrong return value */
5972 PyErr_SetString(PyExc_TypeError,
5973 "character mapping must return integer, None or str");
5974 Py_DECREF(x)do { if (_Py_RefTotal-- , --((PyObject*)(x))->ob_refcnt !=
0) { if (((PyObject*)x)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 5974, (PyObject *)(x)); } else _Py_Dealloc
((PyObject *)(x)); } while (0)
;
5975 return -1;
5976 }
5977}
5978/* ensure that *outobj is at least requiredsize characters long,
5979 if not reallocate and adjust various state variables.
5980 Return 0 on success, -1 on error */
5981static
5982int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp,
5983 Py_ssize_t requiredsize)
5984{
5985 Py_ssize_t oldsize = PyUnicode_GET_SIZE(*outobj)((__builtin_expect(!(((((((PyObject*)(*outobj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5985, "PyUnicode_Check(*outobj)"
) : (void)0),(((PyUnicodeObject *)(*outobj))->length))
;
5986 if (requiredsize > oldsize) {
5987 /* remember old output position */
5988 Py_ssize_t outpos = *outp-PyUnicode_AS_UNICODE(*outobj)((__builtin_expect(!(((((((PyObject*)(*outobj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5988, "PyUnicode_Check(*outobj)"
) : (void)0),(((PyUnicodeObject *)(*outobj))->str))
;
5989 /* exponentially overallocate to minimize reallocations */
5990 if (requiredsize < 2 * oldsize)
5991 requiredsize = 2 * oldsize;
5992 if (PyUnicode_ResizePyUnicodeUCS2_Resize(outobj, requiredsize) < 0)
5993 return -1;
5994 *outp = PyUnicode_AS_UNICODE(*outobj)((__builtin_expect(!(((((((PyObject*)(*outobj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 5994, "PyUnicode_Check(*outobj)"
) : (void)0),(((PyUnicodeObject *)(*outobj))->str))
+ outpos;
5995 }
5996 return 0;
5997}
5998/* lookup the character, put the result in the output string and adjust
5999 various state variables. Return a new reference to the object that
6000 was put in the output buffer in *result, or Py_None, if the mapping was
6001 undefined (in which case no character was written).
6002 The called must decref result.
6003 Return 0 on success, -1 on error. */
6004static
6005int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp,
6006 Py_ssize_t insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp,
6007 PyObject **res)
6008{
6009 if (charmaptranslate_lookup(*curinp, mapping, res))
6010 return -1;
6011 if (*res==NULL((void *)0)) {
6012 /* not found => default to 1:1 mapping */
6013 *(*outp)++ = *curinp;
6014 }
6015 else if (*res==Py_None(&_Py_NoneStruct))
6016 ;
6017 else if (PyLong_Check(*res)((((((PyObject*)(*res))->ob_type))->tp_flags & ((1L
<<24))) != 0)
) {
6018 /* no overflow check, because we know that the space is enough */
6019 *(*outp)++ = (Py_UNICODE)PyLong_AS_LONG(*res)PyLong_AsLong(*res);
6020 }
6021 else if (PyUnicode_Check(*res)((((((PyObject*)(*res))->ob_type))->tp_flags & ((1L
<<28))) != 0)
) {
6022 Py_ssize_t repsize = PyUnicode_GET_SIZE(*res)((__builtin_expect(!(((((((PyObject*)(*res))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6022, "PyUnicode_Check(*res)") :
(void)0),(((PyUnicodeObject *)(*res))->length))
;
6023 if (repsize==1) {
6024 /* no overflow check, because we know that the space is enough */
6025 *(*outp)++ = *PyUnicode_AS_UNICODE(*res)((__builtin_expect(!(((((((PyObject*)(*res))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6025, "PyUnicode_Check(*res)") :
(void)0),(((PyUnicodeObject *)(*res))->str))
;
6026 }
6027 else if (repsize!=0) {
6028 /* more than one character */
6029 Py_ssize_t requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)((__builtin_expect(!(((((((PyObject*)(*outobj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6029, "PyUnicode_Check(*outobj)"
) : (void)0),(((PyUnicodeObject *)(*outobj))->str))
) +
6030 (insize - (curinp-startinp)) +
6031 repsize - 1;
6032 if (charmaptranslate_makespace(outobj, outp, requiredsize))
6033 return -1;
6034 memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize)((__builtin_object_size (*outp, 0) != (size_t) -1) ? __builtin___memcpy_chk
(*outp, ((__builtin_expect(!(((((((PyObject*)(*res))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6034, "PyUnicode_Check(*res)"
) : (void)0),(((PyUnicodeObject *)(*res))->str)), sizeof(Py_UNICODE
)*repsize, __builtin_object_size (*outp, 0)) : __inline_memcpy_chk
(*outp, ((__builtin_expect(!(((((((PyObject*)(*res))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6034, "PyUnicode_Check(*res)"
) : (void)0),(((PyUnicodeObject *)(*res))->str)), sizeof(Py_UNICODE
)*repsize))
;
6035 *outp += repsize;
6036 }
6037 }
6038 else
6039 return -1;
6040 return 0;
6041}
6042
6043PyObject *PyUnicode_TranslateCharmapPyUnicodeUCS2_TranslateCharmap(const Py_UNICODE *p,
6044 Py_ssize_t size,
6045 PyObject *mapping,
6046 const char *errors)
6047{
6048 /* output object */
6049 PyObject *res = NULL((void *)0);
6050 /* pointers to the beginning and end+1 of input */
6051 const Py_UNICODE *startp = p;
6052 const Py_UNICODE *endp = p + size;
6053 /* pointer into the output */
6054 Py_UNICODE *str;
6055 /* current output position */
6056 Py_ssize_t respos = 0;
6057 char *reason = "character maps to <undefined>";
6058 PyObject *errorHandler = NULL((void *)0);
6059 PyObject *exc = NULL((void *)0);
6060 /* the following variable is used for caching string comparisons
6061 * -1=not initialized, 0=unknown, 1=strict, 2=replace,
6062 * 3=ignore, 4=xmlcharrefreplace */
6063 int known_errorHandler = -1;
6064
6065 if (mapping == NULL((void *)0)) {
6066 PyErr_BadArgument();
6067 return NULL((void *)0);
6068 }
6069
6070 /* allocate enough for a simple 1:1 translation without
6071 replacements, if we need more, we'll resize */
6072 res = PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(NULL((void *)0), size);
6073 if (res == NULL((void *)0))
6074 goto onError;
6075 if (size == 0)
6076 return res;
6077 str = PyUnicode_AS_UNICODE(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6077, "PyUnicode_Check(res)") : (
void)0),(((PyUnicodeObject *)(res))->str))
;
6078
6079 while (p<endp) {
6080 /* try to encode it */
6081 PyObject *x = NULL((void *)0);
6082 if (charmaptranslate_output(startp, p, size, mapping, &res, &str, &x)) {
6083 Py_XDECREF(x)do { if ((x) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(x))->ob_refcnt != 0) { if (((PyObject*)x)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 6083, (PyObject *)(x)); } else _Py_Dealloc((PyObject *)(x))
; } while (0); } while (0)
;
6084 goto onError;
6085 }
6086 Py_XDECREF(x)do { if ((x) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(x))->ob_refcnt != 0) { if (((PyObject*)x)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 6086, (PyObject *)(x)); } else _Py_Dealloc((PyObject *)(x))
; } while (0); } while (0)
;
6087 if (x!=Py_None(&_Py_NoneStruct)) /* it worked => adjust input pointer */
6088 ++p;
6089 else { /* untranslatable character */
6090 PyObject *repunicode = NULL((void *)0); /* initialize to prevent gcc warning */
6091 Py_ssize_t repsize;
6092 Py_ssize_t newpos;
6093 Py_UNICODE *uni2;
6094 /* startpos for collecting untranslatable chars */
6095 const Py_UNICODE *collstart = p;
6096 const Py_UNICODE *collend = p+1;
6097 const Py_UNICODE *coll;
6098
6099 /* find all untranslatable characters */
6100 while (collend < endp) {
6101 if (charmaptranslate_lookup(*collend, mapping, &x))
6102 goto onError;
6103 Py_XDECREF(x)do { if ((x) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(x))->ob_refcnt != 0) { if (((PyObject*)x)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 6103, (PyObject *)(x)); } else _Py_Dealloc((PyObject *)(x))
; } while (0); } while (0)
;
6104 if (x!=Py_None(&_Py_NoneStruct))
6105 break;
6106 ++collend;
6107 }
6108 /* cache callback name lookup
6109 * (if not done yet, i.e. it's the first error) */
6110 if (known_errorHandler==-1) {
6111 if ((errors==NULL((void *)0)) || (!strcmp(errors, "strict")))
6112 known_errorHandler = 1;
6113 else if (!strcmp(errors, "replace"))
6114 known_errorHandler = 2;
6115 else if (!strcmp(errors, "ignore"))
6116 known_errorHandler = 3;
6117 else if (!strcmp(errors, "xmlcharrefreplace"))
6118 known_errorHandler = 4;
6119 else
6120 known_errorHandler = 0;
6121 }
6122 switch (known_errorHandler) {
6123 case 1: /* strict */
6124 raise_translate_exception(&exc, startp, size, collstart-startp, collend-startp, reason);
6125 goto onError;
6126 case 2: /* replace */
6127 /* No need to check for space, this is a 1:1 replacement */
6128 for (coll = collstart; coll<collend; ++coll)
6129 *str++ = '?';
6130 /* fall through */
6131 case 3: /* ignore */
6132 p = collend;
6133 break;
6134 case 4: /* xmlcharrefreplace */
6135 /* generate replacement (temporarily (mis)uses p) */
6136 for (p = collstart; p < collend; ++p) {
6137 char buffer[2+29+1+1];
6138 char *cp;
6139 sprintf(buffer, "&#%d;", (int)*p)__builtin___sprintf_chk (buffer, 0, __builtin_object_size (buffer
, 2 > 1), "&#%d;", (int)*p)
;
6140 if (charmaptranslate_makespace(&res, &str,
6141 (str-PyUnicode_AS_UNICODE(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6141, "PyUnicode_Check(res)") : (
void)0),(((PyUnicodeObject *)(res))->str))
)+strlen(buffer)+(endp-collend)))
6142 goto onError;
6143 for (cp = buffer; *cp; ++cp)
6144 *str++ = *cp;
6145 }
6146 p = collend;
6147 break;
6148 default:
6149 repunicode = unicode_translate_call_errorhandler(errors, &errorHandler,
6150 reason, startp, size, &exc,
6151 collstart-startp, collend-startp, &newpos);
6152 if (repunicode == NULL((void *)0))
6153 goto onError;
6154 /* generate replacement */
6155 repsize = PyUnicode_GET_SIZE(repunicode)((__builtin_expect(!(((((((PyObject*)(repunicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6155, "PyUnicode_Check(repunicode)"
) : (void)0),(((PyUnicodeObject *)(repunicode))->length))
;
6156 if (charmaptranslate_makespace(&res, &str,
6157 (str-PyUnicode_AS_UNICODE(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6157, "PyUnicode_Check(res)") : (
void)0),(((PyUnicodeObject *)(res))->str))
)+repsize+(endp-collend))) {
6158 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6158, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
6159 goto onError;
6160 }
6161 for (uni2 = PyUnicode_AS_UNICODE(repunicode)((__builtin_expect(!(((((((PyObject*)(repunicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6161, "PyUnicode_Check(repunicode)"
) : (void)0),(((PyUnicodeObject *)(repunicode))->str))
; repsize-->0; ++uni2)
6162 *str++ = *uni2;
6163 p = startp + newpos;
6164 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6164, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
6165 }
6166 }
6167 }
6168 /* Resize if we allocated to much */
6169 respos = str-PyUnicode_AS_UNICODE(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6169, "PyUnicode_Check(res)") : (
void)0),(((PyUnicodeObject *)(res))->str))
;
6170 if (respos<PyUnicode_GET_SIZE(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6170, "PyUnicode_Check(res)") : (
void)0),(((PyUnicodeObject *)(res))->length))
) {
6171 if (PyUnicode_ResizePyUnicodeUCS2_Resize(&res, respos) < 0)
6172 goto onError;
6173 }
6174 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 6174, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
6175 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6175, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
6176 return res;
6177
6178 onError:
6179 Py_XDECREF(res)do { if ((res) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(res))->ob_refcnt != 0) { if (((PyObject*
)res)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 6179, (PyObject *)(res)); } else _Py_Dealloc((PyObject *)(res
)); } while (0); } while (0)
;
6180 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 6180, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
6181 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6181, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
6182 return NULL((void *)0);
6183}
6184
6185PyObject *PyUnicode_TranslatePyUnicodeUCS2_Translate(PyObject *str,
6186 PyObject *mapping,
6187 const char *errors)
6188{
6189 PyObject *result;
6190
6191 str = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(str);
6192 if (str == NULL((void *)0))
6193 goto onError;
6194 result = PyUnicode_TranslateCharmapPyUnicodeUCS2_TranslateCharmap(PyUnicode_AS_UNICODE(str)((__builtin_expect(!(((((((PyObject*)(str))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6194, "PyUnicode_Check(str)") : (
void)0),(((PyUnicodeObject *)(str))->str))
,
6195 PyUnicode_GET_SIZE(str)((__builtin_expect(!(((((((PyObject*)(str))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6195, "PyUnicode_Check(str)") : (
void)0),(((PyUnicodeObject *)(str))->length))
,
6196 mapping,
6197 errors);
6198 Py_DECREF(str)do { if (_Py_RefTotal-- , --((PyObject*)(str))->ob_refcnt !=
0) { if (((PyObject*)str)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6198, (PyObject *)(str)); } else _Py_Dealloc
((PyObject *)(str)); } while (0)
;
6199 return result;
6200
6201 onError:
6202 Py_XDECREF(str)do { if ((str) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(str))->ob_refcnt != 0) { if (((PyObject*
)str)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 6202, (PyObject *)(str)); } else _Py_Dealloc((PyObject *)(str
)); } while (0); } while (0)
;
6203 return NULL((void *)0);
6204}
6205
6206PyObject *
6207PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
6208 Py_ssize_t length)
6209{
6210 PyObject *result;
6211 Py_UNICODE *p; /* write pointer into result */
6212 Py_ssize_t i;
6213 /* Copy to a new string */
6214 result = (PyObject *)_PyUnicode_New(length);
6215 Py_UNICODE_COPY(PyUnicode_AS_UNICODE(result), s, length)((__builtin_object_size ((((__builtin_expect(!(((((((PyObject
*)(result))->ob_type))->tp_flags & ((1L<<28))
) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 6215, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject
*)(result))->str))), 0) != (size_t) -1) ? __builtin___memcpy_chk
((((__builtin_expect(!(((((((PyObject*)(result))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6215, "PyUnicode_Check(result)"
) : (void)0),(((PyUnicodeObject *)(result))->str))), (s), (
length)*sizeof(Py_UNICODE), __builtin_object_size ((((__builtin_expect
(!(((((((PyObject*)(result))->ob_type))->tp_flags &
((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 6215, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject
*)(result))->str))), 0)) : __inline_memcpy_chk ((((__builtin_expect
(!(((((((PyObject*)(result))->ob_type))->tp_flags &
((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 6215, "PyUnicode_Check(result)") : (void)0),(((PyUnicodeObject
*)(result))->str))), (s), (length)*sizeof(Py_UNICODE)))
;
6216 if (result == NULL((void *)0))
6217 return result;
6218 p = PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6218, "PyUnicode_Check(result)")
: (void)0),(((PyUnicodeObject *)(result))->str))
;
6219 /* Iterate over code points */
6220 for (i = 0; i < length; i++) {
6221 Py_UNICODE ch =s[i];
6222 if (ch > 127) {
6223 int decimal = Py_UNICODE_TODECIMAL(ch)_PyUnicode_ToDecimalDigit(ch);
6224 if (decimal >= 0)
6225 p[i] = '0' + decimal;
6226 }
6227 }
6228 return result;
6229}
6230/* --- Decimal Encoder ---------------------------------------------------- */
6231
6232int PyUnicode_EncodeDecimalPyUnicodeUCS2_EncodeDecimal(Py_UNICODE *s,
6233 Py_ssize_t length,
6234 char *output,
6235 const char *errors)
6236{
6237 Py_UNICODE *p, *end;
6238 PyObject *errorHandler = NULL((void *)0);
6239 PyObject *exc = NULL((void *)0);
6240 const char *encoding = "decimal";
6241 const char *reason = "invalid decimal Unicode string";
6242 /* the following variable is used for caching string comparisons
6243 * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
6244 int known_errorHandler = -1;
6245
6246 if (output == NULL((void *)0)) {
6247 PyErr_BadArgument();
6248 return -1;
6249 }
6250
6251 p = s;
6252 end = s + length;
6253 while (p < end) {
6254 register Py_UNICODE ch = *p;
6255 int decimal;
6256 PyObject *repunicode;
6257 Py_ssize_t repsize;
6258 Py_ssize_t newpos;
6259 Py_UNICODE *uni2;
6260 Py_UNICODE *collstart;
6261 Py_UNICODE *collend;
6262
6263 if (Py_UNICODE_ISSPACE(ch)((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace
(ch))
) {
6264 *output++ = ' ';
6265 ++p;
6266 continue;
6267 }
6268 decimal = Py_UNICODE_TODECIMAL(ch)_PyUnicode_ToDecimalDigit(ch);
6269 if (decimal >= 0) {
6270 *output++ = '0' + decimal;
6271 ++p;
6272 continue;
6273 }
6274 if (0 < ch && ch < 256) {
6275 *output++ = (char)ch;
6276 ++p;
6277 continue;
6278 }
6279 /* All other characters are considered unencodable */
6280 collstart = p;
6281 collend = p+1;
6282 while (collend < end) {
6283 if ((0 < *collend && *collend < 256) ||
6284 !Py_UNICODE_ISSPACE(*collend)((*collend) < 128U ? _Py_ascii_whitespace[(*collend)] : _PyUnicode_IsWhitespace
(*collend))
||
6285 Py_UNICODE_TODECIMAL(*collend)_PyUnicode_ToDecimalDigit(*collend))
6286 break;
6287 }
6288 /* cache callback name lookup
6289 * (if not done yet, i.e. it's the first error) */
6290 if (known_errorHandler==-1) {
6291 if ((errors==NULL((void *)0)) || (!strcmp(errors, "strict")))
6292 known_errorHandler = 1;
6293 else if (!strcmp(errors, "replace"))
6294 known_errorHandler = 2;
6295 else if (!strcmp(errors, "ignore"))
6296 known_errorHandler = 3;
6297 else if (!strcmp(errors, "xmlcharrefreplace"))
6298 known_errorHandler = 4;
6299 else
6300 known_errorHandler = 0;
6301 }
6302 switch (known_errorHandler) {
6303 case 1: /* strict */
6304 raise_encode_exception(&exc, encoding, s, length, collstart-s, collend-s, reason);
6305 goto onError;
6306 case 2: /* replace */
6307 for (p = collstart; p < collend; ++p)
6308 *output++ = '?';
6309 /* fall through */
6310 case 3: /* ignore */
6311 p = collend;
6312 break;
6313 case 4: /* xmlcharrefreplace */
6314 /* generate replacement (temporarily (mis)uses p) */
6315 for (p = collstart; p < collend; ++p)
6316 output += sprintf(output, "&#%d;", (int)*p)__builtin___sprintf_chk (output, 0, __builtin_object_size (output
, 2 > 1), "&#%d;", (int)*p)
;
6317 p = collend;
6318 break;
6319 default:
6320 repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
6321 encoding, reason, s, length, &exc,
6322 collstart-s, collend-s, &newpos);
6323 if (repunicode == NULL((void *)0))
6324 goto onError;
6325 if (!PyUnicode_Check(repunicode)((((((PyObject*)(repunicode))->ob_type))->tp_flags &
((1L<<28))) != 0)
) {
6326 /* Byte results not supported, since they have no decimal property. */
6327 PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
6328 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6328, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
6329 goto onError;
6330 }
6331 /* generate replacement */
6332 repsize = PyUnicode_GET_SIZE(repunicode)((__builtin_expect(!(((((((PyObject*)(repunicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6332, "PyUnicode_Check(repunicode)"
) : (void)0),(((PyUnicodeObject *)(repunicode))->length))
;
6333 for (uni2 = PyUnicode_AS_UNICODE(repunicode)((__builtin_expect(!(((((((PyObject*)(repunicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6333, "PyUnicode_Check(repunicode)"
) : (void)0),(((PyUnicodeObject *)(repunicode))->str))
; repsize-->0; ++uni2) {
6334 Py_UNICODE ch = *uni2;
6335 if (Py_UNICODE_ISSPACE(ch)((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace
(ch))
)
6336 *output++ = ' ';
6337 else {
6338 decimal = Py_UNICODE_TODECIMAL(ch)_PyUnicode_ToDecimalDigit(ch);
6339 if (decimal >= 0)
6340 *output++ = '0' + decimal;
6341 else if (0 < ch && ch < 256)
6342 *output++ = (char)ch;
6343 else {
6344 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6344, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
6345 raise_encode_exception(&exc, encoding,
6346 s, length, collstart-s, collend-s, reason);
6347 goto onError;
6348 }
6349 }
6350 }
6351 p = s + newpos;
6352 Py_DECREF(repunicode)do { if (_Py_RefTotal-- , --((PyObject*)(repunicode))->ob_refcnt
!= 0) { if (((PyObject*)repunicode)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6352, (PyObject *)(repunicode)); }
else _Py_Dealloc((PyObject *)(repunicode)); } while (0)
;
6353 }
6354 }
6355 /* 0-terminate the output string */
6356 *output++ = '\0';
Value stored to 'output' is never read
6357 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 6357, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
6358 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6358, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
6359 return 0;
6360
6361 onError:
6362 Py_XDECREF(exc)do { if ((exc) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(exc))->ob_refcnt != 0) { if (((PyObject*
)exc)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 6362, (PyObject *)(exc)); } else _Py_Dealloc((PyObject *)(exc
)); } while (0); } while (0)
;
6363 Py_XDECREF(errorHandler)do { if ((errorHandler) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(errorHandler))->ob_refcnt != 0) { if (
((PyObject*)errorHandler)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6363, (PyObject *)(errorHandler))
; } else _Py_Dealloc((PyObject *)(errorHandler)); } while (0)
; } while (0)
;
6364 return -1;
6365}
6366
6367/* --- Helpers ------------------------------------------------------------ */
6368
6369#include "stringlib/unicodedefs.h"
6370#include "stringlib/fastsearch.h"
6371
6372#include "stringlib/count.h"
6373#include "stringlib/find.h"
6374#include "stringlib/partition.h"
6375#include "stringlib/split.h"
6376
6377#define _Py_InsertThousandsGrouping_PyUnicode_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping
6378#define _Py_InsertThousandsGroupingLocale_PyUnicode_InsertThousandsGroupingLocale _PyUnicode_InsertThousandsGroupingLocale
6379#include "stringlib/localeutil.h"
6380
/* Helper macro to fix up start/end slice values in place.
 *
 * `end` is clamped to at most `len`; a negative `end` or `start` has `len`
 * added to it and is set to 0 if it is still negative.  `start` and `end`
 * must be modifiable lvalues.  Every argument is evaluated more than once,
 * so none of them may have side effects.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement and stays correct inside an
 * unbraced if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
6395
6396Py_ssize_t PyUnicode_CountPyUnicodeUCS2_Count(PyObject *str,
6397 PyObject *substr,
6398 Py_ssize_t start,
6399 Py_ssize_t end)
6400{
6401 Py_ssize_t result;
6402 PyUnicodeObject* str_obj;
6403 PyUnicodeObject* sub_obj;
6404
6405 str_obj = (PyUnicodeObject*) PyUnicode_FromObjectPyUnicodeUCS2_FromObject(str);
6406 if (!str_obj)
6407 return -1;
6408 sub_obj = (PyUnicodeObject*) PyUnicode_FromObjectPyUnicodeUCS2_FromObject(substr);
6409 if (!sub_obj) {
6410 Py_DECREF(str_obj)do { if (_Py_RefTotal-- , --((PyObject*)(str_obj))->ob_refcnt
!= 0) { if (((PyObject*)str_obj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6410, (PyObject *)(str_obj)); } else
_Py_Dealloc((PyObject *)(str_obj)); } while (0)
;
6411 return -1;
6412 }
6413
6414 ADJUST_INDICES(start, end, str_obj->length)if (end > str_obj->length) end = str_obj->length; else
if (end < 0) { end += str_obj->length; if (end < 0)
end = 0; } if (start < 0) { start += str_obj->length; if
(start < 0) start = 0; }
;
6415 result = stringlib_count(
6416 str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
6417 PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1))
6418 );
6419
6420 Py_DECREF(sub_obj)do { if (_Py_RefTotal-- , --((PyObject*)(sub_obj))->ob_refcnt
!= 0) { if (((PyObject*)sub_obj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6420, (PyObject *)(sub_obj)); } else
_Py_Dealloc((PyObject *)(sub_obj)); } while (0)
;
6421 Py_DECREF(str_obj)do { if (_Py_RefTotal-- , --((PyObject*)(str_obj))->ob_refcnt
!= 0) { if (((PyObject*)str_obj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6421, (PyObject *)(str_obj)); } else
_Py_Dealloc((PyObject *)(str_obj)); } while (0)
;
6422
6423 return result;
6424}
6425
6426Py_ssize_t PyUnicode_FindPyUnicodeUCS2_Find(PyObject *str,
6427 PyObject *sub,
6428 Py_ssize_t start,
6429 Py_ssize_t end,
6430 int direction)
6431{
6432 Py_ssize_t result;
6433
6434 str = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(str);
6435 if (!str)
6436 return -2;
6437 sub = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(sub);
6438 if (!sub) {
6439 Py_DECREF(str)do { if (_Py_RefTotal-- , --((PyObject*)(str))->ob_refcnt !=
0) { if (((PyObject*)str)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6439, (PyObject *)(str)); } else _Py_Dealloc
((PyObject *)(str)); } while (0)
;
6440 return -2;
6441 }
6442
6443 if (direction > 0)
6444 result = stringlib_find_slice(
6445 PyUnicode_AS_UNICODE(str)((__builtin_expect(!(((((((PyObject*)(str))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6445, "PyUnicode_Check(str)") : (
void)0),(((PyUnicodeObject *)(str))->str))
, PyUnicode_GET_SIZE(str)((__builtin_expect(!(((((((PyObject*)(str))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6445, "PyUnicode_Check(str)") : (
void)0),(((PyUnicodeObject *)(str))->length))
,
6446 PyUnicode_AS_UNICODE(sub)((__builtin_expect(!(((((((PyObject*)(sub))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6446, "PyUnicode_Check(sub)") : (
void)0),(((PyUnicodeObject *)(sub))->str))
, PyUnicode_GET_SIZE(sub)((__builtin_expect(!(((((((PyObject*)(sub))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6446, "PyUnicode_Check(sub)") : (
void)0),(((PyUnicodeObject *)(sub))->length))
,
6447 start, end
6448 );
6449 else
6450 result = stringlib_rfind_slice(
6451 PyUnicode_AS_UNICODE(str)((__builtin_expect(!(((((((PyObject*)(str))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6451, "PyUnicode_Check(str)") : (
void)0),(((PyUnicodeObject *)(str))->str))
, PyUnicode_GET_SIZE(str)((__builtin_expect(!(((((((PyObject*)(str))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6451, "PyUnicode_Check(str)") : (
void)0),(((PyUnicodeObject *)(str))->length))
,
6452 PyUnicode_AS_UNICODE(sub)((__builtin_expect(!(((((((PyObject*)(sub))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6452, "PyUnicode_Check(sub)") : (
void)0),(((PyUnicodeObject *)(sub))->str))
, PyUnicode_GET_SIZE(sub)((__builtin_expect(!(((((((PyObject*)(sub))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6452, "PyUnicode_Check(sub)") : (
void)0),(((PyUnicodeObject *)(sub))->length))
,
6453 start, end
6454 );
6455
6456 Py_DECREF(str)do { if (_Py_RefTotal-- , --((PyObject*)(str))->ob_refcnt !=
0) { if (((PyObject*)str)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6456, (PyObject *)(str)); } else _Py_Dealloc
((PyObject *)(str)); } while (0)
;
6457 Py_DECREF(sub)do { if (_Py_RefTotal-- , --((PyObject*)(sub))->ob_refcnt !=
0) { if (((PyObject*)sub)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6457, (PyObject *)(sub)); } else _Py_Dealloc
((PyObject *)(sub)); } while (0)
;
6458
6459 return result;
6460}
6461
6462static
6463int tailmatch(PyUnicodeObject *self,
6464 PyUnicodeObject *substring,
6465 Py_ssize_t start,
6466 Py_ssize_t end,
6467 int direction)
6468{
6469 if (substring->length == 0)
6470 return 1;
6471
6472 ADJUST_INDICES(start, end, self->length)if (end > self->length) end = self->length; else if (
end < 0) { end += self->length; if (end < 0) end = 0
; } if (start < 0) { start += self->length; if (start <
0) start = 0; }
;
6473 end -= substring->length;
6474 if (end < start)
6475 return 0;
6476
6477 if (direction > 0) {
6478 if (Py_UNICODE_MATCH(self, end, substring)((*((self)->str + (end)) == *((substring)->str)) &&
((*((self)->str + (end) + (substring)->length-1) == *(
(substring)->str + (substring)->length-1))) && !
memcmp((self)->str + (end), (substring)->str, (substring
)->length*sizeof(Py_UNICODE)))
)
6479 return 1;
6480 } else {
6481 if (Py_UNICODE_MATCH(self, start, substring)((*((self)->str + (start)) == *((substring)->str)) &&
((*((self)->str + (start) + (substring)->length-1) == *
((substring)->str + (substring)->length-1))) &&
!memcmp((self)->str + (start), (substring)->str, (substring
)->length*sizeof(Py_UNICODE)))
)
6482 return 1;
6483 }
6484
6485 return 0;
6486}
6487
6488Py_ssize_t PyUnicode_TailmatchPyUnicodeUCS2_Tailmatch(PyObject *str,
6489 PyObject *substr,
6490 Py_ssize_t start,
6491 Py_ssize_t end,
6492 int direction)
6493{
6494 Py_ssize_t result;
6495
6496 str = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(str);
6497 if (str == NULL((void *)0))
6498 return -1;
6499 substr = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(substr);
6500 if (substr == NULL((void *)0)) {
6501 Py_DECREF(str)do { if (_Py_RefTotal-- , --((PyObject*)(str))->ob_refcnt !=
0) { if (((PyObject*)str)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6501, (PyObject *)(str)); } else _Py_Dealloc
((PyObject *)(str)); } while (0)
;
6502 return -1;
6503 }
6504
6505 result = tailmatch((PyUnicodeObject *)str,
6506 (PyUnicodeObject *)substr,
6507 start, end, direction);
6508 Py_DECREF(str)do { if (_Py_RefTotal-- , --((PyObject*)(str))->ob_refcnt !=
0) { if (((PyObject*)str)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6508, (PyObject *)(str)); } else _Py_Dealloc
((PyObject *)(str)); } while (0)
;
6509 Py_DECREF(substr)do { if (_Py_RefTotal-- , --((PyObject*)(substr))->ob_refcnt
!= 0) { if (((PyObject*)substr)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6509, (PyObject *)(substr)); } else
_Py_Dealloc((PyObject *)(substr)); } while (0)
;
6510 return result;
6511}
6512
6513/* Apply fixfct filter to the Unicode object self and return a
6514 reference to the modified object */
6515
6516static
6517PyObject *fixup(PyUnicodeObject *self,
6518 int (*fixfct)(PyUnicodeObject *s))
6519{
6520
6521 PyUnicodeObject *u;
6522
6523 u = (PyUnicodeObject*) PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(NULL((void *)0), self->length);
6524 if (u == NULL((void *)0))
6525 return NULL((void *)0);
6526
6527 Py_UNICODE_COPY(u->str, self->str, self->length)((__builtin_object_size ((u->str), 0) != (size_t) -1) ? __builtin___memcpy_chk
((u->str), (self->str), (self->length)*sizeof(Py_UNICODE
), __builtin_object_size ((u->str), 0)) : __inline_memcpy_chk
((u->str), (self->str), (self->length)*sizeof(Py_UNICODE
)))
;
6528
6529 if (!fixfct(u) && PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
6530 /* fixfct should return TRUE if it modified the buffer. If
6531 FALSE, return a reference to the original buffer instead
6532 (to save space, not time) */
6533 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
6534 Py_DECREF(u)do { if (_Py_RefTotal-- , --((PyObject*)(u))->ob_refcnt !=
0) { if (((PyObject*)u)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6534, (PyObject *)(u)); } else _Py_Dealloc
((PyObject *)(u)); } while (0)
;
6535 return (PyObject*) self;
6536 }
6537 return (PyObject*) u;
6538}
6539
6540static
6541int fixupper(PyUnicodeObject *self)
6542{
6543 Py_ssize_t len = self->length;
6544 Py_UNICODE *s = self->str;
6545 int status = 0;
6546
6547 while (len-- > 0) {
6548 register Py_UNICODE ch;
6549
6550 ch = Py_UNICODE_TOUPPER(*s)_PyUnicode_ToUppercase(*s);
6551 if (ch != *s) {
6552 status = 1;
6553 *s = ch;
6554 }
6555 s++;
6556 }
6557
6558 return status;
6559}
6560
6561static
6562int fixlower(PyUnicodeObject *self)
6563{
6564 Py_ssize_t len = self->length;
6565 Py_UNICODE *s = self->str;
6566 int status = 0;
6567
6568 while (len-- > 0) {
6569 register Py_UNICODE ch;
6570
6571 ch = Py_UNICODE_TOLOWER(*s)_PyUnicode_ToLowercase(*s);
6572 if (ch != *s) {
6573 status = 1;
6574 *s = ch;
6575 }
6576 s++;
6577 }
6578
6579 return status;
6580}
6581
6582static
6583int fixswapcase(PyUnicodeObject *self)
6584{
6585 Py_ssize_t len = self->length;
6586 Py_UNICODE *s = self->str;
6587 int status = 0;
6588
6589 while (len-- > 0) {
6590 if (Py_UNICODE_ISUPPER(*s)_PyUnicode_IsUppercase(*s)) {
6591 *s = Py_UNICODE_TOLOWER(*s)_PyUnicode_ToLowercase(*s);
6592 status = 1;
6593 } else if (Py_UNICODE_ISLOWER(*s)_PyUnicode_IsLowercase(*s)) {
6594 *s = Py_UNICODE_TOUPPER(*s)_PyUnicode_ToUppercase(*s);
6595 status = 1;
6596 }
6597 s++;
6598 }
6599
6600 return status;
6601}
6602
6603static
6604int fixcapitalize(PyUnicodeObject *self)
6605{
6606 Py_ssize_t len = self->length;
6607 Py_UNICODE *s = self->str;
6608 int status = 0;
6609
6610 if (len == 0)
6611 return 0;
6612 if (Py_UNICODE_ISLOWER(*s)_PyUnicode_IsLowercase(*s)) {
6613 *s = Py_UNICODE_TOUPPER(*s)_PyUnicode_ToUppercase(*s);
6614 status = 1;
6615 }
6616 s++;
6617 while (--len > 0) {
6618 if (Py_UNICODE_ISUPPER(*s)_PyUnicode_IsUppercase(*s)) {
6619 *s = Py_UNICODE_TOLOWER(*s)_PyUnicode_ToLowercase(*s);
6620 status = 1;
6621 }
6622 s++;
6623 }
6624 return status;
6625}
6626
6627static
6628int fixtitle(PyUnicodeObject *self)
6629{
6630 register Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6630, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
6631 register Py_UNICODE *e;
6632 int previous_is_cased;
6633
6634 /* Shortcut for single character strings */
6635 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6635, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1) {
6636 Py_UNICODE ch = Py_UNICODE_TOTITLE(*p)_PyUnicode_ToTitlecase(*p);
6637 if (*p != ch) {
6638 *p = ch;
6639 return 1;
6640 }
6641 else
6642 return 0;
6643 }
6644
6645 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6645, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
6646 previous_is_cased = 0;
6647 for (; p < e; p++) {
6648 register const Py_UNICODE ch = *p;
6649
6650 if (previous_is_cased)
6651 *p = Py_UNICODE_TOLOWER(ch)_PyUnicode_ToLowercase(ch);
6652 else
6653 *p = Py_UNICODE_TOTITLE(ch)_PyUnicode_ToTitlecase(ch);
6654
6655 if (Py_UNICODE_ISLOWER(ch)_PyUnicode_IsLowercase(ch) ||
6656 Py_UNICODE_ISUPPER(ch)_PyUnicode_IsUppercase(ch) ||
6657 Py_UNICODE_ISTITLE(ch)_PyUnicode_IsTitlecase(ch))
6658 previous_is_cased = 1;
6659 else
6660 previous_is_cased = 0;
6661 }
6662 return 1;
6663}
6664
6665PyObject *
6666PyUnicode_JoinPyUnicodeUCS2_Join(PyObject *separator, PyObject *seq)
6667{
6668 const Py_UNICODE blank = ' ';
6669 const Py_UNICODE *sep = &blank;
6670 Py_ssize_t seplen = 1;
6671 PyUnicodeObject *res = NULL((void *)0); /* the result */
6672 Py_UNICODE *res_p; /* pointer to free byte in res's string area */
6673 PyObject *fseq; /* PySequence_Fast(seq) */
6674 Py_ssize_t seqlen; /* len(fseq) -- number of items in sequence */
6675 PyObject **items;
6676 PyObject *item;
6677 Py_ssize_t sz, i;
6678
6679 fseq = PySequence_Fast(seq, "");
6680 if (fseq == NULL((void *)0)) {
6681 return NULL((void *)0);
6682 }
6683
6684 /* NOTE: the following code can't call back into Python code,
6685 * so we are sure that fseq won't be mutated.
6686 */
6687
6688 seqlen = PySequence_Fast_GET_SIZE(fseq)(((((((PyObject*)(fseq))->ob_type))->tp_flags & ((1L
<<25))) != 0) ? (((PyVarObject*)(fseq))->ob_size) : (
((PyVarObject*)(fseq))->ob_size))
;
6689 /* If empty sequence, return u"". */
6690 if (seqlen == 0) {
6691 res = _PyUnicode_New(0); /* empty sequence; return u"" */
6692 goto Done;
6693 }
6694 items = PySequence_Fast_ITEMS(fseq)(((((((PyObject*)(fseq))->ob_type))->tp_flags & ((1L
<<25))) != 0) ? ((PyListObject *)(fseq))->ob_item : (
(PyTupleObject *)(fseq))->ob_item)
;
6695 /* If singleton sequence with an exact Unicode, return that. */
6696 if (seqlen == 1) {
6697 item = items[0];
6698 if (PyUnicode_CheckExact(item)((((PyObject*)(item))->ob_type) == &PyUnicode_Type)) {
6699 Py_INCREF(item)( _Py_RefTotal++ , ((PyObject*)(item))->ob_refcnt++);
6700 res = (PyUnicodeObject *)item;
6701 goto Done;
6702 }
6703 }
6704 else {
6705 /* Set up sep and seplen */
6706 if (separator == NULL((void *)0)) {
6707 sep = &blank;
6708 seplen = 1;
6709 }
6710 else {
6711 if (!PyUnicode_Check(separator)((((((PyObject*)(separator))->ob_type))->tp_flags &
((1L<<28))) != 0)
) {
6712 PyErr_Format(PyExc_TypeError,
6713 "separator: expected str instance,"
6714 " %.80s found",
6715 Py_TYPE(separator)(((PyObject*)(separator))->ob_type)->tp_name);
6716 goto onError;
6717 }
6718 sep = PyUnicode_AS_UNICODE(separator)((__builtin_expect(!(((((((PyObject*)(separator))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6718, "PyUnicode_Check(separator)"
) : (void)0),(((PyUnicodeObject *)(separator))->str))
;
6719 seplen = PyUnicode_GET_SIZE(separator)((__builtin_expect(!(((((((PyObject*)(separator))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6719, "PyUnicode_Check(separator)"
) : (void)0),(((PyUnicodeObject *)(separator))->length))
;
6720 }
6721 }
6722
6723 /* There are at least two things to join, or else we have a subclass
6724 * of str in the sequence.
6725 * Do a pre-pass to figure out the total amount of space we'll
6726 * need (sz), and see whether all argument are strings.
6727 */
6728 sz = 0;
6729 for (i = 0; i < seqlen; i++) {
6730 const Py_ssize_t old_sz = sz;
6731 item = items[i];
6732 if (!PyUnicode_Check(item)((((((PyObject*)(item))->ob_type))->tp_flags & ((1L
<<28))) != 0)
) {
6733 PyErr_Format(PyExc_TypeError,
6734 "sequence item %zd: expected str instance,"
6735 " %.80s found",
6736 i, Py_TYPE(item)(((PyObject*)(item))->ob_type)->tp_name);
6737 goto onError;
6738 }
6739 sz += PyUnicode_GET_SIZE(item)((__builtin_expect(!(((((((PyObject*)(item))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6739, "PyUnicode_Check(item)") :
(void)0),(((PyUnicodeObject *)(item))->length))
;
6740 if (i != 0)
6741 sz += seplen;
6742 if (sz < old_sz || sz > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1))) {
6743 PyErr_SetString(PyExc_OverflowError,
6744 "join() result is too long for a Python string");
6745 goto onError;
6746 }
6747 }
6748
6749 res = _PyUnicode_New(sz);
6750 if (res == NULL((void *)0))
6751 goto onError;
6752
6753 /* Catenate everything. */
6754 res_p = PyUnicode_AS_UNICODE(res)((__builtin_expect(!(((((((PyObject*)(res))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6754, "PyUnicode_Check(res)") : (
void)0),(((PyUnicodeObject *)(res))->str))
;
6755 for (i = 0; i < seqlen; ++i) {
6756 Py_ssize_t itemlen;
6757 item = items[i];
6758 itemlen = PyUnicode_GET_SIZE(item)((__builtin_expect(!(((((((PyObject*)(item))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6758, "PyUnicode_Check(item)") :
(void)0),(((PyUnicodeObject *)(item))->length))
;
6759 /* Copy item, and maybe the separator. */
6760 if (i) {
6761 Py_UNICODE_COPY(res_p, sep, seplen)((__builtin_object_size ((res_p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((res_p), (sep), (seplen)*sizeof(Py_UNICODE), __builtin_object_size
((res_p), 0)) : __inline_memcpy_chk ((res_p), (sep), (seplen
)*sizeof(Py_UNICODE)))
;
6762 res_p += seplen;
6763 }
6764 Py_UNICODE_COPY(res_p, PyUnicode_AS_UNICODE(item), itemlen)((__builtin_object_size ((res_p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((res_p), (((__builtin_expect(!(((((((PyObject*)(item))->
ob_type))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6764, "PyUnicode_Check(item)"
) : (void)0),(((PyUnicodeObject *)(item))->str))), (itemlen
)*sizeof(Py_UNICODE), __builtin_object_size ((res_p), 0)) : __inline_memcpy_chk
((res_p), (((__builtin_expect(!(((((((PyObject*)(item))->
ob_type))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 6764, "PyUnicode_Check(item)"
) : (void)0),(((PyUnicodeObject *)(item))->str))), (itemlen
)*sizeof(Py_UNICODE)))
;
6765 res_p += itemlen;
6766 }
6767
6768 Done:
6769 Py_DECREF(fseq)do { if (_Py_RefTotal-- , --((PyObject*)(fseq))->ob_refcnt
!= 0) { if (((PyObject*)fseq)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6769, (PyObject *)(fseq)); } else
_Py_Dealloc((PyObject *)(fseq)); } while (0)
;
6770 return (PyObject *)res;
6771
6772 onError:
6773 Py_DECREF(fseq)do { if (_Py_RefTotal-- , --((PyObject*)(fseq))->ob_refcnt
!= 0) { if (((PyObject*)fseq)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6773, (PyObject *)(fseq)); } else
_Py_Dealloc((PyObject *)(fseq)); } while (0)
;
6774 Py_XDECREF(res)do { if ((res) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(res))->ob_refcnt != 0) { if (((PyObject*
)res)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 6774, (PyObject *)(res)); } else _Py_Dealloc((PyObject *)(res
)); } while (0); } while (0)
;
6775 return NULL((void *)0);
6776}
6777
6778static
6779PyUnicodeObject *pad(PyUnicodeObject *self,
6780 Py_ssize_t left,
6781 Py_ssize_t right,
6782 Py_UNICODE fill)
6783{
6784 PyUnicodeObject *u;
6785
6786 if (left < 0)
6787 left = 0;
6788 if (right < 0)
6789 right = 0;
6790
6791 if (left == 0 && right == 0 && PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
6792 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
6793 return self;
6794 }
6795
6796 if (left > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) - self->length ||
6797 right > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) - (left + self->length)) {
6798 PyErr_SetString(PyExc_OverflowError, "padded string is too long");
6799 return NULL((void *)0);
6800 }
6801 u = _PyUnicode_New(left + self->length + right);
6802 if (u) {
6803 if (left)
6804 Py_UNICODE_FILL(u->str, fill, left)do {Py_ssize_t i_; Py_UNICODE *t_ = (u->str); Py_UNICODE v_
= (fill); for (i_ = 0; i_ < (left); i_++) t_[i_] = v_; } while
(0)
;
6805 Py_UNICODE_COPY(u->str + left, self->str, self->length)((__builtin_object_size ((u->str + left), 0) != (size_t) -
1) ? __builtin___memcpy_chk ((u->str + left), (self->str
), (self->length)*sizeof(Py_UNICODE), __builtin_object_size
((u->str + left), 0)) : __inline_memcpy_chk ((u->str +
left), (self->str), (self->length)*sizeof(Py_UNICODE))
)
;
6806 if (right)
6807 Py_UNICODE_FILL(u->str + left + self->length, fill, right)do {Py_ssize_t i_; Py_UNICODE *t_ = (u->str + left + self->
length); Py_UNICODE v_ = (fill); for (i_ = 0; i_ < (right)
; i_++) t_[i_] = v_; } while (0)
;
6808 }
6809
6810 return u;
6811}
6812
6813PyObject *PyUnicode_SplitlinesPyUnicodeUCS2_Splitlines(PyObject *string, int keepends)
6814{
6815 PyObject *list;
6816
6817 string = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(string);
6818 if (string == NULL((void *)0))
6819 return NULL((void *)0);
6820
6821 list = stringlib_splitlines(
6822 (PyObject*) string, PyUnicode_AS_UNICODE(string)((__builtin_expect(!(((((((PyObject*)(string))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6822, "PyUnicode_Check(string)")
: (void)0),(((PyUnicodeObject *)(string))->str))
,
6823 PyUnicode_GET_SIZE(string)((__builtin_expect(!(((((((PyObject*)(string))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 6823, "PyUnicode_Check(string)")
: (void)0),(((PyUnicodeObject *)(string))->length))
, keepends);
6824
6825 Py_DECREF(string)do { if (_Py_RefTotal-- , --((PyObject*)(string))->ob_refcnt
!= 0) { if (((PyObject*)string)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 6825, (PyObject *)(string)); } else
_Py_Dealloc((PyObject *)(string)); } while (0)
;
6826 return list;
6827}
6828
6829static
6830PyObject *split(PyUnicodeObject *self,
6831 PyUnicodeObject *substring,
6832 Py_ssize_t maxcount)
6833{
6834 if (maxcount < 0)
6835 maxcount = PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1));
6836
6837 if (substring == NULL((void *)0))
6838 return stringlib_split_whitespace(
6839 (PyObject*) self, self->str, self->length, maxcount
6840 );
6841
6842 return stringlib_split(
6843 (PyObject*) self, self->str, self->length,
6844 substring->str, substring->length,
6845 maxcount
6846 );
6847}
6848
6849static
6850PyObject *rsplit(PyUnicodeObject *self,
6851 PyUnicodeObject *substring,
6852 Py_ssize_t maxcount)
6853{
6854 if (maxcount < 0)
6855 maxcount = PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1));
6856
6857 if (substring == NULL((void *)0))
6858 return stringlib_rsplit_whitespace(
6859 (PyObject*) self, self->str, self->length, maxcount
6860 );
6861
6862 return stringlib_rsplit(
6863 (PyObject*) self, self->str, self->length,
6864 substring->str, substring->length,
6865 maxcount
6866 );
6867}
6868
6869static
6870PyObject *replace(PyUnicodeObject *self,
6871 PyUnicodeObject *str1,
6872 PyUnicodeObject *str2,
6873 Py_ssize_t maxcount)
6874{
6875 PyUnicodeObject *u;
6876
6877 if (maxcount < 0)
6878 maxcount = PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1));
6879 else if (maxcount == 0 || self->length == 0)
6880 goto nothing;
6881
6882 if (str1->length == str2->length) {
6883 Py_ssize_t i;
6884 /* same length */
6885 if (str1->length == 0)
6886 goto nothing;
6887 if (str1->length == 1) {
6888 /* replace characters */
6889 Py_UNICODE u1, u2;
6890 if (!findchar(self->str, self->length, str1->str[0]))
6891 goto nothing;
6892 u = (PyUnicodeObject*) PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(NULL((void *)0), self->length);
6893 if (!u)
6894 return NULL((void *)0);
6895 Py_UNICODE_COPY(u->str, self->str, self->length)((__builtin_object_size ((u->str), 0) != (size_t) -1) ? __builtin___memcpy_chk
((u->str), (self->str), (self->length)*sizeof(Py_UNICODE
), __builtin_object_size ((u->str), 0)) : __inline_memcpy_chk
((u->str), (self->str), (self->length)*sizeof(Py_UNICODE
)))
;
6896 u1 = str1->str[0];
6897 u2 = str2->str[0];
6898 for (i = 0; i < u->length; i++)
6899 if (u->str[i] == u1) {
6900 if (--maxcount < 0)
6901 break;
6902 u->str[i] = u2;
6903 }
6904 } else {
6905 i = stringlib_find(
6906 self->str, self->length, str1->str, str1->length, 0
6907 );
6908 if (i < 0)
6909 goto nothing;
6910 u = (PyUnicodeObject*) PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(NULL((void *)0), self->length);
6911 if (!u)
6912 return NULL((void *)0);
6913 Py_UNICODE_COPY(u->str, self->str, self->length)((__builtin_object_size ((u->str), 0) != (size_t) -1) ? __builtin___memcpy_chk
((u->str), (self->str), (self->length)*sizeof(Py_UNICODE
), __builtin_object_size ((u->str), 0)) : __inline_memcpy_chk
((u->str), (self->str), (self->length)*sizeof(Py_UNICODE
)))
;
6914
6915 /* change everything in-place, starting with this one */
6916 Py_UNICODE_COPY(u->str+i, str2->str, str2->length)((__builtin_object_size ((u->str+i), 0) != (size_t) -1) ? __builtin___memcpy_chk
((u->str+i), (str2->str), (str2->length)*sizeof(Py_UNICODE
), __builtin_object_size ((u->str+i), 0)) : __inline_memcpy_chk
((u->str+i), (str2->str), (str2->length)*sizeof(Py_UNICODE
)))
;
6917 i += str1->length;
6918
6919 while ( --maxcount > 0) {
6920 i = stringlib_find(self->str+i, self->length-i,
6921 str1->str, str1->length,
6922 i);
6923 if (i == -1)
6924 break;
6925 Py_UNICODE_COPY(u->str+i, str2->str, str2->length)((__builtin_object_size ((u->str+i), 0) != (size_t) -1) ? __builtin___memcpy_chk
((u->str+i), (str2->str), (str2->length)*sizeof(Py_UNICODE
), __builtin_object_size ((u->str+i), 0)) : __inline_memcpy_chk
((u->str+i), (str2->str), (str2->length)*sizeof(Py_UNICODE
)))
;
6926 i += str1->length;
6927 }
6928 }
6929 } else {
6930
6931 Py_ssize_t n, i, j;
6932 Py_ssize_t product, new_size, delta;
6933 Py_UNICODE *p;
6934
6935 /* replace strings */
6936 n = stringlib_count(self->str, self->length, str1->str, str1->length,
6937 maxcount);
6938 if (n == 0)
6939 goto nothing;
6940 /* new_size = self->length + n * (str2->length - str1->length)); */
6941 delta = (str2->length - str1->length);
6942 if (delta == 0) {
6943 new_size = self->length;
6944 } else {
6945 product = n * (str2->length - str1->length);
6946 if ((product / (str2->length - str1->length)) != n) {
6947 PyErr_SetString(PyExc_OverflowError,
6948 "replace string is too long");
6949 return NULL((void *)0);
6950 }
6951 new_size = self->length + product;
6952 if (new_size < 0) {
6953 PyErr_SetString(PyExc_OverflowError,
6954 "replace string is too long");
6955 return NULL((void *)0);
6956 }
6957 }
6958 u = _PyUnicode_New(new_size);
6959 if (!u)
6960 return NULL((void *)0);
6961 i = 0;
6962 p = u->str;
6963 if (str1->length > 0) {
6964 while (n-- > 0) {
6965 /* look for next match */
6966 j = stringlib_find(self->str+i, self->length-i,
6967 str1->str, str1->length,
6968 i);
6969 if (j == -1)
6970 break;
6971 else if (j > i) {
6972 /* copy unchanged part [i:j] */
6973 Py_UNICODE_COPY(p, self->str+i, j-i)((__builtin_object_size ((p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((p), (self->str+i), (j-i)*sizeof(Py_UNICODE), __builtin_object_size
((p), 0)) : __inline_memcpy_chk ((p), (self->str+i), (j-i
)*sizeof(Py_UNICODE)))
;
6974 p += j - i;
6975 }
6976 /* copy substitution string */
6977 if (str2->length > 0) {
6978 Py_UNICODE_COPY(p, str2->str, str2->length)((__builtin_object_size ((p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((p), (str2->str), (str2->length)*sizeof(Py_UNICODE), __builtin_object_size
((p), 0)) : __inline_memcpy_chk ((p), (str2->str), (str2->
length)*sizeof(Py_UNICODE)))
;
6979 p += str2->length;
6980 }
6981 i = j + str1->length;
6982 }
6983 if (i < self->length)
6984 /* copy tail [i:] */
6985 Py_UNICODE_COPY(p, self->str+i, self->length-i)((__builtin_object_size ((p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((p), (self->str+i), (self->length-i)*sizeof(Py_UNICODE
), __builtin_object_size ((p), 0)) : __inline_memcpy_chk ((p)
, (self->str+i), (self->length-i)*sizeof(Py_UNICODE)))
;
6986 } else {
6987 /* interleave */
6988 while (n > 0) {
6989 Py_UNICODE_COPY(p, str2->str, str2->length)((__builtin_object_size ((p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((p), (str2->str), (str2->length)*sizeof(Py_UNICODE), __builtin_object_size
((p), 0)) : __inline_memcpy_chk ((p), (str2->str), (str2->
length)*sizeof(Py_UNICODE)))
;
6990 p += str2->length;
6991 if (--n <= 0)
6992 break;
6993 *p++ = self->str[i++];
6994 }
6995 Py_UNICODE_COPY(p, self->str+i, self->length-i)((__builtin_object_size ((p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((p), (self->str+i), (self->length-i)*sizeof(Py_UNICODE
), __builtin_object_size ((p), 0)) : __inline_memcpy_chk ((p)
, (self->str+i), (self->length-i)*sizeof(Py_UNICODE)))
;
6996 }
6997 }
6998 return (PyObject *) u;
6999
7000 nothing:
7001 /* nothing to replace; return original string (when possible) */
7002 if (PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
7003 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
7004 return (PyObject *) self;
7005 }
7006 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(self->str, self->length);
7007}
7008
7009/* --- Unicode Object Methods --------------------------------------------- */
7010
7011PyDoc_STRVAR(title__doc__,static char title__doc__[] = "S.title() -> str\n\nReturn a titlecased version of S, i.e. words start with title case\ncharacters, all remaining cased characters have lower case."
7012 "S.title() -> str\n\static char title__doc__[] = "S.title() -> str\n\nReturn a titlecased version of S, i.e. words start with title case\ncharacters, all remaining cased characters have lower case."
7013\n\static char title__doc__[] = "S.title() -> str\n\nReturn a titlecased version of S, i.e. words start with title case\ncharacters, all remaining cased characters have lower case."
7014Return a titlecased version of S, i.e. words start with title case\n\static char title__doc__[] = "S.title() -> str\n\nReturn a titlecased version of S, i.e. words start with title case\ncharacters, all remaining cased characters have lower case."
7015characters, all remaining cased characters have lower case.")static char title__doc__[] = "S.title() -> str\n\nReturn a titlecased version of S, i.e. words start with title case\ncharacters, all remaining cased characters have lower case.";
7016
7017static PyObject*
7018unicode_title(PyUnicodeObject *self)
7019{
7020 return fixup(self, fixtitle);
7021}
7022
7023PyDoc_STRVAR(capitalize__doc__,static char capitalize__doc__[] = "S.capitalize() -> str\n\nReturn a capitalized version of S, i.e. make the first character\nhave upper case and the rest lower case."
7024 "S.capitalize() -> str\n\static char capitalize__doc__[] = "S.capitalize() -> str\n\nReturn a capitalized version of S, i.e. make the first character\nhave upper case and the rest lower case."
7025\n\static char capitalize__doc__[] = "S.capitalize() -> str\n\nReturn a capitalized version of S, i.e. make the first character\nhave upper case and the rest lower case."
7026Return a capitalized version of S, i.e. make the first character\n\static char capitalize__doc__[] = "S.capitalize() -> str\n\nReturn a capitalized version of S, i.e. make the first character\nhave upper case and the rest lower case."
7027have upper case and the rest lower case.")static char capitalize__doc__[] = "S.capitalize() -> str\n\nReturn a capitalized version of S, i.e. make the first character\nhave upper case and the rest lower case.";
7028
7029static PyObject*
7030unicode_capitalize(PyUnicodeObject *self)
7031{
7032 return fixup(self, fixcapitalize);
7033}
7034
7035#if 0
7036PyDoc_STRVAR(capwords__doc__,static char capwords__doc__[] = "S.capwords() -> str\n\nApply .capitalize() to all words in S and return the result with\nnormalized whitespace (all whitespace strings are replaced by ' ')."
7037 "S.capwords() -> str\n\static char capwords__doc__[] = "S.capwords() -> str\n\nApply .capitalize() to all words in S and return the result with\nnormalized whitespace (all whitespace strings are replaced by ' ')."
7038\n\static char capwords__doc__[] = "S.capwords() -> str\n\nApply .capitalize() to all words in S and return the result with\nnormalized whitespace (all whitespace strings are replaced by ' ')."
7039Apply .capitalize() to all words in S and return the result with\n\static char capwords__doc__[] = "S.capwords() -> str\n\nApply .capitalize() to all words in S and return the result with\nnormalized whitespace (all whitespace strings are replaced by ' ')."
7040normalized whitespace (all whitespace strings are replaced by ' ').")static char capwords__doc__[] = "S.capwords() -> str\n\nApply .capitalize() to all words in S and return the result with\nnormalized whitespace (all whitespace strings are replaced by ' ').";
7041
7042static PyObject*
7043unicode_capwords(PyUnicodeObject *self)
7044{
7045 PyObject *list;
7046 PyObject *item;
7047 Py_ssize_t i;
7048
7049 /* Split into words */
7050 list = split(self, NULL((void *)0), -1);
7051 if (!list)
7052 return NULL((void *)0);
7053
7054 /* Capitalize each word */
7055 for (i = 0; i < PyList_GET_SIZE(list)(((PyVarObject*)(list))->ob_size); i++) {
7056 item = fixup((PyUnicodeObject *)PyList_GET_ITEM(list, i)(((PyListObject *)(list))->ob_item[i]),
7057 fixcapitalize);
7058 if (item == NULL((void *)0))
7059 goto onError;
7060 Py_DECREF(PyList_GET_ITEM(list, i))do { if (_Py_RefTotal-- , --((PyObject*)((((PyListObject *)(list
))->ob_item[i])))->ob_refcnt != 0) { if (((PyObject*)((
(PyListObject *)(list))->ob_item[i]))->ob_refcnt < 0
) _Py_NegativeRefcount("Objects/unicodeobject.c", 7060, (PyObject
*)((((PyListObject *)(list))->ob_item[i]))); } else _Py_Dealloc
((PyObject *)((((PyListObject *)(list))->ob_item[i]))); } while
(0)
;
7061 PyList_SET_ITEM(list, i, item)(((PyListObject *)(list))->ob_item[i] = (item));
7062 }
7063
7064 /* Join the words to form a new string */
7065 item = PyUnicode_JoinPyUnicodeUCS2_Join(NULL((void *)0), list);
7066
7067 onError:
7068 Py_DECREF(list)do { if (_Py_RefTotal-- , --((PyObject*)(list))->ob_refcnt
!= 0) { if (((PyObject*)list)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7068, (PyObject *)(list)); } else
_Py_Dealloc((PyObject *)(list)); } while (0)
;
7069 return (PyObject *)item;
7070}
7071#endif
7072
7073/* Argument converter. Coerces to a single unicode character */
7074
7075static int
7076convert_uc(PyObject *obj, void *addr)
7077{
7078 Py_UNICODE *fillcharloc = (Py_UNICODE *)addr;
7079 PyObject *uniobj;
7080 Py_UNICODE *unistr;
7081
7082 uniobj = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(obj);
7083 if (uniobj == NULL((void *)0)) {
7084 PyErr_SetString(PyExc_TypeError,
7085 "The fill character cannot be converted to Unicode");
7086 return 0;
7087 }
7088 if (PyUnicode_GET_SIZE(uniobj)((__builtin_expect(!(((((((PyObject*)(uniobj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7088, "PyUnicode_Check(uniobj)")
: (void)0),(((PyUnicodeObject *)(uniobj))->length))
!= 1) {
7089 PyErr_SetString(PyExc_TypeError,
7090 "The fill character must be exactly one character long");
7091 Py_DECREF(uniobj)do { if (_Py_RefTotal-- , --((PyObject*)(uniobj))->ob_refcnt
!= 0) { if (((PyObject*)uniobj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7091, (PyObject *)(uniobj)); } else
_Py_Dealloc((PyObject *)(uniobj)); } while (0)
;
7092 return 0;
7093 }
7094 unistr = PyUnicode_AS_UNICODE(uniobj)((__builtin_expect(!(((((((PyObject*)(uniobj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7094, "PyUnicode_Check(uniobj)")
: (void)0),(((PyUnicodeObject *)(uniobj))->str))
;
7095 *fillcharloc = unistr[0];
7096 Py_DECREF(uniobj)do { if (_Py_RefTotal-- , --((PyObject*)(uniobj))->ob_refcnt
!= 0) { if (((PyObject*)uniobj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7096, (PyObject *)(uniobj)); } else
_Py_Dealloc((PyObject *)(uniobj)); } while (0)
;
7097 return 1;
7098}
7099
7100PyDoc_STRVAR(center__doc__,static char center__doc__[] = "S.center(width[, fillchar]) -> str\n\nReturn S centered in a string of length width. Padding is\ndone using the specified fill character (default is a space)"
7101 "S.center(width[, fillchar]) -> str\n\static char center__doc__[] = "S.center(width[, fillchar]) -> str\n\nReturn S centered in a string of length width. Padding is\ndone using the specified fill character (default is a space)"
7102\n\static char center__doc__[] = "S.center(width[, fillchar]) -> str\n\nReturn S centered in a string of length width. Padding is\ndone using the specified fill character (default is a space)"
7103Return S centered in a string of length width. Padding is\n\static char center__doc__[] = "S.center(width[, fillchar]) -> str\n\nReturn S centered in a string of length width. Padding is\ndone using the specified fill character (default is a space)"
7104done using the specified fill character (default is a space)")static char center__doc__[] = "S.center(width[, fillchar]) -> str\n\nReturn S centered in a string of length width. Padding is\ndone using the specified fill character (default is a space)";
7105
7106static PyObject *
7107unicode_center(PyUnicodeObject *self, PyObject *args)
7108{
7109 Py_ssize_t marg, left;
7110 Py_ssize_t width;
7111 Py_UNICODE fillchar = ' ';
7112
7113 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "n|O&:center", &width, convert_uc, &fillchar))
7114 return NULL((void *)0);
7115
7116 if (self->length >= width && PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
7117 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
7118 return (PyObject*) self;
7119 }
7120
7121 marg = width - self->length;
7122 left = marg / 2 + (marg & width & 1);
7123
7124 return (PyObject*) pad(self, left, marg - left, fillchar);
7125}
7126
7127#if 0
7128
7129/* This code should go into some future Unicode collation support
7130 module. The basic comparison should compare ordinals on a naive
7131 basis (this is what Java does and thus Jython too). */
7132
7133/* speedy UTF-16 code point order comparison */
7134/* gleaned from: */
7135/* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */
7136
7137static short utf16Fixup[32] =
7138{
7139 0, 0, 0, 0, 0, 0, 0, 0,
7140 0, 0, 0, 0, 0, 0, 0, 0,
7141 0, 0, 0, 0, 0, 0, 0, 0,
7142 0, 0, 0, 0x2000, -0x800, -0x800, -0x800, -0x800
7143};
7144
7145static int
7146unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
7147{
7148 Py_ssize_t len1, len2;
7149
7150 Py_UNICODE *s1 = str1->str;
7151 Py_UNICODE *s2 = str2->str;
7152
7153 len1 = str1->length;
7154 len2 = str2->length;
7155
7156 while (len1 > 0 && len2 > 0) {
7157 Py_UNICODE c1, c2;
7158
7159 c1 = *s1++;
7160 c2 = *s2++;
7161
7162 if (c1 > (1<<11) * 26)
7163 c1 += utf16Fixup[c1>>11];
7164 if (c2 > (1<<11) * 26)
7165 c2 += utf16Fixup[c2>>11];
7166 /* now c1 and c2 are in UTF-32-compatible order */
7167
7168 if (c1 != c2)
7169 return (c1 < c2) ? -1 : 1;
7170
7171 len1--; len2--;
7172 }
7173
7174 return (len1 < len2) ? -1 : (len1 != len2);
7175}
7176
7177#else
7178
7179static int
7180unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
7181{
7182 register Py_ssize_t len1, len2;
7183
7184 Py_UNICODE *s1 = str1->str;
7185 Py_UNICODE *s2 = str2->str;
7186
7187 len1 = str1->length;
7188 len2 = str2->length;
7189
7190 while (len1 > 0 && len2 > 0) {
7191 Py_UNICODE c1, c2;
7192
7193 c1 = *s1++;
7194 c2 = *s2++;
7195
7196 if (c1 != c2)
7197 return (c1 < c2) ? -1 : 1;
7198
7199 len1--; len2--;
7200 }
7201
7202 return (len1 < len2) ? -1 : (len1 != len2);
7203}
7204
7205#endif
7206
7207int PyUnicode_ComparePyUnicodeUCS2_Compare(PyObject *left,
7208 PyObject *right)
7209{
7210 if (PyUnicode_Check(left)((((((PyObject*)(left))->ob_type))->tp_flags & ((1L
<<28))) != 0)
&& PyUnicode_Check(right)((((((PyObject*)(right))->ob_type))->tp_flags & ((1L
<<28))) != 0)
)
7211 return unicode_compare((PyUnicodeObject *)left,
7212 (PyUnicodeObject *)right);
7213 PyErr_Format(PyExc_TypeError,
7214 "Can't compare %.100s and %.100s",
7215 left->ob_type->tp_name,
7216 right->ob_type->tp_name);
7217 return -1;
7218}
7219
7220int
7221PyUnicode_CompareWithASCIIStringPyUnicodeUCS2_CompareWithASCIIString(PyObject* uni, const char* str)
7222{
7223 int i;
7224 Py_UNICODE *id;
7225 assert(PyUnicode_Check(uni))(__builtin_expect(!(((((((PyObject*)(uni))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7225, "PyUnicode_Check(uni)") : (
void)0)
;
7226 id = PyUnicode_AS_UNICODE(uni)((__builtin_expect(!(((((((PyObject*)(uni))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7226, "PyUnicode_Check(uni)") : (
void)0),(((PyUnicodeObject *)(uni))->str))
;
7227 /* Compare Unicode string and source character set string */
7228 for (i = 0; id[i] && str[i]; i++)
7229 if (id[i] != str[i])
7230 return ((int)id[i] < (int)str[i]) ? -1 : 1;
7231 /* This check keeps Python strings that end in '\0' from comparing equal
7232 to C strings identical up to that point. */
7233 if (PyUnicode_GET_SIZE(uni)((__builtin_expect(!(((((((PyObject*)(uni))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7233, "PyUnicode_Check(uni)") : (
void)0),(((PyUnicodeObject *)(uni))->length))
!= i || id[i])
7234 return 1; /* uni is longer */
7235 if (str[i])
7236 return -1; /* str is longer */
7237 return 0;
7238}
7239
7240
7241#define TEST_COND(cond)((cond) ? ((PyObject *) &_Py_TrueStruct) : ((PyObject *) &
_Py_FalseStruct))
\
7242 ((cond) ? Py_True((PyObject *) &_Py_TrueStruct) : Py_False((PyObject *) &_Py_FalseStruct))
7243
7244PyObject *PyUnicode_RichComparePyUnicodeUCS2_RichCompare(PyObject *left,
7245 PyObject *right,
7246 int op)
7247{
7248 int result;
7249
7250 if (PyUnicode_Check(left)((((((PyObject*)(left))->ob_type))->tp_flags & ((1L
<<28))) != 0)
&& PyUnicode_Check(right)((((((PyObject*)(right))->ob_type))->tp_flags & ((1L
<<28))) != 0)
) {
7251 PyObject *v;
7252 if (((PyUnicodeObject *) left)->length !=
7253 ((PyUnicodeObject *) right)->length) {
7254 if (op == Py_EQ2) {
7255 Py_INCREF(Py_False)( _Py_RefTotal++ , ((PyObject*)(((PyObject *) &_Py_FalseStruct
)))->ob_refcnt++)
;
7256 return Py_False((PyObject *) &_Py_FalseStruct);
7257 }
7258 if (op == Py_NE3) {
7259 Py_INCREF(Py_True)( _Py_RefTotal++ , ((PyObject*)(((PyObject *) &_Py_TrueStruct
)))->ob_refcnt++)
;
7260 return Py_True((PyObject *) &_Py_TrueStruct);
7261 }
7262 }
7263 if (left == right)
7264 result = 0;
7265 else
7266 result = unicode_compare((PyUnicodeObject *)left,
7267 (PyUnicodeObject *)right);
7268
7269 /* Convert the return value to a Boolean */
7270 switch (op) {
7271 case Py_EQ2:
7272 v = TEST_COND(result == 0)((result == 0) ? ((PyObject *) &_Py_TrueStruct) : ((PyObject
*) &_Py_FalseStruct))
;
7273 break;
7274 case Py_NE3:
7275 v = TEST_COND(result != 0)((result != 0) ? ((PyObject *) &_Py_TrueStruct) : ((PyObject
*) &_Py_FalseStruct))
;
7276 break;
7277 case Py_LE1:
7278 v = TEST_COND(result <= 0)((result <= 0) ? ((PyObject *) &_Py_TrueStruct) : ((PyObject
*) &_Py_FalseStruct))
;
7279 break;
7280 case Py_GE5:
7281 v = TEST_COND(result >= 0)((result >= 0) ? ((PyObject *) &_Py_TrueStruct) : ((PyObject
*) &_Py_FalseStruct))
;
7282 break;
7283 case Py_LT0:
7284 v = TEST_COND(result == -1)((result == -1) ? ((PyObject *) &_Py_TrueStruct) : ((PyObject
*) &_Py_FalseStruct))
;
7285 break;
7286 case Py_GT4:
7287 v = TEST_COND(result == 1)((result == 1) ? ((PyObject *) &_Py_TrueStruct) : ((PyObject
*) &_Py_FalseStruct))
;
7288 break;
7289 default:
7290 PyErr_BadArgument();
7291 return NULL((void *)0);
7292 }
7293 Py_INCREF(v)( _Py_RefTotal++ , ((PyObject*)(v))->ob_refcnt++);
7294 return v;
7295 }
7296
7297 Py_INCREF(Py_NotImplemented)( _Py_RefTotal++ , ((PyObject*)((&_Py_NotImplementedStruct
)))->ob_refcnt++)
;
7298 return Py_NotImplemented(&_Py_NotImplementedStruct);
7299}
7300
7301int PyUnicode_ContainsPyUnicodeUCS2_Contains(PyObject *container,
7302 PyObject *element)
7303{
7304 PyObject *str, *sub;
7305 int result;
7306
7307 /* Coerce the two arguments */
7308 sub = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(element);
7309 if (!sub) {
7310 PyErr_Format(PyExc_TypeError,
7311 "'in <string>' requires string as left operand, not %s",
7312 element->ob_type->tp_name);
7313 return -1;
7314 }
7315
7316 str = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(container);
7317 if (!str) {
7318 Py_DECREF(sub)do { if (_Py_RefTotal-- , --((PyObject*)(sub))->ob_refcnt !=
0) { if (((PyObject*)sub)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7318, (PyObject *)(sub)); } else _Py_Dealloc
((PyObject *)(sub)); } while (0)
;
7319 return -1;
7320 }
7321
7322 result = stringlib_contains_obj(str, sub);
7323
7324 Py_DECREF(str)do { if (_Py_RefTotal-- , --((PyObject*)(str))->ob_refcnt !=
0) { if (((PyObject*)str)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7324, (PyObject *)(str)); } else _Py_Dealloc
((PyObject *)(str)); } while (0)
;
7325 Py_DECREF(sub)do { if (_Py_RefTotal-- , --((PyObject*)(sub))->ob_refcnt !=
0) { if (((PyObject*)sub)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7325, (PyObject *)(sub)); } else _Py_Dealloc
((PyObject *)(sub)); } while (0)
;
7326
7327 return result;
7328}
7329
7330/* Concat to string or Unicode object giving a new Unicode object. */
7331
7332PyObject *PyUnicode_ConcatPyUnicodeUCS2_Concat(PyObject *left,
7333 PyObject *right)
7334{
7335 PyUnicodeObject *u = NULL((void *)0), *v = NULL((void *)0), *w;
7336
7337 /* Coerce the two arguments */
7338 u = (PyUnicodeObject *)PyUnicode_FromObjectPyUnicodeUCS2_FromObject(left);
7339 if (u == NULL((void *)0))
7340 goto onError;
7341 v = (PyUnicodeObject *)PyUnicode_FromObjectPyUnicodeUCS2_FromObject(right);
7342 if (v == NULL((void *)0))
7343 goto onError;
7344
7345 /* Shortcuts */
7346 if (v == unicode_empty) {
7347 Py_DECREF(v)do { if (_Py_RefTotal-- , --((PyObject*)(v))->ob_refcnt !=
0) { if (((PyObject*)v)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7347, (PyObject *)(v)); } else _Py_Dealloc
((PyObject *)(v)); } while (0)
;
7348 return (PyObject *)u;
7349 }
7350 if (u == unicode_empty) {
7351 Py_DECREF(u)do { if (_Py_RefTotal-- , --((PyObject*)(u))->ob_refcnt !=
0) { if (((PyObject*)u)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7351, (PyObject *)(u)); } else _Py_Dealloc
((PyObject *)(u)); } while (0)
;
7352 return (PyObject *)v;
7353 }
7354
7355 /* Concat the two Unicode strings */
7356 w = _PyUnicode_New(u->length + v->length);
7357 if (w == NULL((void *)0))
7358 goto onError;
7359 Py_UNICODE_COPY(w->str, u->str, u->length)((__builtin_object_size ((w->str), 0) != (size_t) -1) ? __builtin___memcpy_chk
((w->str), (u->str), (u->length)*sizeof(Py_UNICODE)
, __builtin_object_size ((w->str), 0)) : __inline_memcpy_chk
((w->str), (u->str), (u->length)*sizeof(Py_UNICODE)
))
;
7360 Py_UNICODE_COPY(w->str + u->length, v->str, v->length)((__builtin_object_size ((w->str + u->length), 0) != (size_t
) -1) ? __builtin___memcpy_chk ((w->str + u->length), (
v->str), (v->length)*sizeof(Py_UNICODE), __builtin_object_size
((w->str + u->length), 0)) : __inline_memcpy_chk ((w->
str + u->length), (v->str), (v->length)*sizeof(Py_UNICODE
)))
;
7361
7362 Py_DECREF(u)do { if (_Py_RefTotal-- , --((PyObject*)(u))->ob_refcnt !=
0) { if (((PyObject*)u)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7362, (PyObject *)(u)); } else _Py_Dealloc
((PyObject *)(u)); } while (0)
;
7363 Py_DECREF(v)do { if (_Py_RefTotal-- , --((PyObject*)(v))->ob_refcnt !=
0) { if (((PyObject*)v)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7363, (PyObject *)(v)); } else _Py_Dealloc
((PyObject *)(v)); } while (0)
;
7364 return (PyObject *)w;
7365
7366 onError:
7367 Py_XDECREF(u)do { if ((u) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(u))->ob_refcnt != 0) { if (((PyObject*)u)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 7367, (PyObject *)(u)); } else _Py_Dealloc((PyObject *)(u))
; } while (0); } while (0)
;
7368 Py_XDECREF(v)do { if ((v) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(v))->ob_refcnt != 0) { if (((PyObject*)v)->
ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 7368, (PyObject *)(v)); } else _Py_Dealloc((PyObject *)(v))
; } while (0); } while (0)
;
7369 return NULL((void *)0);
7370}
7371
7372void
7373PyUnicode_AppendPyUnicodeUCS2_Append(PyObject **pleft, PyObject *right)
7374{
7375 PyObject *new;
7376 if (*pleft == NULL((void *)0))
7377 return;
7378 if (right == NULL((void *)0) || !PyUnicode_Check(*pleft)((((((PyObject*)(*pleft))->ob_type))->tp_flags & ((
1L<<28))) != 0)
) {
7379 Py_DECREF(*pleft)do { if (_Py_RefTotal-- , --((PyObject*)(*pleft))->ob_refcnt
!= 0) { if (((PyObject*)*pleft)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7379, (PyObject *)(*pleft)); } else
_Py_Dealloc((PyObject *)(*pleft)); } while (0)
;
7380 *pleft = NULL((void *)0);
7381 return;
7382 }
7383 new = PyUnicode_ConcatPyUnicodeUCS2_Concat(*pleft, right);
7384 Py_DECREF(*pleft)do { if (_Py_RefTotal-- , --((PyObject*)(*pleft))->ob_refcnt
!= 0) { if (((PyObject*)*pleft)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7384, (PyObject *)(*pleft)); } else
_Py_Dealloc((PyObject *)(*pleft)); } while (0)
;
7385 *pleft = new;
7386}
7387
7388void
7389PyUnicode_AppendAndDelPyUnicodeUCS2_AppendAndDel(PyObject **pleft, PyObject *right)
7390{
7391 PyUnicode_AppendPyUnicodeUCS2_Append(pleft, right);
7392 Py_XDECREF(right)do { if ((right) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(right))->ob_refcnt != 0) { if (((PyObject
*)right)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 7392, (PyObject *)(right)); } else _Py_Dealloc((PyObject *)
(right)); } while (0); } while (0)
;
7393}
7394
7395PyDoc_STRVAR(count__doc__,static char count__doc__[] = "S.count(sub[, start[, end]]) -> int\n\nReturn the number of non-overlapping occurrences of substring sub in\nstring S[start:end]. Optional arguments start and end are\ninterpreted as in slice notation."
7396 "S.count(sub[, start[, end]]) -> int\n\static char count__doc__[] = "S.count(sub[, start[, end]]) -> int\n\nReturn the number of non-overlapping occurrences of substring sub in\nstring S[start:end]. Optional arguments start and end are\ninterpreted as in slice notation."
7397\n\static char count__doc__[] = "S.count(sub[, start[, end]]) -> int\n\nReturn the number of non-overlapping occurrences of substring sub in\nstring S[start:end]. Optional arguments start and end are\ninterpreted as in slice notation."
7398Return the number of non-overlapping occurrences of substring sub in\n\static char count__doc__[] = "S.count(sub[, start[, end]]) -> int\n\nReturn the number of non-overlapping occurrences of substring sub in\nstring S[start:end]. Optional arguments start and end are\ninterpreted as in slice notation."
7399string S[start:end]. Optional arguments start and end are\n\static char count__doc__[] = "S.count(sub[, start[, end]]) -> int\n\nReturn the number of non-overlapping occurrences of substring sub in\nstring S[start:end]. Optional arguments start and end are\ninterpreted as in slice notation."
7400interpreted as in slice notation.")static char count__doc__[] = "S.count(sub[, start[, end]]) -> int\n\nReturn the number of non-overlapping occurrences of substring sub in\nstring S[start:end]. Optional arguments start and end are\ninterpreted as in slice notation.";
7401
7402static PyObject *
7403unicode_count(PyUnicodeObject *self, PyObject *args)
7404{
7405 PyUnicodeObject *substring;
7406 Py_ssize_t start = 0;
7407 Py_ssize_t end = PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1));
7408 PyObject *result;
7409
7410 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "O|O&O&:count", &substring,
7411 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
7412 return NULL((void *)0);
7413
7414 substring = (PyUnicodeObject *)PyUnicode_FromObjectPyUnicodeUCS2_FromObject(
7415 (PyObject *)substring);
7416 if (substring == NULL((void *)0))
7417 return NULL((void *)0);
7418
7419 ADJUST_INDICES(start, end, self->length)if (end > self->length) end = self->length; else if (
end < 0) { end += self->length; if (end < 0) end = 0
; } if (start < 0) { start += self->length; if (start <
0) start = 0; }
;
7420 result = PyLong_FromSsize_t(
7421 stringlib_count(self->str + start, end - start,
7422 substring->str, substring->length,
7423 PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)))
7424 );
7425
7426 Py_DECREF(substring)do { if (_Py_RefTotal-- , --((PyObject*)(substring))->ob_refcnt
!= 0) { if (((PyObject*)substring)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7426, (PyObject *)(substring)); }
else _Py_Dealloc((PyObject *)(substring)); } while (0)
;
7427
7428 return result;
7429}
7430
7431PyDoc_STRVAR(encode__doc__,static char encode__doc__[] = "S.encode(encoding='utf-8', errors='strict') -> bytes\n\nEncode S using the codec registered for encoding. Default encoding\nis 'utf-8'. errors may be given to set a different error\nhandling scheme. Default is 'strict' meaning that encoding errors raise\na UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n'xmlcharrefreplace' as well as any other name registered with\ncodecs.register_error that can handle UnicodeEncodeErrors."
7432 "S.encode(encoding='utf-8', errors='strict') -> bytes\n\static char encode__doc__[] = "S.encode(encoding='utf-8', errors='strict') -> bytes\n\nEncode S using the codec registered for encoding. Default encoding\nis 'utf-8'. errors may be given to set a different error\nhandling scheme. Default is 'strict' meaning that encoding errors raise\na UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n'xmlcharrefreplace' as well as any other name registered with\ncodecs.register_error that can handle UnicodeEncodeErrors."
7433\n\static char encode__doc__[] = "S.encode(encoding='utf-8', errors='strict') -> bytes\n\nEncode S using the codec registered for encoding. Default encoding\nis 'utf-8'. errors may be given to set a different error\nhandling scheme. Default is 'strict' meaning that encoding errors raise\na UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n'xmlcharrefreplace' as well as any other name registered with\ncodecs.register_error that can handle UnicodeEncodeErrors."
7434Encode S using the codec registered for encoding. Default encoding\n\static char encode__doc__[] = "S.encode(encoding='utf-8', errors='strict') -> bytes\n\nEncode S using the codec registered for encoding. Default encoding\nis 'utf-8'. errors may be given to set a different error\nhandling scheme. Default is 'strict' meaning that encoding errors raise\na UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n'xmlcharrefreplace' as well as any other name registered with\ncodecs.register_error that can handle UnicodeEncodeErrors."
7435is 'utf-8'. errors may be given to set a different error\n\static char encode__doc__[] = "S.encode(encoding='utf-8', errors='strict') -> bytes\n\nEncode S using the codec registered for encoding. Default encoding\nis 'utf-8'. errors may be given to set a different error\nhandling scheme. Default is 'strict' meaning that encoding errors raise\na UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n'xmlcharrefreplace' as well as any other name registered with\ncodecs.register_error that can handle UnicodeEncodeErrors."
7436handling scheme. Default is 'strict' meaning that encoding errors raise\n\static char encode__doc__[] = "S.encode(encoding='utf-8', errors='strict') -> bytes\n\nEncode S using the codec registered for encoding. Default encoding\nis 'utf-8'. errors may be given to set a different error\nhandling scheme. Default is 'strict' meaning that encoding errors raise\na UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n'xmlcharrefreplace' as well as any other name registered with\ncodecs.register_error that can handle UnicodeEncodeErrors."
7437a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\static char encode__doc__[] = "S.encode(encoding='utf-8', errors='strict') -> bytes\n\nEncode S using the codec registered for encoding. Default encoding\nis 'utf-8'. errors may be given to set a different error\nhandling scheme. Default is 'strict' meaning that encoding errors raise\na UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n'xmlcharrefreplace' as well as any other name registered with\ncodecs.register_error that can handle UnicodeEncodeErrors."
7438'xmlcharrefreplace' as well as any other name registered with\n\static char encode__doc__[] = "S.encode(encoding='utf-8', errors='strict') -> bytes\n\nEncode S using the codec registered for encoding. Default encoding\nis 'utf-8'. errors may be given to set a different error\nhandling scheme. Default is 'strict' meaning that encoding errors raise\na UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n'xmlcharrefreplace' as well as any other name registered with\ncodecs.register_error that can handle UnicodeEncodeErrors."
7439codecs.register_error that can handle UnicodeEncodeErrors.")static char encode__doc__[] = "S.encode(encoding='utf-8', errors='strict') -> bytes\n\nEncode S using the codec registered for encoding. Default encoding\nis 'utf-8'. errors may be given to set a different error\nhandling scheme. Default is 'strict' meaning that encoding errors raise\na UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n'xmlcharrefreplace' as well as any other name registered with\ncodecs.register_error that can handle UnicodeEncodeErrors.";
7440
7441static PyObject *
7442unicode_encode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
7443{
7444 static char *kwlist[] = {"encoding", "errors", 0};
7445 char *encoding = NULL((void *)0);
7446 char *errors = NULL((void *)0);
7447
7448 if (!PyArg_ParseTupleAndKeywords_PyArg_ParseTupleAndKeywords_SizeT(args, kwargs, "|ss:encode",
7449 kwlist, &encoding, &errors))
7450 return NULL((void *)0);
7451 return PyUnicode_AsEncodedStringPyUnicodeUCS2_AsEncodedString((PyObject *)self, encoding, errors);
7452}
7453
7454PyDoc_STRVAR(expandtabs__doc__,static char expandtabs__doc__[] = "S.expandtabs([tabsize]) -> str\n\nReturn a copy of S where all tab characters are expanded using spaces.\nIf tabsize is not given, a tab size of 8 characters is assumed."
7455 "S.expandtabs([tabsize]) -> str\n\static char expandtabs__doc__[] = "S.expandtabs([tabsize]) -> str\n\nReturn a copy of S where all tab characters are expanded using spaces.\nIf tabsize is not given, a tab size of 8 characters is assumed."
7456\n\static char expandtabs__doc__[] = "S.expandtabs([tabsize]) -> str\n\nReturn a copy of S where all tab characters are expanded using spaces.\nIf tabsize is not given, a tab size of 8 characters is assumed."
7457Return a copy of S where all tab characters are expanded using spaces.\n\static char expandtabs__doc__[] = "S.expandtabs([tabsize]) -> str\n\nReturn a copy of S where all tab characters are expanded using spaces.\nIf tabsize is not given, a tab size of 8 characters is assumed."
7458If tabsize is not given, a tab size of 8 characters is assumed.")static char expandtabs__doc__[] = "S.expandtabs([tabsize]) -> str\n\nReturn a copy of S where all tab characters are expanded using spaces.\nIf tabsize is not given, a tab size of 8 characters is assumed.";
7459
7460static PyObject*
7461unicode_expandtabs(PyUnicodeObject *self, PyObject *args)
7462{
7463 Py_UNICODE *e;
7464 Py_UNICODE *p;
7465 Py_UNICODE *q;
7466 Py_UNICODE *qe;
7467 Py_ssize_t i, j, incr;
7468 PyUnicodeObject *u;
7469 int tabsize = 8;
7470
7471 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "|i:expandtabs", &tabsize))
7472 return NULL((void *)0);
7473
7474 /* First pass: determine size of output string */
7475 i = 0; /* chars up to and including most recent \n or \r */
7476 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
7477 e = self->str + self->length; /* end of input */
7478 for (p = self->str; p < e; p++)
7479 if (*p == '\t') {
7480 if (tabsize > 0) {
7481 incr = tabsize - (j % tabsize); /* cannot overflow */
7482 if (j > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) - incr)
7483 goto overflow1;
7484 j += incr;
7485 }
7486 }
7487 else {
7488 if (j > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) - 1)
7489 goto overflow1;
7490 j++;
7491 if (*p == '\n' || *p == '\r') {
7492 if (i > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) - j)
7493 goto overflow1;
7494 i += j;
7495 j = 0;
7496 }
7497 }
7498
7499 if (i > PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) - j)
7500 goto overflow1;
7501
7502 /* Second pass: create output string and fill it */
7503 u = _PyUnicode_New(i + j);
7504 if (!u)
7505 return NULL((void *)0);
7506
7507 j = 0; /* same as in first pass */
7508 q = u->str; /* next output char */
7509 qe = u->str + u->length; /* end of output */
7510
7511 for (p = self->str; p < e; p++)
7512 if (*p == '\t') {
7513 if (tabsize > 0) {
7514 i = tabsize - (j % tabsize);
7515 j += i;
7516 while (i--) {
7517 if (q >= qe)
7518 goto overflow2;
7519 *q++ = ' ';
7520 }
7521 }
7522 }
7523 else {
7524 if (q >= qe)
7525 goto overflow2;
7526 *q++ = *p;
7527 j++;
7528 if (*p == '\n' || *p == '\r')
7529 j = 0;
7530 }
7531
7532 return (PyObject*) u;
7533
7534 overflow2:
7535 Py_DECREF(u)do { if (_Py_RefTotal-- , --((PyObject*)(u))->ob_refcnt !=
0) { if (((PyObject*)u)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7535, (PyObject *)(u)); } else _Py_Dealloc
((PyObject *)(u)); } while (0)
;
7536 overflow1:
7537 PyErr_SetString(PyExc_OverflowError, "new string is too long");
7538 return NULL((void *)0);
7539}
7540
/* find__doc__: help text for S.find(); each listing line below is followed by the static char array the macro expands to. */
7541PyDoc_STRVAR(find__doc__,static char find__doc__[] = "S.find(sub[, start[, end]]) -> int\n\nReturn the lowest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
7542 "S.find(sub[, start[, end]]) -> int\n\static char find__doc__[] = "S.find(sub[, start[, end]]) -> int\n\nReturn the lowest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
7543\n\static char find__doc__[] = "S.find(sub[, start[, end]]) -> int\n\nReturn the lowest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
7544Return the lowest index in S where substring sub is found,\n\static char find__doc__[] = "S.find(sub[, start[, end]]) -> int\n\nReturn the lowest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
7545such that sub is contained within s[start:end]. Optional\n\static char find__doc__[] = "S.find(sub[, start[, end]]) -> int\n\nReturn the lowest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
7546arguments start and end are interpreted as in slice notation.\n\static char find__doc__[] = "S.find(sub[, start[, end]]) -> int\n\nReturn the lowest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
7547\n\static char find__doc__[] = "S.find(sub[, start[, end]]) -> int\n\nReturn the lowest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
7548Return -1 on failure.")static char find__doc__[] = "S.find(sub[, start[, end]]) -> int\n\nReturn the lowest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure.";
7549
7550static PyObject *
7551unicode_find(PyUnicodeObject *self, PyObject *args)
7552{
7553 PyObject *substring;
7554 Py_ssize_t start;
7555 Py_ssize_t end;
7556 Py_ssize_t result;
7557
7558 if (!_ParseTupleFinds(args, &substring, &start, &end))
7559 return NULL((void *)0);
7560
7561 result = stringlib_find_slice(
7562 PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7562, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
, PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7562, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
,
7563 PyUnicode_AS_UNICODE(substring)((__builtin_expect(!(((((((PyObject*)(substring))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 7563, "PyUnicode_Check(substring)"
) : (void)0),(((PyUnicodeObject *)(substring))->str))
, PyUnicode_GET_SIZE(substring)((__builtin_expect(!(((((((PyObject*)(substring))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 7563, "PyUnicode_Check(substring)"
) : (void)0),(((PyUnicodeObject *)(substring))->length))
,
7564 start, end
7565 );
7566
7567 Py_DECREF(substring)do { if (_Py_RefTotal-- , --((PyObject*)(substring))->ob_refcnt
!= 0) { if (((PyObject*)substring)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7567, (PyObject *)(substring)); }
else _Py_Dealloc((PyObject *)(substring)); } while (0)
;
7568
7569 return PyLong_FromSsize_t(result);
7570}
7571
7572static PyObject *
7573unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
7574{
7575 if (index < 0 || index >= self->length) {
7576 PyErr_SetString(PyExc_IndexError, "string index out of range");
7577 return NULL((void *)0);
7578 }
7579
7580 return (PyObject*) PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(&self->str[index], 1);
7581}
7582
7583/* Believe it or not, this produces the same value for ASCII strings
7584 as string_hash(). */
7585static Py_hash_t
7586unicode_hash(PyUnicodeObject *self)
7587{
7588 Py_ssize_t len;
7589 Py_UNICODE *p;
7590 Py_hash_t x;
7591
7592 if (self->hash != -1)
7593 return self->hash;
7594 len = Py_SIZE(self)(((PyVarObject*)(self))->ob_size);
7595 p = self->str;
7596 x = *p << 7;
7597 while (--len >= 0)
7598 x = (1000003*x) ^ *p++;
7599 x ^= Py_SIZE(self)(((PyVarObject*)(self))->ob_size);
7600 if (x == -1)
7601 x = -2;
7602 self->hash = x;
7603 return x;
7604}
7605
7606PyDoc_STRVAR(index__doc__,static char index__doc__[] = "S.index(sub[, start[, end]]) -> int\n\nLike S.find() but raise ValueError when the substring is not found."
7607 "S.index(sub[, start[, end]]) -> int\n\static char index__doc__[] = "S.index(sub[, start[, end]]) -> int\n\nLike S.find() but raise ValueError when the substring is not found."
7608\n\static char index__doc__[] = "S.index(sub[, start[, end]]) -> int\n\nLike S.find() but raise ValueError when the substring is not found."
7609Like S.find() but raise ValueError when the substring is not found.")static char index__doc__[] = "S.index(sub[, start[, end]]) -> int\n\nLike S.find() but raise ValueError when the substring is not found.";
7610
7611static PyObject *
7612unicode_index(PyUnicodeObject *self, PyObject *args)
7613{
7614 Py_ssize_t result;
7615 PyObject *substring;
7616 Py_ssize_t start;
7617 Py_ssize_t end;
7618
7619 if (!_ParseTupleFinds(args, &substring, &start, &end))
7620 return NULL((void *)0);
7621
7622 result = stringlib_find_slice(
7623 PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7623, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
, PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7623, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
,
7624 PyUnicode_AS_UNICODE(substring)((__builtin_expect(!(((((((PyObject*)(substring))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 7624, "PyUnicode_Check(substring)"
) : (void)0),(((PyUnicodeObject *)(substring))->str))
, PyUnicode_GET_SIZE(substring)((__builtin_expect(!(((((((PyObject*)(substring))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 7624, "PyUnicode_Check(substring)"
) : (void)0),(((PyUnicodeObject *)(substring))->length))
,
7625 start, end
7626 );
7627
7628 Py_DECREF(substring)do { if (_Py_RefTotal-- , --((PyObject*)(substring))->ob_refcnt
!= 0) { if (((PyObject*)substring)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 7628, (PyObject *)(substring)); }
else _Py_Dealloc((PyObject *)(substring)); } while (0)
;
7629
7630 if (result < 0) {
7631 PyErr_SetString(PyExc_ValueError, "substring not found");
7632 return NULL((void *)0);
7633 }
7634
7635 return PyLong_FromSsize_t(result);
7636}
7637
7638PyDoc_STRVAR(islower__doc__,static char islower__doc__[] = "S.islower() -> bool\n\nReturn True if all cased characters in S are lowercase and there is\nat least one cased character in S, False otherwise."
7639 "S.islower() -> bool\n\static char islower__doc__[] = "S.islower() -> bool\n\nReturn True if all cased characters in S are lowercase and there is\nat least one cased character in S, False otherwise."
7640\n\static char islower__doc__[] = "S.islower() -> bool\n\nReturn True if all cased characters in S are lowercase and there is\nat least one cased character in S, False otherwise."
7641Return True if all cased characters in S are lowercase and there is\n\static char islower__doc__[] = "S.islower() -> bool\n\nReturn True if all cased characters in S are lowercase and there is\nat least one cased character in S, False otherwise."
7642at least one cased character in S, False otherwise.")static char islower__doc__[] = "S.islower() -> bool\n\nReturn True if all cased characters in S are lowercase and there is\nat least one cased character in S, False otherwise.";
7643
7644static PyObject*
7645unicode_islower(PyUnicodeObject *self)
7646{
7647 register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7647, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
7648 register const Py_UNICODE *e;
7649 int cased;
7650
7651 /* Shortcut for single character strings */
7652 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7652, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1)
7653 return PyBool_FromLong(Py_UNICODE_ISLOWER(*p)_PyUnicode_IsLowercase(*p));
7654
7655 /* Special case for empty strings */
7656 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7656, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 0)
7657 return PyBool_FromLong(0);
7658
7659 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7659, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7660 cased = 0;
7661 for (; p < e; p++) {
7662 register const Py_UNICODE ch = *p;
7663
7664 if (Py_UNICODE_ISUPPER(ch)_PyUnicode_IsUppercase(ch) || Py_UNICODE_ISTITLE(ch)_PyUnicode_IsTitlecase(ch))
7665 return PyBool_FromLong(0);
7666 else if (!cased && Py_UNICODE_ISLOWER(ch)_PyUnicode_IsLowercase(ch))
7667 cased = 1;
7668 }
7669 return PyBool_FromLong(cased);
7670}
7671
7672PyDoc_STRVAR(isupper__doc__,static char isupper__doc__[] = "S.isupper() -> bool\n\nReturn True if all cased characters in S are uppercase and there is\nat least one cased character in S, False otherwise."
7673 "S.isupper() -> bool\n\static char isupper__doc__[] = "S.isupper() -> bool\n\nReturn True if all cased characters in S are uppercase and there is\nat least one cased character in S, False otherwise."
7674\n\static char isupper__doc__[] = "S.isupper() -> bool\n\nReturn True if all cased characters in S are uppercase and there is\nat least one cased character in S, False otherwise."
7675Return True if all cased characters in S are uppercase and there is\n\static char isupper__doc__[] = "S.isupper() -> bool\n\nReturn True if all cased characters in S are uppercase and there is\nat least one cased character in S, False otherwise."
7676at least one cased character in S, False otherwise.")static char isupper__doc__[] = "S.isupper() -> bool\n\nReturn True if all cased characters in S are uppercase and there is\nat least one cased character in S, False otherwise.";
7677
7678static PyObject*
7679unicode_isupper(PyUnicodeObject *self)
7680{
7681 register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7681, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
7682 register const Py_UNICODE *e;
7683 int cased;
7684
7685 /* Shortcut for single character strings */
7686 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7686, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1)
7687 return PyBool_FromLong(Py_UNICODE_ISUPPER(*p)_PyUnicode_IsUppercase(*p) != 0);
7688
7689 /* Special case for empty strings */
7690 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7690, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 0)
7691 return PyBool_FromLong(0);
7692
7693 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7693, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7694 cased = 0;
7695 for (; p < e; p++) {
7696 register const Py_UNICODE ch = *p;
7697
7698 if (Py_UNICODE_ISLOWER(ch)_PyUnicode_IsLowercase(ch) || Py_UNICODE_ISTITLE(ch)_PyUnicode_IsTitlecase(ch))
7699 return PyBool_FromLong(0);
7700 else if (!cased && Py_UNICODE_ISUPPER(ch)_PyUnicode_IsUppercase(ch))
7701 cased = 1;
7702 }
7703 return PyBool_FromLong(cased);
7704}
7705
7706PyDoc_STRVAR(istitle__doc__,static char istitle__doc__[] = "S.istitle() -> bool\n\nReturn True if S is a titlecased string and there is at least one\ncharacter in S, i.e. upper- and titlecase characters may only\nfollow uncased characters and lowercase characters only cased ones.\nReturn False otherwise."
7707 "S.istitle() -> bool\n\static char istitle__doc__[] = "S.istitle() -> bool\n\nReturn True if S is a titlecased string and there is at least one\ncharacter in S, i.e. upper- and titlecase characters may only\nfollow uncased characters and lowercase characters only cased ones.\nReturn False otherwise."
7708\n\static char istitle__doc__[] = "S.istitle() -> bool\n\nReturn True if S is a titlecased string and there is at least one\ncharacter in S, i.e. upper- and titlecase characters may only\nfollow uncased characters and lowercase characters only cased ones.\nReturn False otherwise."
7709Return True if S is a titlecased string and there is at least one\n\static char istitle__doc__[] = "S.istitle() -> bool\n\nReturn True if S is a titlecased string and there is at least one\ncharacter in S, i.e. upper- and titlecase characters may only\nfollow uncased characters and lowercase characters only cased ones.\nReturn False otherwise."
7710character in S, i.e. upper- and titlecase characters may only\n\static char istitle__doc__[] = "S.istitle() -> bool\n\nReturn True if S is a titlecased string and there is at least one\ncharacter in S, i.e. upper- and titlecase characters may only\nfollow uncased characters and lowercase characters only cased ones.\nReturn False otherwise."
7711follow uncased characters and lowercase characters only cased ones.\n\static char istitle__doc__[] = "S.istitle() -> bool\n\nReturn True if S is a titlecased string and there is at least one\ncharacter in S, i.e. upper- and titlecase characters may only\nfollow uncased characters and lowercase characters only cased ones.\nReturn False otherwise."
7712Return False otherwise.")static char istitle__doc__[] = "S.istitle() -> bool\n\nReturn True if S is a titlecased string and there is at least one\ncharacter in S, i.e. upper- and titlecase characters may only\nfollow uncased characters and lowercase characters only cased ones.\nReturn False otherwise.";
7713
7714static PyObject*
7715unicode_istitle(PyUnicodeObject *self)
7716{
7717 register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7717, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
7718 register const Py_UNICODE *e;
7719 int cased, previous_is_cased;
7720
7721 /* Shortcut for single character strings */
7722 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7722, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1)
7723 return PyBool_FromLong((Py_UNICODE_ISTITLE(*p)_PyUnicode_IsTitlecase(*p) != 0) ||
7724 (Py_UNICODE_ISUPPER(*p)_PyUnicode_IsUppercase(*p) != 0));
7725
7726 /* Special case for empty strings */
7727 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7727, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 0)
7728 return PyBool_FromLong(0);
7729
7730 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7730, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7731 cased = 0;
7732 previous_is_cased = 0;
7733 for (; p < e; p++) {
7734 register const Py_UNICODE ch = *p;
7735
7736 if (Py_UNICODE_ISUPPER(ch)_PyUnicode_IsUppercase(ch) || Py_UNICODE_ISTITLE(ch)_PyUnicode_IsTitlecase(ch)) {
7737 if (previous_is_cased)
7738 return PyBool_FromLong(0);
7739 previous_is_cased = 1;
7740 cased = 1;
7741 }
7742 else if (Py_UNICODE_ISLOWER(ch)_PyUnicode_IsLowercase(ch)) {
7743 if (!previous_is_cased)
7744 return PyBool_FromLong(0);
7745 previous_is_cased = 1;
7746 cased = 1;
7747 }
7748 else
7749 previous_is_cased = 0;
7750 }
7751 return PyBool_FromLong(cased);
7752}
7753
7754PyDoc_STRVAR(isspace__doc__,static char isspace__doc__[] = "S.isspace() -> bool\n\nReturn True if all characters in S are whitespace\nand there is at least one character in S, False otherwise."
7755 "S.isspace() -> bool\n\static char isspace__doc__[] = "S.isspace() -> bool\n\nReturn True if all characters in S are whitespace\nand there is at least one character in S, False otherwise."
7756\n\static char isspace__doc__[] = "S.isspace() -> bool\n\nReturn True if all characters in S are whitespace\nand there is at least one character in S, False otherwise."
7757Return True if all characters in S are whitespace\n\static char isspace__doc__[] = "S.isspace() -> bool\n\nReturn True if all characters in S are whitespace\nand there is at least one character in S, False otherwise."
7758and there is at least one character in S, False otherwise.")static char isspace__doc__[] = "S.isspace() -> bool\n\nReturn True if all characters in S are whitespace\nand there is at least one character in S, False otherwise.";
7759
7760static PyObject*
7761unicode_isspace(PyUnicodeObject *self)
7762{
7763 register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7763, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
7764 register const Py_UNICODE *e;
7765
7766 /* Shortcut for single character strings */
7767 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7767, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1 &&
7768 Py_UNICODE_ISSPACE(*p)((*p) < 128U ? _Py_ascii_whitespace[(*p)] : _PyUnicode_IsWhitespace
(*p))
)
7769 return PyBool_FromLong(1);
7770
7771 /* Special case for empty strings */
7772 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7772, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 0)
7773 return PyBool_FromLong(0);
7774
7775 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7775, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7776 for (; p < e; p++) {
7777 if (!Py_UNICODE_ISSPACE(*p)((*p) < 128U ? _Py_ascii_whitespace[(*p)] : _PyUnicode_IsWhitespace
(*p))
)
7778 return PyBool_FromLong(0);
7779 }
7780 return PyBool_FromLong(1);
7781}
7782
7783PyDoc_STRVAR(isalpha__doc__,static char isalpha__doc__[] = "S.isalpha() -> bool\n\nReturn True if all characters in S are alphabetic\nand there is at least one character in S, False otherwise."
7784 "S.isalpha() -> bool\n\static char isalpha__doc__[] = "S.isalpha() -> bool\n\nReturn True if all characters in S are alphabetic\nand there is at least one character in S, False otherwise."
7785\n\static char isalpha__doc__[] = "S.isalpha() -> bool\n\nReturn True if all characters in S are alphabetic\nand there is at least one character in S, False otherwise."
7786Return True if all characters in S are alphabetic\n\static char isalpha__doc__[] = "S.isalpha() -> bool\n\nReturn True if all characters in S are alphabetic\nand there is at least one character in S, False otherwise."
7787and there is at least one character in S, False otherwise.")static char isalpha__doc__[] = "S.isalpha() -> bool\n\nReturn True if all characters in S are alphabetic\nand there is at least one character in S, False otherwise.";
7788
7789static PyObject*
7790unicode_isalpha(PyUnicodeObject *self)
7791{
7792 register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7792, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
7793 register const Py_UNICODE *e;
7794
7795 /* Shortcut for single character strings */
7796 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7796, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1 &&
7797 Py_UNICODE_ISALPHA(*p)_PyUnicode_IsAlpha(*p))
7798 return PyBool_FromLong(1);
7799
7800 /* Special case for empty strings */
7801 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7801, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 0)
7802 return PyBool_FromLong(0);
7803
7804 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7804, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7805 for (; p < e; p++) {
7806 if (!Py_UNICODE_ISALPHA(*p)_PyUnicode_IsAlpha(*p))
7807 return PyBool_FromLong(0);
7808 }
7809 return PyBool_FromLong(1);
7810}
7811
7812PyDoc_STRVAR(isalnum__doc__,static char isalnum__doc__[] = "S.isalnum() -> bool\n\nReturn True if all characters in S are alphanumeric\nand there is at least one character in S, False otherwise."
7813 "S.isalnum() -> bool\n\static char isalnum__doc__[] = "S.isalnum() -> bool\n\nReturn True if all characters in S are alphanumeric\nand there is at least one character in S, False otherwise."
7814\n\static char isalnum__doc__[] = "S.isalnum() -> bool\n\nReturn True if all characters in S are alphanumeric\nand there is at least one character in S, False otherwise."
7815Return True if all characters in S are alphanumeric\n\static char isalnum__doc__[] = "S.isalnum() -> bool\n\nReturn True if all characters in S are alphanumeric\nand there is at least one character in S, False otherwise."
7816and there is at least one character in S, False otherwise.")static char isalnum__doc__[] = "S.isalnum() -> bool\n\nReturn True if all characters in S are alphanumeric\nand there is at least one character in S, False otherwise.";
7817
7818static PyObject*
7819unicode_isalnum(PyUnicodeObject *self)
7820{
7821 register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7821, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
7822 register const Py_UNICODE *e;
7823
7824 /* Shortcut for single character strings */
7825 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7825, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1 &&
7826 Py_UNICODE_ISALNUM(*p)(_PyUnicode_IsAlpha(*p) || _PyUnicode_IsDecimalDigit(*p) || _PyUnicode_IsDigit
(*p) || _PyUnicode_IsNumeric(*p))
)
7827 return PyBool_FromLong(1);
7828
7829 /* Special case for empty strings */
7830 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7830, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 0)
7831 return PyBool_FromLong(0);
7832
7833 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7833, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7834 for (; p < e; p++) {
7835 if (!Py_UNICODE_ISALNUM(*p)(_PyUnicode_IsAlpha(*p) || _PyUnicode_IsDecimalDigit(*p) || _PyUnicode_IsDigit
(*p) || _PyUnicode_IsNumeric(*p))
)
7836 return PyBool_FromLong(0);
7837 }
7838 return PyBool_FromLong(1);
7839}
7840
7841PyDoc_STRVAR(isdecimal__doc__,static char isdecimal__doc__[] = "S.isdecimal() -> bool\n\nReturn True if there are only decimal characters in S,\nFalse otherwise."
7842 "S.isdecimal() -> bool\n\static char isdecimal__doc__[] = "S.isdecimal() -> bool\n\nReturn True if there are only decimal characters in S,\nFalse otherwise."
7843\n\static char isdecimal__doc__[] = "S.isdecimal() -> bool\n\nReturn True if there are only decimal characters in S,\nFalse otherwise."
7844Return True if there are only decimal characters in S,\n\static char isdecimal__doc__[] = "S.isdecimal() -> bool\n\nReturn True if there are only decimal characters in S,\nFalse otherwise."
7845False otherwise.")static char isdecimal__doc__[] = "S.isdecimal() -> bool\n\nReturn True if there are only decimal characters in S,\nFalse otherwise.";
7846
7847static PyObject*
7848unicode_isdecimal(PyUnicodeObject *self)
7849{
7850 register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7850, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
7851 register const Py_UNICODE *e;
7852
7853 /* Shortcut for single character strings */
7854 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7854, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1 &&
7855 Py_UNICODE_ISDECIMAL(*p)_PyUnicode_IsDecimalDigit(*p))
7856 return PyBool_FromLong(1);
7857
7858 /* Special case for empty strings */
7859 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7859, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 0)
7860 return PyBool_FromLong(0);
7861
7862 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7862, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7863 for (; p < e; p++) {
7864 if (!Py_UNICODE_ISDECIMAL(*p)_PyUnicode_IsDecimalDigit(*p))
7865 return PyBool_FromLong(0);
7866 }
7867 return PyBool_FromLong(1);
7868}
7869
7870PyDoc_STRVAR(isdigit__doc__,static char isdigit__doc__[] = "S.isdigit() -> bool\n\nReturn True if all characters in S are digits\nand there is at least one character in S, False otherwise."
7871 "S.isdigit() -> bool\n\static char isdigit__doc__[] = "S.isdigit() -> bool\n\nReturn True if all characters in S are digits\nand there is at least one character in S, False otherwise."
7872\n\static char isdigit__doc__[] = "S.isdigit() -> bool\n\nReturn True if all characters in S are digits\nand there is at least one character in S, False otherwise."
7873Return True if all characters in S are digits\n\static char isdigit__doc__[] = "S.isdigit() -> bool\n\nReturn True if all characters in S are digits\nand there is at least one character in S, False otherwise."
7874and there is at least one character in S, False otherwise.")static char isdigit__doc__[] = "S.isdigit() -> bool\n\nReturn True if all characters in S are digits\nand there is at least one character in S, False otherwise.";
7875
7876static PyObject*
7877unicode_isdigit(PyUnicodeObject *self)
7878{
7879 register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7879, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
7880 register const Py_UNICODE *e;
7881
7882 /* Shortcut for single character strings */
7883 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7883, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1 &&
7884 Py_UNICODE_ISDIGIT(*p)_PyUnicode_IsDigit(*p))
7885 return PyBool_FromLong(1);
7886
7887 /* Special case for empty strings */
7888 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7888, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 0)
7889 return PyBool_FromLong(0);
7890
7891 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7891, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7892 for (; p < e; p++) {
7893 if (!Py_UNICODE_ISDIGIT(*p)_PyUnicode_IsDigit(*p))
7894 return PyBool_FromLong(0);
7895 }
7896 return PyBool_FromLong(1);
7897}
7898
7899PyDoc_STRVAR(isnumeric__doc__,static char isnumeric__doc__[] = "S.isnumeric() -> bool\n\nReturn True if there are only numeric characters in S,\nFalse otherwise."
7900 "S.isnumeric() -> bool\n\static char isnumeric__doc__[] = "S.isnumeric() -> bool\n\nReturn True if there are only numeric characters in S,\nFalse otherwise."
7901\n\static char isnumeric__doc__[] = "S.isnumeric() -> bool\n\nReturn True if there are only numeric characters in S,\nFalse otherwise."
7902Return True if there are only numeric characters in S,\n\static char isnumeric__doc__[] = "S.isnumeric() -> bool\n\nReturn True if there are only numeric characters in S,\nFalse otherwise."
7903False otherwise.")static char isnumeric__doc__[] = "S.isnumeric() -> bool\n\nReturn True if there are only numeric characters in S,\nFalse otherwise.";
7904
7905static PyObject*
7906unicode_isnumeric(PyUnicodeObject *self)
7907{
7908 register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7908, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
7909 register const Py_UNICODE *e;
7910
7911 /* Shortcut for single character strings */
7912 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7912, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1 &&
7913 Py_UNICODE_ISNUMERIC(*p)_PyUnicode_IsNumeric(*p))
7914 return PyBool_FromLong(1);
7915
7916 /* Special case for empty strings */
7917 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7917, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 0)
7918 return PyBool_FromLong(0);
7919
7920 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7920, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7921 for (; p < e; p++) {
7922 if (!Py_UNICODE_ISNUMERIC(*p)_PyUnicode_IsNumeric(*p))
7923 return PyBool_FromLong(0);
7924 }
7925 return PyBool_FromLong(1);
7926}
7927
7928int
7929PyUnicode_IsIdentifierPyUnicodeUCS2_IsIdentifier(PyObject *self)
7930{
7931 register const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self)((__builtin_expect(!(((((((PyObject*)((PyUnicodeObject*)self)
)->ob_type))->tp_flags & ((1L<<28))) != 0)), 0
) ? __assert_rtn(__func__, "Objects/unicodeobject.c", 7931, "PyUnicode_Check((PyUnicodeObject*)self)"
) : (void)0),(((PyUnicodeObject *)((PyUnicodeObject*)self))->
str))
;
7932 register const Py_UNICODE *e;
7933
7934 /* Special case for empty strings */
7935 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7935, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 0)
7936 return 0;
7937
7938 /* PEP 3131 says that the first character must be in
7939 XID_Start and subsequent characters in XID_Continue,
7940 and for the ASCII range, the 2.x rules apply (i.e
7941 start with letters and underscore, continue with
7942 letters, digits, underscore). However, given the current
7943 definition of XID_Start and XID_Continue, it is sufficient
7944 to check just for these, except that _ must be allowed
7945 as starting an identifier. */
7946 if (!_PyUnicode_IsXidStart(*p) && *p != 0x5F /* LOW LINE */)
7947 return 0;
7948
7949 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7949, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7950 for (p++; p < e; p++) {
7951 if (!_PyUnicode_IsXidContinue(*p))
7952 return 0;
7953 }
7954 return 1;
7955}
7956
7957PyDoc_STRVAR(isidentifier__doc__,static char isidentifier__doc__[] = "S.isidentifier() -> bool\n\nReturn True if S is a valid identifier according\nto the language definition."
7958 "S.isidentifier() -> bool\n\static char isidentifier__doc__[] = "S.isidentifier() -> bool\n\nReturn True if S is a valid identifier according\nto the language definition."
7959\n\static char isidentifier__doc__[] = "S.isidentifier() -> bool\n\nReturn True if S is a valid identifier according\nto the language definition."
7960Return True if S is a valid identifier according\n\static char isidentifier__doc__[] = "S.isidentifier() -> bool\n\nReturn True if S is a valid identifier according\nto the language definition."
7961to the language definition.")static char isidentifier__doc__[] = "S.isidentifier() -> bool\n\nReturn True if S is a valid identifier according\nto the language definition.";
7962
7963static PyObject*
7964unicode_isidentifier(PyObject *self)
7965{
7966 return PyBool_FromLong(PyUnicode_IsIdentifierPyUnicodeUCS2_IsIdentifier(self));
7967}
7968
7969PyDoc_STRVAR(isprintable__doc__,static char isprintable__doc__[] = "S.isprintable() -> bool\n\nReturn True if all characters in S are considered\nprintable in repr() or S is empty, False otherwise."
7970 "S.isprintable() -> bool\n\static char isprintable__doc__[] = "S.isprintable() -> bool\n\nReturn True if all characters in S are considered\nprintable in repr() or S is empty, False otherwise."
7971\n\static char isprintable__doc__[] = "S.isprintable() -> bool\n\nReturn True if all characters in S are considered\nprintable in repr() or S is empty, False otherwise."
7972Return True if all characters in S are considered\n\static char isprintable__doc__[] = "S.isprintable() -> bool\n\nReturn True if all characters in S are considered\nprintable in repr() or S is empty, False otherwise."
7973printable in repr() or S is empty, False otherwise.")static char isprintable__doc__[] = "S.isprintable() -> bool\n\nReturn True if all characters in S are considered\nprintable in repr() or S is empty, False otherwise.";
7974
7975static PyObject*
7976unicode_isprintable(PyObject *self)
7977{
7978 register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7978, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
7979 register const Py_UNICODE *e;
7980
7981 /* Shortcut for single character strings */
7982 if (PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7982, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
== 1 && Py_UNICODE_ISPRINTABLE(*p)_PyUnicode_IsPrintable(*p)) {
7983 Py_RETURN_TRUEreturn ( _Py_RefTotal++ , ((PyObject*)(((PyObject *) &_Py_TrueStruct
)))->ob_refcnt++), ((PyObject *) &_Py_TrueStruct)
;
7984 }
7985
7986 e = p + PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 7986, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
7987 for (; p < e; p++) {
7988 if (!Py_UNICODE_ISPRINTABLE(*p)_PyUnicode_IsPrintable(*p)) {
7989 Py_RETURN_FALSEreturn ( _Py_RefTotal++ , ((PyObject*)(((PyObject *) &_Py_FalseStruct
)))->ob_refcnt++), ((PyObject *) &_Py_FalseStruct)
;
7990 }
7991 }
7992 Py_RETURN_TRUEreturn ( _Py_RefTotal++ , ((PyObject*)(((PyObject *) &_Py_TrueStruct
)))->ob_refcnt++), ((PyObject *) &_Py_TrueStruct)
;
7993}
7994
7995PyDoc_STRVAR(join__doc__,static char join__doc__[] = "S.join(iterable) -> str\n\nReturn a string which is the concatenation of the strings in the\niterable. The separator between elements is S."
7996 "S.join(iterable) -> str\n\static char join__doc__[] = "S.join(iterable) -> str\n\nReturn a string which is the concatenation of the strings in the\niterable. The separator between elements is S."
7997\n\static char join__doc__[] = "S.join(iterable) -> str\n\nReturn a string which is the concatenation of the strings in the\niterable. The separator between elements is S."
7998Return a string which is the concatenation of the strings in the\n\static char join__doc__[] = "S.join(iterable) -> str\n\nReturn a string which is the concatenation of the strings in the\niterable. The separator between elements is S."
7999iterable. The separator between elements is S.")static char join__doc__[] = "S.join(iterable) -> str\n\nReturn a string which is the concatenation of the strings in the\niterable. The separator between elements is S.";
8000
8001static PyObject*
8002unicode_join(PyObject *self, PyObject *data)
8003{
8004 return PyUnicode_JoinPyUnicodeUCS2_Join(self, data);
8005}
8006
8007static Py_ssize_t
8008unicode_length(PyUnicodeObject *self)
8009{
8010 return self->length;
8011}
8012
8013PyDoc_STRVAR(ljust__doc__,static char ljust__doc__[] = "S.ljust(width[, fillchar]) -> str\n\nReturn S left-justified in a Unicode string of length width. Padding is\ndone using the specified fill character (default is a space)."
8014 "S.ljust(width[, fillchar]) -> str\n\static char ljust__doc__[] = "S.ljust(width[, fillchar]) -> str\n\nReturn S left-justified in a Unicode string of length width. Padding is\ndone using the specified fill character (default is a space)."
8015\n\static char ljust__doc__[] = "S.ljust(width[, fillchar]) -> str\n\nReturn S left-justified in a Unicode string of length width. Padding is\ndone using the specified fill character (default is a space)."
8016Return S left-justified in a Unicode string of length width. Padding is\n\static char ljust__doc__[] = "S.ljust(width[, fillchar]) -> str\n\nReturn S left-justified in a Unicode string of length width. Padding is\ndone using the specified fill character (default is a space)."
8017done using the specified fill character (default is a space).")static char ljust__doc__[] = "S.ljust(width[, fillchar]) -> str\n\nReturn S left-justified in a Unicode string of length width. Padding is\ndone using the specified fill character (default is a space).";
8018
8019static PyObject *
8020unicode_ljust(PyUnicodeObject *self, PyObject *args)
8021{
8022 Py_ssize_t width;
8023 Py_UNICODE fillchar = ' ';
8024
8025 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "n|O&:ljust", &width, convert_uc, &fillchar))
8026 return NULL((void *)0);
8027
8028 if (self->length >= width && PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
8029 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
8030 return (PyObject*) self;
8031 }
8032
8033 return (PyObject*) pad(self, 0, width - self->length, fillchar);
8034}
8035
8036PyDoc_STRVAR(lower__doc__,static char lower__doc__[] = "S.lower() -> str\n\nReturn a copy of the string S converted to lowercase."
8037 "S.lower() -> str\n\static char lower__doc__[] = "S.lower() -> str\n\nReturn a copy of the string S converted to lowercase."
8038\n\static char lower__doc__[] = "S.lower() -> str\n\nReturn a copy of the string S converted to lowercase."
8039Return a copy of the string S converted to lowercase.")static char lower__doc__[] = "S.lower() -> str\n\nReturn a copy of the string S converted to lowercase.";
8040
8041static PyObject*
8042unicode_lower(PyUnicodeObject *self)
8043{
8044 return fixup(self, fixlower);
8045}
8046
/* Strip modes.  Each value doubles as an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip modes above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip mode i: the format string without its
   three-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
8055
8056/* externally visible for str.strip(unicode) */
8057PyObject *
8058_PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj)
8059{
8060 Py_UNICODE *s = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8060, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
8061 Py_ssize_t len = PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8061, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
8062 Py_UNICODE *sep = PyUnicode_AS_UNICODE(sepobj)((__builtin_expect(!(((((((PyObject*)(sepobj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8062, "PyUnicode_Check(sepobj)")
: (void)0),(((PyUnicodeObject *)(sepobj))->str))
;
8063 Py_ssize_t seplen = PyUnicode_GET_SIZE(sepobj)((__builtin_expect(!(((((((PyObject*)(sepobj))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8063, "PyUnicode_Check(sepobj)")
: (void)0),(((PyUnicodeObject *)(sepobj))->length))
;
8064 Py_ssize_t i, j;
8065
8066 BLOOM_MASKunsigned long sepmask = make_bloom_mask(sep, seplen);
8067
8068 i = 0;
8069 if (striptype != RIGHTSTRIP1) {
8070 while (i < len && BLOOM_MEMBER(sepmask, s[i], sep, seplen)((sepmask & (1UL << ((s[i]) & (64 - 1))))) &&
unicode_member(s[i], sep, seplen)
) {
8071 i++;
8072 }
8073 }
8074
8075 j = len;
8076 if (striptype != LEFTSTRIP0) {
8077 do {
8078 j--;
8079 } while (j >= i && BLOOM_MEMBER(sepmask, s[j], sep, seplen)((sepmask & (1UL << ((s[j]) & (64 - 1))))) &&
unicode_member(s[j], sep, seplen)
);
8080 j++;
8081 }
8082
8083 if (i == 0 && j == len && PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
8084 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
8085 return (PyObject*)self;
8086 }
8087 else
8088 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(s+i, j-i);
8089}
8090
8091
8092static PyObject *
8093do_strip(PyUnicodeObject *self, int striptype)
8094{
8095 Py_UNICODE *s = PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8095, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
;
8096 Py_ssize_t len = PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8096, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
, i, j;
8097
8098 i = 0;
8099 if (striptype != RIGHTSTRIP1) {
8100 while (i < len && Py_UNICODE_ISSPACE(s[i])((s[i]) < 128U ? _Py_ascii_whitespace[(s[i])] : _PyUnicode_IsWhitespace
(s[i]))
) {
8101 i++;
8102 }
8103 }
8104
8105 j = len;
8106 if (striptype != LEFTSTRIP0) {
8107 do {
8108 j--;
8109 } while (j >= i && Py_UNICODE_ISSPACE(s[j])((s[j]) < 128U ? _Py_ascii_whitespace[(s[j])] : _PyUnicode_IsWhitespace
(s[j]))
);
8110 j++;
8111 }
8112
8113 if (i == 0 && j == len && PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
8114 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
8115 return (PyObject*)self;
8116 }
8117 else
8118 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(s+i, j-i);
8119}
8120
8121
8122static PyObject *
8123do_argstrip(PyUnicodeObject *self, int striptype, PyObject *args)
8124{
8125 PyObject *sep = NULL((void *)0);
8126
8127 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, (char *)stripformat[striptype], &sep))
8128 return NULL((void *)0);
8129
8130 if (sep != NULL((void *)0) && sep != Py_None(&_Py_NoneStruct)) {
8131 if (PyUnicode_Check(sep)((((((PyObject*)(sep))->ob_type))->tp_flags & ((1L<<
28))) != 0)
)
8132 return _PyUnicode_XStrip(self, striptype, sep);
8133 else {
8134 PyErr_Format(PyExc_TypeError,
8135 "%s arg must be None or str",
8136 STRIPNAME(striptype)(stripformat[striptype]+3));
8137 return NULL((void *)0);
8138 }
8139 }
8140
8141 return do_strip(self, striptype);
8142}
8143
8144
8145PyDoc_STRVAR(strip__doc__,static char strip__doc__[] = "S.strip([chars]) -> str\n\nReturn a copy of the string S with leading and trailing\nwhitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8146 "S.strip([chars]) -> str\n\static char strip__doc__[] = "S.strip([chars]) -> str\n\nReturn a copy of the string S with leading and trailing\nwhitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8147\n\static char strip__doc__[] = "S.strip([chars]) -> str\n\nReturn a copy of the string S with leading and trailing\nwhitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8148Return a copy of the string S with leading and trailing\n\static char strip__doc__[] = "S.strip([chars]) -> str\n\nReturn a copy of the string S with leading and trailing\nwhitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8149whitespace removed.\n\static char strip__doc__[] = "S.strip([chars]) -> str\n\nReturn a copy of the string S with leading and trailing\nwhitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8150If chars is given and not None, remove characters in chars instead.")static char strip__doc__[] = "S.strip([chars]) -> str\n\nReturn a copy of the string S with leading and trailing\nwhitespace removed.\nIf chars is given and not None, remove characters in chars instead.";
8151
8152static PyObject *
8153unicode_strip(PyUnicodeObject *self, PyObject *args)
8154{
8155 if (PyTuple_GET_SIZE(args)(((PyVarObject*)(args))->ob_size) == 0)
8156 return do_strip(self, BOTHSTRIP2); /* Common case */
8157 else
8158 return do_argstrip(self, BOTHSTRIP2, args);
8159}
8160
8161
8162PyDoc_STRVAR(lstrip__doc__,static char lstrip__doc__[] = "S.lstrip([chars]) -> str\n\nReturn a copy of the string S with leading whitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8163 "S.lstrip([chars]) -> str\n\static char lstrip__doc__[] = "S.lstrip([chars]) -> str\n\nReturn a copy of the string S with leading whitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8164\n\static char lstrip__doc__[] = "S.lstrip([chars]) -> str\n\nReturn a copy of the string S with leading whitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8165Return a copy of the string S with leading whitespace removed.\n\static char lstrip__doc__[] = "S.lstrip([chars]) -> str\n\nReturn a copy of the string S with leading whitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8166If chars is given and not None, remove characters in chars instead.")static char lstrip__doc__[] = "S.lstrip([chars]) -> str\n\nReturn a copy of the string S with leading whitespace removed.\nIf chars is given and not None, remove characters in chars instead.";
8167
8168static PyObject *
8169unicode_lstrip(PyUnicodeObject *self, PyObject *args)
8170{
8171 if (PyTuple_GET_SIZE(args)(((PyVarObject*)(args))->ob_size) == 0)
8172 return do_strip(self, LEFTSTRIP0); /* Common case */
8173 else
8174 return do_argstrip(self, LEFTSTRIP0, args);
8175}
8176
8177
8178PyDoc_STRVAR(rstrip__doc__,static char rstrip__doc__[] = "S.rstrip([chars]) -> str\n\nReturn a copy of the string S with trailing whitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8179 "S.rstrip([chars]) -> str\n\static char rstrip__doc__[] = "S.rstrip([chars]) -> str\n\nReturn a copy of the string S with trailing whitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8180\n\static char rstrip__doc__[] = "S.rstrip([chars]) -> str\n\nReturn a copy of the string S with trailing whitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8181Return a copy of the string S with trailing whitespace removed.\n\static char rstrip__doc__[] = "S.rstrip([chars]) -> str\n\nReturn a copy of the string S with trailing whitespace removed.\nIf chars is given and not None, remove characters in chars instead."
8182If chars is given and not None, remove characters in chars instead.")static char rstrip__doc__[] = "S.rstrip([chars]) -> str\n\nReturn a copy of the string S with trailing whitespace removed.\nIf chars is given and not None, remove characters in chars instead.";
8183
8184static PyObject *
8185unicode_rstrip(PyUnicodeObject *self, PyObject *args)
8186{
8187 if (PyTuple_GET_SIZE(args)(((PyVarObject*)(args))->ob_size) == 0)
8188 return do_strip(self, RIGHTSTRIP1); /* Common case */
8189 else
8190 return do_argstrip(self, RIGHTSTRIP1, args);
8191}
8192
8193
8194static PyObject*
8195unicode_repeat(PyUnicodeObject *str, Py_ssize_t len)
8196{
8197 PyUnicodeObject *u;
8198 Py_UNICODE *p;
8199 Py_ssize_t nchars;
8200 size_t nbytes;
8201
8202 if (len < 1) {
8203 Py_INCREF(unicode_empty)( _Py_RefTotal++ , ((PyObject*)(unicode_empty))->ob_refcnt
++)
;
8204 return (PyObject *)unicode_empty;
8205 }
8206
8207 if (len == 1 && PyUnicode_CheckExact(str)((((PyObject*)(str))->ob_type) == &PyUnicode_Type)) {
8208 /* no repeat, return original string */
8209 Py_INCREF(str)( _Py_RefTotal++ , ((PyObject*)(str))->ob_refcnt++);
8210 return (PyObject*) str;
8211 }
8212
8213 /* ensure # of chars needed doesn't overflow int and # of bytes
8214 * needed doesn't overflow size_t
8215 */
8216 nchars = len * str->length;
8217 if (nchars / len != str->length) {
8218 PyErr_SetString(PyExc_OverflowError,
8219 "repeated string is too long");
8220 return NULL((void *)0);
8221 }
8222 nbytes = (nchars + 1) * sizeof(Py_UNICODE);
8223 if (nbytes / sizeof(Py_UNICODE) != (size_t)(nchars + 1)) {
8224 PyErr_SetString(PyExc_OverflowError,
8225 "repeated string is too long");
8226 return NULL((void *)0);
8227 }
8228 u = _PyUnicode_New(nchars);
8229 if (!u)
8230 return NULL((void *)0);
8231
8232 p = u->str;
8233
8234 if (str->length == 1) {
8235 Py_UNICODE_FILL(p, str->str[0], len)do {Py_ssize_t i_; Py_UNICODE *t_ = (p); Py_UNICODE v_ = (str
->str[0]); for (i_ = 0; i_ < (len); i_++) t_[i_] = v_; }
while (0)
;
8236 } else {
8237 Py_ssize_t done = str->length; /* number of characters copied this far */
8238 Py_UNICODE_COPY(p, str->str, str->length)((__builtin_object_size ((p), 0) != (size_t) -1) ? __builtin___memcpy_chk
((p), (str->str), (str->length)*sizeof(Py_UNICODE), __builtin_object_size
((p), 0)) : __inline_memcpy_chk ((p), (str->str), (str->
length)*sizeof(Py_UNICODE)))
;
8239 while (done < nchars) {
8240 Py_ssize_t n = (done <= nchars-done) ? done : nchars-done;
8241 Py_UNICODE_COPY(p+done, p, n)((__builtin_object_size ((p+done), 0) != (size_t) -1) ? __builtin___memcpy_chk
((p+done), (p), (n)*sizeof(Py_UNICODE), __builtin_object_size
((p+done), 0)) : __inline_memcpy_chk ((p+done), (p), (n)*sizeof
(Py_UNICODE)))
;
8242 done += n;
8243 }
8244 }
8245
8246 return (PyObject*) u;
8247}
8248
8249PyObject *PyUnicode_ReplacePyUnicodeUCS2_Replace(PyObject *obj,
8250 PyObject *subobj,
8251 PyObject *replobj,
8252 Py_ssize_t maxcount)
8253{
8254 PyObject *self;
8255 PyObject *str1;
8256 PyObject *str2;
8257 PyObject *result;
8258
8259 self = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(obj);
8260 if (self == NULL((void *)0))
8261 return NULL((void *)0);
8262 str1 = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(subobj);
8263 if (str1 == NULL((void *)0)) {
8264 Py_DECREF(self)do { if (_Py_RefTotal-- , --((PyObject*)(self))->ob_refcnt
!= 0) { if (((PyObject*)self)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8264, (PyObject *)(self)); } else
_Py_Dealloc((PyObject *)(self)); } while (0)
;
8265 return NULL((void *)0);
8266 }
8267 str2 = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(replobj);
8268 if (str2 == NULL((void *)0)) {
8269 Py_DECREF(self)do { if (_Py_RefTotal-- , --((PyObject*)(self))->ob_refcnt
!= 0) { if (((PyObject*)self)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8269, (PyObject *)(self)); } else
_Py_Dealloc((PyObject *)(self)); } while (0)
;
8270 Py_DECREF(str1)do { if (_Py_RefTotal-- , --((PyObject*)(str1))->ob_refcnt
!= 0) { if (((PyObject*)str1)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8270, (PyObject *)(str1)); } else
_Py_Dealloc((PyObject *)(str1)); } while (0)
;
8271 return NULL((void *)0);
8272 }
8273 result = replace((PyUnicodeObject *)self,
8274 (PyUnicodeObject *)str1,
8275 (PyUnicodeObject *)str2,
8276 maxcount);
8277 Py_DECREF(self)do { if (_Py_RefTotal-- , --((PyObject*)(self))->ob_refcnt
!= 0) { if (((PyObject*)self)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8277, (PyObject *)(self)); } else
_Py_Dealloc((PyObject *)(self)); } while (0)
;
8278 Py_DECREF(str1)do { if (_Py_RefTotal-- , --((PyObject*)(str1))->ob_refcnt
!= 0) { if (((PyObject*)str1)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8278, (PyObject *)(str1)); } else
_Py_Dealloc((PyObject *)(str1)); } while (0)
;
8279 Py_DECREF(str2)do { if (_Py_RefTotal-- , --((PyObject*)(str2))->ob_refcnt
!= 0) { if (((PyObject*)str2)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8279, (PyObject *)(str2)); } else
_Py_Dealloc((PyObject *)(str2)); } while (0)
;
8280 return result;
8281}
8282
8283PyDoc_STRVAR(replace__doc__,static char replace__doc__[] = "S.replace(old, new[, count]) -> str\n\nReturn a copy of S with all occurrences of substring\nold replaced by new. If the optional argument count is\ngiven, only the first count occurrences are replaced."
8284 "S.replace(old, new[, count]) -> str\n\static char replace__doc__[] = "S.replace(old, new[, count]) -> str\n\nReturn a copy of S with all occurrences of substring\nold replaced by new. If the optional argument count is\ngiven, only the first count occurrences are replaced."
8285\n\static char replace__doc__[] = "S.replace(old, new[, count]) -> str\n\nReturn a copy of S with all occurrences of substring\nold replaced by new. If the optional argument count is\ngiven, only the first count occurrences are replaced."
8286Return a copy of S with all occurrences of substring\n\static char replace__doc__[] = "S.replace(old, new[, count]) -> str\n\nReturn a copy of S with all occurrences of substring\nold replaced by new. If the optional argument count is\ngiven, only the first count occurrences are replaced."
8287old replaced by new. If the optional argument count is\n\static char replace__doc__[] = "S.replace(old, new[, count]) -> str\n\nReturn a copy of S with all occurrences of substring\nold replaced by new. If the optional argument count is\ngiven, only the first count occurrences are replaced."
8288given, only the first count occurrences are replaced.")static char replace__doc__[] = "S.replace(old, new[, count]) -> str\n\nReturn a copy of S with all occurrences of substring\nold replaced by new. If the optional argument count is\ngiven, only the first count occurrences are replaced.";
8289
8290static PyObject*
8291unicode_replace(PyUnicodeObject *self, PyObject *args)
8292{
8293 PyUnicodeObject *str1;
8294 PyUnicodeObject *str2;
8295 Py_ssize_t maxcount = -1;
8296 PyObject *result;
8297
8298 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "OO|n:replace", &str1, &str2, &maxcount))
8299 return NULL((void *)0);
8300 str1 = (PyUnicodeObject *)PyUnicode_FromObjectPyUnicodeUCS2_FromObject((PyObject *)str1);
8301 if (str1 == NULL((void *)0))
8302 return NULL((void *)0);
8303 str2 = (PyUnicodeObject *)PyUnicode_FromObjectPyUnicodeUCS2_FromObject((PyObject *)str2);
8304 if (str2 == NULL((void *)0)) {
8305 Py_DECREF(str1)do { if (_Py_RefTotal-- , --((PyObject*)(str1))->ob_refcnt
!= 0) { if (((PyObject*)str1)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8305, (PyObject *)(str1)); } else
_Py_Dealloc((PyObject *)(str1)); } while (0)
;
8306 return NULL((void *)0);
8307 }
8308
8309 result = replace(self, str1, str2, maxcount);
8310
8311 Py_DECREF(str1)do { if (_Py_RefTotal-- , --((PyObject*)(str1))->ob_refcnt
!= 0) { if (((PyObject*)str1)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8311, (PyObject *)(str1)); } else
_Py_Dealloc((PyObject *)(str1)); } while (0)
;
8312 Py_DECREF(str2)do { if (_Py_RefTotal-- , --((PyObject*)(str2))->ob_refcnt
!= 0) { if (((PyObject*)str2)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8312, (PyObject *)(str2)); } else
_Py_Dealloc((PyObject *)(str2)); } while (0)
;
8313 return result;
8314}
8315
8316static
8317PyObject *unicode_repr(PyObject *unicode)
8318{
8319 PyObject *repr;
8320 Py_UNICODE *p;
8321 Py_UNICODE *s = PyUnicode_AS_UNICODE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8321, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str))
;
8322 Py_ssize_t size = PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8322, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
;
8323
8324 /* XXX(nnorwitz): rather than over-allocating, it would be
8325 better to choose a different scheme. Perhaps scan the
8326 first N-chars of the string and allocate based on that size.
8327 */
8328 /* Initial allocation is based on the longest-possible unichr
8329 escape.
8330
8331 In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
8332 unichr, so in this case it's the longest unichr escape. In
8333 narrow (UTF-16) builds this is five chars per source unichr
8334 since there are two unichrs in the surrogate pair, so in narrow
8335 (UTF-16) builds it's not the longest unichr escape.
8336
8337 In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
8338 so in the narrow (UTF-16) build case it's the longest unichr
8339 escape.
8340 */
8341
8342 repr = PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(NULL((void *)0),
8343 2 /* quotes */
8344#ifdef Py_UNICODE_WIDE
8345 + 10*size
8346#else
8347 + 6*size
8348#endif
8349 + 1);
8350 if (repr == NULL((void *)0))
8351 return NULL((void *)0);
8352
8353 p = PyUnicode_AS_UNICODE(repr)((__builtin_expect(!(((((((PyObject*)(repr))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8353, "PyUnicode_Check(repr)") :
(void)0),(((PyUnicodeObject *)(repr))->str))
;
8354
8355 /* Add quote */
8356 *p++ = (findchar(s, size, '\'') &&
8357 !findchar(s, size, '"')) ? '"' : '\'';
8358 while (size-- > 0) {
8359 Py_UNICODE ch = *s++;
8360
8361 /* Escape quotes and backslashes */
8362 if ((ch == PyUnicode_AS_UNICODE(repr)((__builtin_expect(!(((((((PyObject*)(repr))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8362, "PyUnicode_Check(repr)") :
(void)0),(((PyUnicodeObject *)(repr))->str))
[0]) || (ch == '\\')) {
8363 *p++ = '\\';
8364 *p++ = ch;
8365 continue;
8366 }
8367
8368 /* Map special whitespace to '\t', \n', '\r' */
8369 if (ch == '\t') {
8370 *p++ = '\\';
8371 *p++ = 't';
8372 }
8373 else if (ch == '\n') {
8374 *p++ = '\\';
8375 *p++ = 'n';
8376 }
8377 else if (ch == '\r') {
8378 *p++ = '\\';
8379 *p++ = 'r';
8380 }
8381
8382 /* Map non-printable US ASCII to '\xhh' */
8383 else if (ch < ' ' || ch == 0x7F) {
8384 *p++ = '\\';
8385 *p++ = 'x';
8386 *p++ = hexdigits[(ch >> 4) & 0x000F];
8387 *p++ = hexdigits[ch & 0x000F];
8388 }
8389
8390 /* Copy ASCII characters as-is */
8391 else if (ch < 0x7F) {
8392 *p++ = ch;
8393 }
8394
8395 /* Non-ASCII characters */
8396 else {
8397 Py_UCS4 ucs = ch;
8398
8399#ifndef Py_UNICODE_WIDE
8400 Py_UNICODE ch2 = 0;
8401 /* Get code point from surrogate pair */
8402 if (size > 0) {
8403 ch2 = *s;
8404 if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00
8405 && ch2 <= 0xDFFF) {
8406 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF))
8407 + 0x00010000;
8408 s++;
8409 size--;
8410 }
8411 }
8412#endif
8413 /* Map Unicode whitespace and control characters
8414 (categories Z* and C* except ASCII space)
8415 */
8416 if (!Py_UNICODE_ISPRINTABLE(ucs)_PyUnicode_IsPrintable(ucs)) {
8417 /* Map 8-bit characters to '\xhh' */
8418 if (ucs <= 0xff) {
8419 *p++ = '\\';
8420 *p++ = 'x';
8421 *p++ = hexdigits[(ch >> 4) & 0x000F];
8422 *p++ = hexdigits[ch & 0x000F];
8423 }
8424 /* Map 21-bit characters to '\U00xxxxxx' */
8425 else if (ucs >= 0x10000) {
8426 *p++ = '\\';
8427 *p++ = 'U';
8428 *p++ = hexdigits[(ucs >> 28) & 0x0000000F];
8429 *p++ = hexdigits[(ucs >> 24) & 0x0000000F];
8430 *p++ = hexdigits[(ucs >> 20) & 0x0000000F];
8431 *p++ = hexdigits[(ucs >> 16) & 0x0000000F];
8432 *p++ = hexdigits[(ucs >> 12) & 0x0000000F];
8433 *p++ = hexdigits[(ucs >> 8) & 0x0000000F];
8434 *p++ = hexdigits[(ucs >> 4) & 0x0000000F];
8435 *p++ = hexdigits[ucs & 0x0000000F];
8436 }
8437 /* Map 16-bit characters to '\uxxxx' */
8438 else {
8439 *p++ = '\\';
8440 *p++ = 'u';
8441 *p++ = hexdigits[(ucs >> 12) & 0x000F];
8442 *p++ = hexdigits[(ucs >> 8) & 0x000F];
8443 *p++ = hexdigits[(ucs >> 4) & 0x000F];
8444 *p++ = hexdigits[ucs & 0x000F];
8445 }
8446 }
8447 /* Copy characters as-is */
8448 else {
8449 *p++ = ch;
8450#ifndef Py_UNICODE_WIDE
8451 if (ucs >= 0x10000)
8452 *p++ = ch2;
8453#endif
8454 }
8455 }
8456 }
8457 /* Add quote */
8458 *p++ = PyUnicode_AS_UNICODE(repr)((__builtin_expect(!(((((((PyObject*)(repr))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8458, "PyUnicode_Check(repr)") :
(void)0),(((PyUnicodeObject *)(repr))->str))
[0];
8459
8460 *p = '\0';
8461 PyUnicode_ResizePyUnicodeUCS2_Resize(&repr, p - PyUnicode_AS_UNICODE(repr)((__builtin_expect(!(((((((PyObject*)(repr))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8461, "PyUnicode_Check(repr)") :
(void)0),(((PyUnicodeObject *)(repr))->str))
);
8462 return repr;
8463}
8464
8465PyDoc_STRVAR(rfind__doc__,static char rfind__doc__[] = "S.rfind(sub[, start[, end]]) -> int\n\nReturn the highest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
8466 "S.rfind(sub[, start[, end]]) -> int\n\static char rfind__doc__[] = "S.rfind(sub[, start[, end]]) -> int\n\nReturn the highest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
8467\n\static char rfind__doc__[] = "S.rfind(sub[, start[, end]]) -> int\n\nReturn the highest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
8468Return the highest index in S where substring sub is found,\n\static char rfind__doc__[] = "S.rfind(sub[, start[, end]]) -> int\n\nReturn the highest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
8469such that sub is contained within s[start:end]. Optional\n\static char rfind__doc__[] = "S.rfind(sub[, start[, end]]) -> int\n\nReturn the highest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
8470arguments start and end are interpreted as in slice notation.\n\static char rfind__doc__[] = "S.rfind(sub[, start[, end]]) -> int\n\nReturn the highest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
8471\n\static char rfind__doc__[] = "S.rfind(sub[, start[, end]]) -> int\n\nReturn the highest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure."
8472Return -1 on failure.")static char rfind__doc__[] = "S.rfind(sub[, start[, end]]) -> int\n\nReturn the highest index in S where substring sub is found,\nsuch that sub is contained within s[start:end]. Optional\narguments start and end are interpreted as in slice notation.\n\nReturn -1 on failure.";
8473
8474static PyObject *
8475unicode_rfind(PyUnicodeObject *self, PyObject *args)
8476{
8477 PyObject *substring;
8478 Py_ssize_t start;
8479 Py_ssize_t end;
8480 Py_ssize_t result;
8481
8482 if (!_ParseTupleFinds(args, &substring, &start, &end))
8483 return NULL((void *)0);
8484
8485 result = stringlib_rfind_slice(
8486 PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8486, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
, PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8486, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
,
8487 PyUnicode_AS_UNICODE(substring)((__builtin_expect(!(((((((PyObject*)(substring))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8487, "PyUnicode_Check(substring)"
) : (void)0),(((PyUnicodeObject *)(substring))->str))
, PyUnicode_GET_SIZE(substring)((__builtin_expect(!(((((((PyObject*)(substring))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8487, "PyUnicode_Check(substring)"
) : (void)0),(((PyUnicodeObject *)(substring))->length))
,
8488 start, end
8489 );
8490
8491 Py_DECREF(substring)do { if (_Py_RefTotal-- , --((PyObject*)(substring))->ob_refcnt
!= 0) { if (((PyObject*)substring)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8491, (PyObject *)(substring)); }
else _Py_Dealloc((PyObject *)(substring)); } while (0)
;
8492
8493 return PyLong_FromSsize_t(result);
8494}
8495
8496PyDoc_STRVAR(rindex__doc__,static char rindex__doc__[] = "S.rindex(sub[, start[, end]]) -> int\n\nLike S.rfind() but raise ValueError when the substring is not found."
8497 "S.rindex(sub[, start[, end]]) -> int\n\static char rindex__doc__[] = "S.rindex(sub[, start[, end]]) -> int\n\nLike S.rfind() but raise ValueError when the substring is not found."
8498\n\static char rindex__doc__[] = "S.rindex(sub[, start[, end]]) -> int\n\nLike S.rfind() but raise ValueError when the substring is not found."
8499Like S.rfind() but raise ValueError when the substring is not found.")static char rindex__doc__[] = "S.rindex(sub[, start[, end]]) -> int\n\nLike S.rfind() but raise ValueError when the substring is not found.";
8500
8501static PyObject *
8502unicode_rindex(PyUnicodeObject *self, PyObject *args)
8503{
8504 PyObject *substring;
8505 Py_ssize_t start;
8506 Py_ssize_t end;
8507 Py_ssize_t result;
8508
8509 if (!_ParseTupleFinds(args, &substring, &start, &end))
8510 return NULL((void *)0);
8511
8512 result = stringlib_rfind_slice(
8513 PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8513, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
, PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8513, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
,
8514 PyUnicode_AS_UNICODE(substring)((__builtin_expect(!(((((((PyObject*)(substring))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8514, "PyUnicode_Check(substring)"
) : (void)0),(((PyUnicodeObject *)(substring))->str))
, PyUnicode_GET_SIZE(substring)((__builtin_expect(!(((((((PyObject*)(substring))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8514, "PyUnicode_Check(substring)"
) : (void)0),(((PyUnicodeObject *)(substring))->length))
,
8515 start, end
8516 );
8517
8518 Py_DECREF(substring)do { if (_Py_RefTotal-- , --((PyObject*)(substring))->ob_refcnt
!= 0) { if (((PyObject*)substring)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8518, (PyObject *)(substring)); }
else _Py_Dealloc((PyObject *)(substring)); } while (0)
;
8519
8520 if (result < 0) {
8521 PyErr_SetString(PyExc_ValueError, "substring not found");
8522 return NULL((void *)0);
8523 }
8524 return PyLong_FromSsize_t(result);
8525}
8526
8527PyDoc_STRVAR(rjust__doc__,static char rjust__doc__[] = "S.rjust(width[, fillchar]) -> str\n\nReturn S right-justified in a string of length width. Padding is\ndone using the specified fill character (default is a space)."
8528 "S.rjust(width[, fillchar]) -> str\n\static char rjust__doc__[] = "S.rjust(width[, fillchar]) -> str\n\nReturn S right-justified in a string of length width. Padding is\ndone using the specified fill character (default is a space)."
8529\n\static char rjust__doc__[] = "S.rjust(width[, fillchar]) -> str\n\nReturn S right-justified in a string of length width. Padding is\ndone using the specified fill character (default is a space)."
8530Return S right-justified in a string of length width. Padding is\n\static char rjust__doc__[] = "S.rjust(width[, fillchar]) -> str\n\nReturn S right-justified in a string of length width. Padding is\ndone using the specified fill character (default is a space)."
8531done using the specified fill character (default is a space).")static char rjust__doc__[] = "S.rjust(width[, fillchar]) -> str\n\nReturn S right-justified in a string of length width. Padding is\ndone using the specified fill character (default is a space).";
8532
8533static PyObject *
8534unicode_rjust(PyUnicodeObject *self, PyObject *args)
8535{
8536 Py_ssize_t width;
8537 Py_UNICODE fillchar = ' ';
8538
8539 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "n|O&:rjust", &width, convert_uc, &fillchar))
8540 return NULL((void *)0);
8541
8542 if (self->length >= width && PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
8543 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
8544 return (PyObject*) self;
8545 }
8546
8547 return (PyObject*) pad(self, width - self->length, 0, fillchar);
8548}
8549
8550PyObject *PyUnicode_SplitPyUnicodeUCS2_Split(PyObject *s,
8551 PyObject *sep,
8552 Py_ssize_t maxsplit)
8553{
8554 PyObject *result;
8555
8556 s = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(s);
8557 if (s == NULL((void *)0))
8558 return NULL((void *)0);
8559 if (sep != NULL((void *)0)) {
8560 sep = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(sep);
8561 if (sep == NULL((void *)0)) {
8562 Py_DECREF(s)do { if (_Py_RefTotal-- , --((PyObject*)(s))->ob_refcnt !=
0) { if (((PyObject*)s)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8562, (PyObject *)(s)); } else _Py_Dealloc
((PyObject *)(s)); } while (0)
;
8563 return NULL((void *)0);
8564 }
8565 }
8566
8567 result = split((PyUnicodeObject *)s, (PyUnicodeObject *)sep, maxsplit);
8568
8569 Py_DECREF(s)do { if (_Py_RefTotal-- , --((PyObject*)(s))->ob_refcnt !=
0) { if (((PyObject*)s)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8569, (PyObject *)(s)); } else _Py_Dealloc
((PyObject *)(s)); } while (0)
;
8570 Py_XDECREF(sep)do { if ((sep) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(sep))->ob_refcnt != 0) { if (((PyObject*
)sep)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 8570, (PyObject *)(sep)); } else _Py_Dealloc((PyObject *)(sep
)); } while (0); } while (0)
;
8571 return result;
8572}
8573
8574PyDoc_STRVAR(split__doc__,static char split__doc__[] = "S.split([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified or is None, any\nwhitespace string is a separator and empty strings are\nremoved from the result."
8575 "S.split([sep[, maxsplit]]) -> list of strings\n\static char split__doc__[] = "S.split([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified or is None, any\nwhitespace string is a separator and empty strings are\nremoved from the result."
8576\n\static char split__doc__[] = "S.split([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified or is None, any\nwhitespace string is a separator and empty strings are\nremoved from the result."
8577Return a list of the words in S, using sep as the\n\static char split__doc__[] = "S.split([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified or is None, any\nwhitespace string is a separator and empty strings are\nremoved from the result."
8578delimiter string. If maxsplit is given, at most maxsplit\n\static char split__doc__[] = "S.split([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified or is None, any\nwhitespace string is a separator and empty strings are\nremoved from the result."
8579splits are done. If sep is not specified or is None, any\n\static char split__doc__[] = "S.split([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified or is None, any\nwhitespace string is a separator and empty strings are\nremoved from the result."
8580whitespace string is a separator and empty strings are\n\static char split__doc__[] = "S.split([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified or is None, any\nwhitespace string is a separator and empty strings are\nremoved from the result."
8581removed from the result.")static char split__doc__[] = "S.split([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified or is None, any\nwhitespace string is a separator and empty strings are\nremoved from the result.";
8582
8583static PyObject*
8584unicode_split(PyUnicodeObject *self, PyObject *args)
8585{
8586 PyObject *substring = Py_None(&_Py_NoneStruct);
8587 Py_ssize_t maxcount = -1;
8588
8589 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "|On:split", &substring, &maxcount))
8590 return NULL((void *)0);
8591
8592 if (substring == Py_None(&_Py_NoneStruct))
8593 return split(self, NULL((void *)0), maxcount);
8594 else if (PyUnicode_Check(substring)((((((PyObject*)(substring))->ob_type))->tp_flags &
((1L<<28))) != 0)
)
8595 return split(self, (PyUnicodeObject *)substring, maxcount);
8596 else
8597 return PyUnicode_SplitPyUnicodeUCS2_Split((PyObject *)self, substring, maxcount);
8598}
8599
8600PyObject *
8601PyUnicode_PartitionPyUnicodeUCS2_Partition(PyObject *str_in, PyObject *sep_in)
8602{
8603 PyObject* str_obj;
8604 PyObject* sep_obj;
8605 PyObject* out;
8606
8607 str_obj = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(str_in);
8608 if (!str_obj)
8609 return NULL((void *)0);
8610 sep_obj = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(sep_in);
8611 if (!sep_obj) {
8612 Py_DECREF(str_obj)do { if (_Py_RefTotal-- , --((PyObject*)(str_obj))->ob_refcnt
!= 0) { if (((PyObject*)str_obj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8612, (PyObject *)(str_obj)); } else
_Py_Dealloc((PyObject *)(str_obj)); } while (0)
;
8613 return NULL((void *)0);
8614 }
8615
8616 out = stringlib_partition(
8617 str_obj, PyUnicode_AS_UNICODE(str_obj)((__builtin_expect(!(((((((PyObject*)(str_obj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8617, "PyUnicode_Check(str_obj)"
) : (void)0),(((PyUnicodeObject *)(str_obj))->str))
, PyUnicode_GET_SIZE(str_obj)((__builtin_expect(!(((((((PyObject*)(str_obj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8617, "PyUnicode_Check(str_obj)"
) : (void)0),(((PyUnicodeObject *)(str_obj))->length))
,
8618 sep_obj, PyUnicode_AS_UNICODE(sep_obj)((__builtin_expect(!(((((((PyObject*)(sep_obj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8618, "PyUnicode_Check(sep_obj)"
) : (void)0),(((PyUnicodeObject *)(sep_obj))->str))
, PyUnicode_GET_SIZE(sep_obj)((__builtin_expect(!(((((((PyObject*)(sep_obj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8618, "PyUnicode_Check(sep_obj)"
) : (void)0),(((PyUnicodeObject *)(sep_obj))->length))
8619 );
8620
8621 Py_DECREF(sep_obj)do { if (_Py_RefTotal-- , --((PyObject*)(sep_obj))->ob_refcnt
!= 0) { if (((PyObject*)sep_obj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8621, (PyObject *)(sep_obj)); } else
_Py_Dealloc((PyObject *)(sep_obj)); } while (0)
;
8622 Py_DECREF(str_obj)do { if (_Py_RefTotal-- , --((PyObject*)(str_obj))->ob_refcnt
!= 0) { if (((PyObject*)str_obj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8622, (PyObject *)(str_obj)); } else
_Py_Dealloc((PyObject *)(str_obj)); } while (0)
;
8623
8624 return out;
8625}
8626
8627
8628PyObject *
8629PyUnicode_RPartitionPyUnicodeUCS2_RPartition(PyObject *str_in, PyObject *sep_in)
8630{
8631 PyObject* str_obj;
8632 PyObject* sep_obj;
8633 PyObject* out;
8634
8635 str_obj = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(str_in);
8636 if (!str_obj)
8637 return NULL((void *)0);
8638 sep_obj = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(sep_in);
8639 if (!sep_obj) {
8640 Py_DECREF(str_obj)do { if (_Py_RefTotal-- , --((PyObject*)(str_obj))->ob_refcnt
!= 0) { if (((PyObject*)str_obj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8640, (PyObject *)(str_obj)); } else
_Py_Dealloc((PyObject *)(str_obj)); } while (0)
;
8641 return NULL((void *)0);
8642 }
8643
8644 out = stringlib_rpartition(
8645 str_obj, PyUnicode_AS_UNICODE(str_obj)((__builtin_expect(!(((((((PyObject*)(str_obj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8645, "PyUnicode_Check(str_obj)"
) : (void)0),(((PyUnicodeObject *)(str_obj))->str))
, PyUnicode_GET_SIZE(str_obj)((__builtin_expect(!(((((((PyObject*)(str_obj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8645, "PyUnicode_Check(str_obj)"
) : (void)0),(((PyUnicodeObject *)(str_obj))->length))
,
8646 sep_obj, PyUnicode_AS_UNICODE(sep_obj)((__builtin_expect(!(((((((PyObject*)(sep_obj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8646, "PyUnicode_Check(sep_obj)"
) : (void)0),(((PyUnicodeObject *)(sep_obj))->str))
, PyUnicode_GET_SIZE(sep_obj)((__builtin_expect(!(((((((PyObject*)(sep_obj))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 8646, "PyUnicode_Check(sep_obj)"
) : (void)0),(((PyUnicodeObject *)(sep_obj))->length))
8647 );
8648
8649 Py_DECREF(sep_obj)do { if (_Py_RefTotal-- , --((PyObject*)(sep_obj))->ob_refcnt
!= 0) { if (((PyObject*)sep_obj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8649, (PyObject *)(sep_obj)); } else
_Py_Dealloc((PyObject *)(sep_obj)); } while (0)
;
8650 Py_DECREF(str_obj)do { if (_Py_RefTotal-- , --((PyObject*)(str_obj))->ob_refcnt
!= 0) { if (((PyObject*)str_obj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8650, (PyObject *)(str_obj)); } else
_Py_Dealloc((PyObject *)(str_obj)); } while (0)
;
8651
8652 return out;
8653}
8654
8655PyDoc_STRVAR(partition__doc__,static char partition__doc__[] = "S.partition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, and return the part before it,\nthe separator itself, and the part after it. If the separator is not\nfound, return S and two empty strings."
8656 "S.partition(sep) -> (head, sep, tail)\n\static char partition__doc__[] = "S.partition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, and return the part before it,\nthe separator itself, and the part after it. If the separator is not\nfound, return S and two empty strings."
8657\n\static char partition__doc__[] = "S.partition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, and return the part before it,\nthe separator itself, and the part after it. If the separator is not\nfound, return S and two empty strings."
8658Search for the separator sep in S, and return the part before it,\n\static char partition__doc__[] = "S.partition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, and return the part before it,\nthe separator itself, and the part after it. If the separator is not\nfound, return S and two empty strings."
8659the separator itself, and the part after it. If the separator is not\n\static char partition__doc__[] = "S.partition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, and return the part before it,\nthe separator itself, and the part after it. If the separator is not\nfound, return S and two empty strings."
8660found, return S and two empty strings.")static char partition__doc__[] = "S.partition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, and return the part before it,\nthe separator itself, and the part after it. If the separator is not\nfound, return S and two empty strings.";
8661
8662static PyObject*
8663unicode_partition(PyUnicodeObject *self, PyObject *separator)
8664{
8665 return PyUnicode_PartitionPyUnicodeUCS2_Partition((PyObject *)self, separator);
8666}
8667
8668PyDoc_STRVAR(rpartition__doc__,static char rpartition__doc__[] = "S.rpartition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, starting at the end of S, and return\nthe part before it, the separator itself, and the part after it. If the\nseparator is not found, return two empty strings and S."
8669 "S.rpartition(sep) -> (head, sep, tail)\n\static char rpartition__doc__[] = "S.rpartition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, starting at the end of S, and return\nthe part before it, the separator itself, and the part after it. If the\nseparator is not found, return two empty strings and S."
8670\n\static char rpartition__doc__[] = "S.rpartition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, starting at the end of S, and return\nthe part before it, the separator itself, and the part after it. If the\nseparator is not found, return two empty strings and S."
8671Search for the separator sep in S, starting at the end of S, and return\n\static char rpartition__doc__[] = "S.rpartition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, starting at the end of S, and return\nthe part before it, the separator itself, and the part after it. If the\nseparator is not found, return two empty strings and S."
8672the part before it, the separator itself, and the part after it. If the\n\static char rpartition__doc__[] = "S.rpartition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, starting at the end of S, and return\nthe part before it, the separator itself, and the part after it. If the\nseparator is not found, return two empty strings and S."
8673separator is not found, return two empty strings and S.")static char rpartition__doc__[] = "S.rpartition(sep) -> (head, sep, tail)\n\nSearch for the separator sep in S, starting at the end of S, and return\nthe part before it, the separator itself, and the part after it. If the\nseparator is not found, return two empty strings and S.";
8674
8675static PyObject*
8676unicode_rpartition(PyUnicodeObject *self, PyObject *separator)
8677{
8678 return PyUnicode_RPartitionPyUnicodeUCS2_RPartition((PyObject *)self, separator);
8679}
8680
8681PyObject *PyUnicode_RSplitPyUnicodeUCS2_RSplit(PyObject *s,
8682 PyObject *sep,
8683 Py_ssize_t maxsplit)
8684{
8685 PyObject *result;
8686
8687 s = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(s);
8688 if (s == NULL((void *)0))
8689 return NULL((void *)0);
8690 if (sep != NULL((void *)0)) {
8691 sep = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(sep);
8692 if (sep == NULL((void *)0)) {
8693 Py_DECREF(s)do { if (_Py_RefTotal-- , --((PyObject*)(s))->ob_refcnt !=
0) { if (((PyObject*)s)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8693, (PyObject *)(s)); } else _Py_Dealloc
((PyObject *)(s)); } while (0)
;
8694 return NULL((void *)0);
8695 }
8696 }
8697
8698 result = rsplit((PyUnicodeObject *)s, (PyUnicodeObject *)sep, maxsplit);
8699
8700 Py_DECREF(s)do { if (_Py_RefTotal-- , --((PyObject*)(s))->ob_refcnt !=
0) { if (((PyObject*)s)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8700, (PyObject *)(s)); } else _Py_Dealloc
((PyObject *)(s)); } while (0)
;
8701 Py_XDECREF(sep)do { if ((sep) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(sep))->ob_refcnt != 0) { if (((PyObject*
)sep)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 8701, (PyObject *)(sep)); } else _Py_Dealloc((PyObject *)(sep
)); } while (0); } while (0)
;
8702 return result;
8703}
8704
8705PyDoc_STRVAR(rsplit__doc__,static char rsplit__doc__[] = "S.rsplit([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string, starting at the end of the string and\nworking to the front. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified, any whitespace string\nis a separator."
8706 "S.rsplit([sep[, maxsplit]]) -> list of strings\n\static char rsplit__doc__[] = "S.rsplit([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string, starting at the end of the string and\nworking to the front. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified, any whitespace string\nis a separator."
8707\n\static char rsplit__doc__[] = "S.rsplit([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string, starting at the end of the string and\nworking to the front. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified, any whitespace string\nis a separator."
8708Return a list of the words in S, using sep as the\n\static char rsplit__doc__[] = "S.rsplit([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string, starting at the end of the string and\nworking to the front. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified, any whitespace string\nis a separator."
8709delimiter string, starting at the end of the string and\n\static char rsplit__doc__[] = "S.rsplit([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string, starting at the end of the string and\nworking to the front. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified, any whitespace string\nis a separator."
8710working to the front. If maxsplit is given, at most maxsplit\n\static char rsplit__doc__[] = "S.rsplit([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string, starting at the end of the string and\nworking to the front. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified, any whitespace string\nis a separator."
8711splits are done. If sep is not specified, any whitespace string\n\static char rsplit__doc__[] = "S.rsplit([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string, starting at the end of the string and\nworking to the front. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified, any whitespace string\nis a separator."
8712is a separator.")static char rsplit__doc__[] = "S.rsplit([sep[, maxsplit]]) -> list of strings\n\nReturn a list of the words in S, using sep as the\ndelimiter string, starting at the end of the string and\nworking to the front. If maxsplit is given, at most maxsplit\nsplits are done. If sep is not specified, any whitespace string\nis a separator.";
8713
8714static PyObject*
8715unicode_rsplit(PyUnicodeObject *self, PyObject *args)
8716{
8717 PyObject *substring = Py_None(&_Py_NoneStruct);
8718 Py_ssize_t maxcount = -1;
8719
8720 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "|On:rsplit", &substring, &maxcount))
8721 return NULL((void *)0);
8722
8723 if (substring == Py_None(&_Py_NoneStruct))
8724 return rsplit(self, NULL((void *)0), maxcount);
8725 else if (PyUnicode_Check(substring)((((((PyObject*)(substring))->ob_type))->tp_flags &
((1L<<28))) != 0)
)
8726 return rsplit(self, (PyUnicodeObject *)substring, maxcount);
8727 else
8728 return PyUnicode_RSplitPyUnicodeUCS2_RSplit((PyObject *)self, substring, maxcount);
8729}
8730
/* Help text for the splitlines method, defined as the static char array
   splitlines__doc__.  It documents the optional keepends argument that
   unicode_splitlines() parses. */
8731PyDoc_STRVAR(splitlines__doc__,static char splitlines__doc__[] = "S.splitlines([keepends]) -> list of strings\n\nReturn a list of the lines in S, breaking at line boundaries.\nLine breaks are not included in the resulting list unless keepends\nis given and true."
8732 "S.splitlines([keepends]) -> list of strings\n\static char splitlines__doc__[] = "S.splitlines([keepends]) -> list of strings\n\nReturn a list of the lines in S, breaking at line boundaries.\nLine breaks are not included in the resulting list unless keepends\nis given and true."
8733\n\static char splitlines__doc__[] = "S.splitlines([keepends]) -> list of strings\n\nReturn a list of the lines in S, breaking at line boundaries.\nLine breaks are not included in the resulting list unless keepends\nis given and true."
8734Return a list of the lines in S, breaking at line boundaries.\n\static char splitlines__doc__[] = "S.splitlines([keepends]) -> list of strings\n\nReturn a list of the lines in S, breaking at line boundaries.\nLine breaks are not included in the resulting list unless keepends\nis given and true."
8735Line breaks are not included in the resulting list unless keepends\n\static char splitlines__doc__[] = "S.splitlines([keepends]) -> list of strings\n\nReturn a list of the lines in S, breaking at line boundaries.\nLine breaks are not included in the resulting list unless keepends\nis given and true."
8736is given and true.")static char splitlines__doc__[] = "S.splitlines([keepends]) -> list of strings\n\nReturn a list of the lines in S, breaking at line boundaries.\nLine breaks are not included in the resulting list unless keepends\nis given and true.";
8737
8738static PyObject*
8739unicode_splitlines(PyUnicodeObject *self, PyObject *args)
8740{
8741 int keepends = 0;
8742
8743 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "|i:splitlines", &keepends))
8744 return NULL((void *)0);
8745
8746 return PyUnicode_SplitlinesPyUnicodeUCS2_Splitlines((PyObject *)self, keepends);
8747}
8748
8749static
8750PyObject *unicode_str(PyObject *self)
8751{
8752 if (PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
8753 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
8754 return self;
8755 } else
8756 /* Subtype -- return genuine unicode string with the same value. */
8757 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8757, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
,
8758 PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8758, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
);
8759}
8760
/* Help text for the swapcase method, defined as the static char array
   swapcase__doc__. */
8761PyDoc_STRVAR(swapcase__doc__,static char swapcase__doc__[] = "S.swapcase() -> str\n\nReturn a copy of S with uppercase characters converted to lowercase\nand vice versa."
8762 "S.swapcase() -> str\n\static char swapcase__doc__[] = "S.swapcase() -> str\n\nReturn a copy of S with uppercase characters converted to lowercase\nand vice versa."
8763\n\static char swapcase__doc__[] = "S.swapcase() -> str\n\nReturn a copy of S with uppercase characters converted to lowercase\nand vice versa."
8764Return a copy of S with uppercase characters converted to lowercase\n\static char swapcase__doc__[] = "S.swapcase() -> str\n\nReturn a copy of S with uppercase characters converted to lowercase\nand vice versa."
8765and vice versa.")static char swapcase__doc__[] = "S.swapcase() -> str\n\nReturn a copy of S with uppercase characters converted to lowercase\nand vice versa.";
8766
8767static PyObject*
8768unicode_swapcase(PyUnicodeObject *self)
8769{
8770 return fixup(self, fixswapcase);
8771}
8772
/* Help text for the static method str.maketrans, defined as the static char
   array maketrans__doc__.  It describes the one-, two- and three-argument
   forms implemented by unicode_maketrans(). */
8773PyDoc_STRVAR(maketrans__doc__,static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result."
8774 "str.maketrans(x[, y[, z]]) -> dict (static method)\n\static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result."
8775\n\static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result."
8776Return a translation table usable for str.translate().\n\static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result."
8777If there is only one argument, it must be a dictionary mapping Unicode\n\static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result."
8778ordinals (integers) or characters to Unicode ordinals, strings or None.\n\static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result."
8779Character keys will be then converted to ordinals.\n\static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result."
8780If there are two arguments, they must be strings of equal length, and\n\static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result."
8781in the resulting dictionary, each character in x will be mapped to the\n\static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result."
8782character at the same position in y. If there is a third argument, it\n\static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result."
8783must be a string, whose characters will be mapped to None in the result.")static char maketrans__doc__[] = "str.maketrans(x[, y[, z]]) -> dict (static method)\n\nReturn a translation table usable for str.translate().\nIf there is only one argument, it must be a dictionary mapping Unicode\nordinals (integers) or characters to Unicode ordinals, strings or None.\nCharacter keys will be then converted to ordinals.\nIf there are two arguments, they must be strings of equal length, and\nin the resulting dictionary, each character in x will be mapped to the\ncharacter at the same position in y. If there is a third argument, it\nmust be a string, whose characters will be mapped to None in the result.";
8784
8785static PyObject*
8786unicode_maketrans(PyUnicodeObject *null, PyObject *args)
8787{
8788 PyObject *x, *y = NULL((void *)0), *z = NULL((void *)0);
8789 PyObject *new = NULL((void *)0), *key, *value;
8790 Py_ssize_t i = 0;
8791 int res;
8792
8793 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "O|UU:maketrans", &x, &y, &z))
8794 return NULL((void *)0);
8795 new = PyDict_New();
8796 if (!new)
8797 return NULL((void *)0);
8798 if (y != NULL((void *)0)) {
8799 /* x must be a string too, of equal length */
8800 Py_ssize_t ylen = PyUnicode_GET_SIZE(y)((__builtin_expect(!(((((((PyObject*)(y))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 8800, "PyUnicode_Check(y)") : (void)0),(((PyUnicodeObject *
)(y))->length))
;
8801 if (!PyUnicode_Check(x)((((((PyObject*)(x))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
8802 PyErr_SetString(PyExc_TypeError, "first maketrans argument must "
8803 "be a string if there is a second argument");
8804 goto err;
8805 }
8806 if (PyUnicode_GET_SIZE(x)((__builtin_expect(!(((((((PyObject*)(x))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 8806, "PyUnicode_Check(x)") : (void)0),(((PyUnicodeObject *
)(x))->length))
!= ylen) {
8807 PyErr_SetString(PyExc_ValueError, "the first two maketrans "
8808 "arguments must have equal length");
8809 goto err;
8810 }
8811 /* create entries for translating chars in x to those in y */
8812 for (i = 0; i < PyUnicode_GET_SIZE(x)((__builtin_expect(!(((((((PyObject*)(x))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 8812, "PyUnicode_Check(x)") : (void)0),(((PyUnicodeObject *
)(x))->length))
; i++) {
8813 key = PyLong_FromLong(PyUnicode_AS_UNICODE(x)((__builtin_expect(!(((((((PyObject*)(x))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 8813, "PyUnicode_Check(x)") : (void)0),(((PyUnicodeObject *
)(x))->str))
[i]);
8814 value = PyLong_FromLong(PyUnicode_AS_UNICODE(y)((__builtin_expect(!(((((((PyObject*)(y))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 8814, "PyUnicode_Check(y)") : (void)0),(((PyUnicodeObject *
)(y))->str))
[i]);
8815 if (!key || !value)
8816 goto err;
8817 res = PyDict_SetItem(new, key, value);
8818 Py_DECREF(key)do { if (_Py_RefTotal-- , --((PyObject*)(key))->ob_refcnt !=
0) { if (((PyObject*)key)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8818, (PyObject *)(key)); } else _Py_Dealloc
((PyObject *)(key)); } while (0)
;
8819 Py_DECREF(value)do { if (_Py_RefTotal-- , --((PyObject*)(value))->ob_refcnt
!= 0) { if (((PyObject*)value)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8819, (PyObject *)(value)); } else
_Py_Dealloc((PyObject *)(value)); } while (0)
;
8820 if (res < 0)
8821 goto err;
8822 }
8823 /* create entries for deleting chars in z */
8824 if (z != NULL((void *)0)) {
8825 for (i = 0; i < PyUnicode_GET_SIZE(z)((__builtin_expect(!(((((((PyObject*)(z))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 8825, "PyUnicode_Check(z)") : (void)0),(((PyUnicodeObject *
)(z))->length))
; i++) {
8826 key = PyLong_FromLong(PyUnicode_AS_UNICODE(z)((__builtin_expect(!(((((((PyObject*)(z))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 8826, "PyUnicode_Check(z)") : (void)0),(((PyUnicodeObject *
)(z))->str))
[i]);
8827 if (!key)
8828 goto err;
8829 res = PyDict_SetItem(new, key, Py_None(&_Py_NoneStruct));
8830 Py_DECREF(key)do { if (_Py_RefTotal-- , --((PyObject*)(key))->ob_refcnt !=
0) { if (((PyObject*)key)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8830, (PyObject *)(key)); } else _Py_Dealloc
((PyObject *)(key)); } while (0)
;
8831 if (res < 0)
8832 goto err;
8833 }
8834 }
8835 } else {
8836 /* x must be a dict */
8837 if (!PyDict_CheckExact(x)((((PyObject*)(x))->ob_type) == &PyDict_Type)) {
8838 PyErr_SetString(PyExc_TypeError, "if you give only one argument "
8839 "to maketrans it must be a dict");
8840 goto err;
8841 }
8842 /* copy entries into the new dict, converting string keys to int keys */
8843 while (PyDict_Next(x, &i, &key, &value)) {
8844 if (PyUnicode_Check(key)((((((PyObject*)(key))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
8845 /* convert string keys to integer keys */
8846 PyObject *newkey;
8847 if (PyUnicode_GET_SIZE(key)((__builtin_expect(!(((((((PyObject*)(key))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8847, "PyUnicode_Check(key)") : (
void)0),(((PyUnicodeObject *)(key))->length))
!= 1) {
8848 PyErr_SetString(PyExc_ValueError, "string keys in translate "
8849 "table must be of length 1");
8850 goto err;
8851 }
8852 newkey = PyLong_FromLong(PyUnicode_AS_UNICODE(key)((__builtin_expect(!(((((((PyObject*)(key))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8852, "PyUnicode_Check(key)") : (
void)0),(((PyUnicodeObject *)(key))->str))
[0]);
8853 if (!newkey)
8854 goto err;
8855 res = PyDict_SetItem(new, newkey, value);
8856 Py_DECREF(newkey)do { if (_Py_RefTotal-- , --((PyObject*)(newkey))->ob_refcnt
!= 0) { if (((PyObject*)newkey)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8856, (PyObject *)(newkey)); } else
_Py_Dealloc((PyObject *)(newkey)); } while (0)
;
8857 if (res < 0)
8858 goto err;
8859 } else if (PyLong_Check(key)((((((PyObject*)(key))->ob_type))->tp_flags & ((1L<<
24))) != 0)
) {
8860 /* just keep integer keys */
8861 if (PyDict_SetItem(new, key, value) < 0)
8862 goto err;
8863 } else {
8864 PyErr_SetString(PyExc_TypeError, "keys in translate table must "
8865 "be strings or integers");
8866 goto err;
8867 }
8868 }
8869 }
8870 return new;
8871 err:
8872 Py_DECREF(new)do { if (_Py_RefTotal-- , --((PyObject*)(new))->ob_refcnt !=
0) { if (((PyObject*)new)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8872, (PyObject *)(new)); } else _Py_Dealloc
((PyObject *)(new)); } while (0)
;
8873 return NULL((void *)0);
8874}
8875
/* Help text for the translate method, defined as the static char array
   translate__doc__. */
8876PyDoc_STRVAR(translate__doc__,static char translate__doc__[] = "S.translate(table) -> str\n\nReturn a copy of the string S, where all characters have been mapped\nthrough the given translation table, which must be a mapping of\nUnicode ordinals to Unicode ordinals, strings, or None.\nUnmapped characters are left untouched. Characters mapped to None\nare deleted."
8877 "S.translate(table) -> str\n\static char translate__doc__[] = "S.translate(table) -> str\n\nReturn a copy of the string S, where all characters have been mapped\nthrough the given translation table, which must be a mapping of\nUnicode ordinals to Unicode ordinals, strings, or None.\nUnmapped characters are left untouched. Characters mapped to None\nare deleted."
8878\n\static char translate__doc__[] = "S.translate(table) -> str\n\nReturn a copy of the string S, where all characters have been mapped\nthrough the given translation table, which must be a mapping of\nUnicode ordinals to Unicode ordinals, strings, or None.\nUnmapped characters are left untouched. Characters mapped to None\nare deleted."
8879Return a copy of the string S, where all characters have been mapped\n\static char translate__doc__[] = "S.translate(table) -> str\n\nReturn a copy of the string S, where all characters have been mapped\nthrough the given translation table, which must be a mapping of\nUnicode ordinals to Unicode ordinals, strings, or None.\nUnmapped characters are left untouched. Characters mapped to None\nare deleted."
8880through the given translation table, which must be a mapping of\n\static char translate__doc__[] = "S.translate(table) -> str\n\nReturn a copy of the string S, where all characters have been mapped\nthrough the given translation table, which must be a mapping of\nUnicode ordinals to Unicode ordinals, strings, or None.\nUnmapped characters are left untouched. Characters mapped to None\nare deleted."
8881Unicode ordinals to Unicode ordinals, strings, or None.\n\static char translate__doc__[] = "S.translate(table) -> str\n\nReturn a copy of the string S, where all characters have been mapped\nthrough the given translation table, which must be a mapping of\nUnicode ordinals to Unicode ordinals, strings, or None.\nUnmapped characters are left untouched. Characters mapped to None\nare deleted."
8882Unmapped characters are left untouched. Characters mapped to None\n\static char translate__doc__[] = "S.translate(table) -> str\n\nReturn a copy of the string S, where all characters have been mapped\nthrough the given translation table, which must be a mapping of\nUnicode ordinals to Unicode ordinals, strings, or None.\nUnmapped characters are left untouched. Characters mapped to None\nare deleted."
8883are deleted.")static char translate__doc__[] = "S.translate(table) -> str\n\nReturn a copy of the string S, where all characters have been mapped\nthrough the given translation table, which must be a mapping of\nUnicode ordinals to Unicode ordinals, strings, or None.\nUnmapped characters are left untouched. Characters mapped to None\nare deleted.";
8884
8885static PyObject*
8886unicode_translate(PyUnicodeObject *self, PyObject *table)
8887{
8888 return PyUnicode_TranslateCharmapPyUnicodeUCS2_TranslateCharmap(self->str, self->length, table, "ignore");
8889}
8890
/* Help text for the upper method, defined as the static char array
   upper__doc__. */
8891PyDoc_STRVAR(upper__doc__,static char upper__doc__[] = "S.upper() -> str\n\nReturn a copy of S converted to uppercase."
8892 "S.upper() -> str\n\static char upper__doc__[] = "S.upper() -> str\n\nReturn a copy of S converted to uppercase."
8893\n\static char upper__doc__[] = "S.upper() -> str\n\nReturn a copy of S converted to uppercase."
8894Return a copy of S converted to uppercase.")static char upper__doc__[] = "S.upper() -> str\n\nReturn a copy of S converted to uppercase.";
8895
8896static PyObject*
8897unicode_upper(PyUnicodeObject *self)
8898{
8899 return fixup(self, fixupper);
8900}
8901
/* Help text for the zfill method, defined as the static char array
   zfill__doc__. */
8902PyDoc_STRVAR(zfill__doc__,static char zfill__doc__[] = "S.zfill(width) -> str\n\nPad a numeric string S with zeros on the left, to fill a field\nof the specified width. The string S is never truncated."
8903 "S.zfill(width) -> str\n\static char zfill__doc__[] = "S.zfill(width) -> str\n\nPad a numeric string S with zeros on the left, to fill a field\nof the specified width. The string S is never truncated."
8904\n\static char zfill__doc__[] = "S.zfill(width) -> str\n\nPad a numeric string S with zeros on the left, to fill a field\nof the specified width. The string S is never truncated."
8905Pad a numeric string S with zeros on the left, to fill a field\n\static char zfill__doc__[] = "S.zfill(width) -> str\n\nPad a numeric string S with zeros on the left, to fill a field\nof the specified width. The string S is never truncated."
8906of the specified width. The string S is never truncated.")static char zfill__doc__[] = "S.zfill(width) -> str\n\nPad a numeric string S with zeros on the left, to fill a field\nof the specified width. The string S is never truncated.";
8907
8908static PyObject *
8909unicode_zfill(PyUnicodeObject *self, PyObject *args)
8910{
8911 Py_ssize_t fill;
8912 PyUnicodeObject *u;
8913
8914 Py_ssize_t width;
8915 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "n:zfill", &width))
8916 return NULL((void *)0);
8917
8918 if (self->length >= width) {
8919 if (PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
8920 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
8921 return (PyObject*) self;
8922 }
8923 else
8924 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(
8925 PyUnicode_AS_UNICODE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8925, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->str))
,
8926 PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 8926, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
8927 );
8928 }
8929
8930 fill = width - self->length;
8931
8932 u = pad(self, fill, 0, '0');
8933
8934 if (u == NULL((void *)0))
8935 return NULL((void *)0);
8936
8937 if (u->str[fill] == '+' || u->str[fill] == '-') {
8938 /* move sign to beginning of string */
8939 u->str[0] = u->str[fill];
8940 u->str[fill] = '0';
8941 }
8942
8943 return (PyObject*) u;
8944}
8945
#if 0
/* Both functions below are compiled out by the enclosing #if 0. */

/* Returns the file-level counter `numfree` as a Python int. */
static PyObject*
unicode_freelistsize(PyUnicodeObject *self)
{
    PyObject *count = PyLong_FromLong(numfree);

    return count;
}

/* Passes the character buffer and length of `self` to
   PyUnicode_TransformDecimalToASCII() and returns its result. */
static PyObject *
unicode__decimal2ascii(PyObject *self)
{
    PyObject *converted;

    converted = PyUnicode_TransformDecimalToASCII(PyUnicode_AS_UNICODE(self),
                                                  PyUnicode_GET_SIZE(self));
    return converted;
}
#endif
8960
/* Help text for the startswith method, defined as the static char array
   startswith__doc__.  The tuple form it mentions is the PyTuple_Check branch
   of unicode_startswith(). */
8961PyDoc_STRVAR(startswith__doc__,static char startswith__doc__[] = "S.startswith(prefix[, start[, end]]) -> bool\n\nReturn True if S starts with the specified prefix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nprefix can also be a tuple of strings to try."
8962 "S.startswith(prefix[, start[, end]]) -> bool\n\static char startswith__doc__[] = "S.startswith(prefix[, start[, end]]) -> bool\n\nReturn True if S starts with the specified prefix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nprefix can also be a tuple of strings to try."
8963\n\static char startswith__doc__[] = "S.startswith(prefix[, start[, end]]) -> bool\n\nReturn True if S starts with the specified prefix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nprefix can also be a tuple of strings to try."
8964Return True if S starts with the specified prefix, False otherwise.\n\static char startswith__doc__[] = "S.startswith(prefix[, start[, end]]) -> bool\n\nReturn True if S starts with the specified prefix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nprefix can also be a tuple of strings to try."
8965With optional start, test S beginning at that position.\n\static char startswith__doc__[] = "S.startswith(prefix[, start[, end]]) -> bool\n\nReturn True if S starts with the specified prefix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nprefix can also be a tuple of strings to try."
8966With optional end, stop comparing S at that position.\n\static char startswith__doc__[] = "S.startswith(prefix[, start[, end]]) -> bool\n\nReturn True if S starts with the specified prefix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nprefix can also be a tuple of strings to try."
8967prefix can also be a tuple of strings to try.")static char startswith__doc__[] = "S.startswith(prefix[, start[, end]]) -> bool\n\nReturn True if S starts with the specified prefix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nprefix can also be a tuple of strings to try.";
8968
8969static PyObject *
8970unicode_startswith(PyUnicodeObject *self,
8971 PyObject *args)
8972{
8973 PyObject *subobj;
8974 PyUnicodeObject *substring;
8975 Py_ssize_t start = 0;
8976 Py_ssize_t end = PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1));
8977 int result;
8978
8979 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "O|O&O&:startswith", &subobj,
8980 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
8981 return NULL((void *)0);
8982 if (PyTuple_Check(subobj)((((((PyObject*)(subobj))->ob_type))->tp_flags & ((
1L<<26))) != 0)
) {
8983 Py_ssize_t i;
8984 for (i = 0; i < PyTuple_GET_SIZE(subobj)(((PyVarObject*)(subobj))->ob_size); i++) {
8985 substring = (PyUnicodeObject *)PyUnicode_FromObjectPyUnicodeUCS2_FromObject(
8986 PyTuple_GET_ITEM(subobj, i)(((PyTupleObject *)(subobj))->ob_item[i]));
8987 if (substring == NULL((void *)0))
8988 return NULL((void *)0);
8989 result = tailmatch(self, substring, start, end, -1);
8990 Py_DECREF(substring)do { if (_Py_RefTotal-- , --((PyObject*)(substring))->ob_refcnt
!= 0) { if (((PyObject*)substring)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 8990, (PyObject *)(substring)); }
else _Py_Dealloc((PyObject *)(substring)); } while (0)
;
8991 if (result) {
8992 Py_RETURN_TRUEreturn ( _Py_RefTotal++ , ((PyObject*)(((PyObject *) &_Py_TrueStruct
)))->ob_refcnt++), ((PyObject *) &_Py_TrueStruct)
;
8993 }
8994 }
8995 /* nothing matched */
8996 Py_RETURN_FALSEreturn ( _Py_RefTotal++ , ((PyObject*)(((PyObject *) &_Py_FalseStruct
)))->ob_refcnt++), ((PyObject *) &_Py_FalseStruct)
;
8997 }
8998 substring = (PyUnicodeObject *)PyUnicode_FromObjectPyUnicodeUCS2_FromObject(subobj);
8999 if (substring == NULL((void *)0))
9000 return NULL((void *)0);
9001 result = tailmatch(self, substring, start, end, -1);
9002 Py_DECREF(substring)do { if (_Py_RefTotal-- , --((PyObject*)(substring))->ob_refcnt
!= 0) { if (((PyObject*)substring)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9002, (PyObject *)(substring)); }
else _Py_Dealloc((PyObject *)(substring)); } while (0)
;
9003 return PyBool_FromLong(result);
9004}
9005
9006
/* Help text for the endswith method, defined as the static char array
   endswith__doc__. */
9007PyDoc_STRVAR(endswith__doc__,static char endswith__doc__[] = "S.endswith(suffix[, start[, end]]) -> bool\n\nReturn True if S ends with the specified suffix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nsuffix can also be a tuple of strings to try."
9008 "S.endswith(suffix[, start[, end]]) -> bool\n\static char endswith__doc__[] = "S.endswith(suffix[, start[, end]]) -> bool\n\nReturn True if S ends with the specified suffix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nsuffix can also be a tuple of strings to try."
9009\n\static char endswith__doc__[] = "S.endswith(suffix[, start[, end]]) -> bool\n\nReturn True if S ends with the specified suffix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nsuffix can also be a tuple of strings to try."
9010Return True if S ends with the specified suffix, False otherwise.\n\static char endswith__doc__[] = "S.endswith(suffix[, start[, end]]) -> bool\n\nReturn True if S ends with the specified suffix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nsuffix can also be a tuple of strings to try."
9011With optional start, test S beginning at that position.\n\static char endswith__doc__[] = "S.endswith(suffix[, start[, end]]) -> bool\n\nReturn True if S ends with the specified suffix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nsuffix can also be a tuple of strings to try."
9012With optional end, stop comparing S at that position.\n\static char endswith__doc__[] = "S.endswith(suffix[, start[, end]]) -> bool\n\nReturn True if S ends with the specified suffix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nsuffix can also be a tuple of strings to try."
9013suffix can also be a tuple of strings to try.")static char endswith__doc__[] = "S.endswith(suffix[, start[, end]]) -> bool\n\nReturn True if S ends with the specified suffix, False otherwise.\nWith optional start, test S beginning at that position.\nWith optional end, stop comparing S at that position.\nsuffix can also be a tuple of strings to try.";
9014
9015static PyObject *
9016unicode_endswith(PyUnicodeObject *self,
9017 PyObject *args)
9018{
9019 PyObject *subobj;
9020 PyUnicodeObject *substring;
9021 Py_ssize_t start = 0;
9022 Py_ssize_t end = PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1));
9023 int result;
9024
9025 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "O|O&O&:endswith", &subobj,
9026 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
9027 return NULL((void *)0);
9028 if (PyTuple_Check(subobj)((((((PyObject*)(subobj))->ob_type))->tp_flags & ((
1L<<26))) != 0)
) {
9029 Py_ssize_t i;
9030 for (i = 0; i < PyTuple_GET_SIZE(subobj)(((PyVarObject*)(subobj))->ob_size); i++) {
9031 substring = (PyUnicodeObject *)PyUnicode_FromObjectPyUnicodeUCS2_FromObject(
9032 PyTuple_GET_ITEM(subobj, i)(((PyTupleObject *)(subobj))->ob_item[i]));
9033 if (substring == NULL((void *)0))
9034 return NULL((void *)0);
9035 result = tailmatch(self, substring, start, end, +1);
9036 Py_DECREF(substring)do { if (_Py_RefTotal-- , --((PyObject*)(substring))->ob_refcnt
!= 0) { if (((PyObject*)substring)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9036, (PyObject *)(substring)); }
else _Py_Dealloc((PyObject *)(substring)); } while (0)
;
9037 if (result) {
9038 Py_RETURN_TRUEreturn ( _Py_RefTotal++ , ((PyObject*)(((PyObject *) &_Py_TrueStruct
)))->ob_refcnt++), ((PyObject *) &_Py_TrueStruct)
;
9039 }
9040 }
9041 Py_RETURN_FALSEreturn ( _Py_RefTotal++ , ((PyObject*)(((PyObject *) &_Py_FalseStruct
)))->ob_refcnt++), ((PyObject *) &_Py_FalseStruct)
;
9042 }
9043 substring = (PyUnicodeObject *)PyUnicode_FromObjectPyUnicodeUCS2_FromObject(subobj);
9044 if (substring == NULL((void *)0))
9045 return NULL((void *)0);
9046
9047 result = tailmatch(self, substring, start, end, +1);
9048 Py_DECREF(substring)do { if (_Py_RefTotal-- , --((PyObject*)(substring))->ob_refcnt
!= 0) { if (((PyObject*)substring)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9048, (PyObject *)(substring)); }
else _Py_Dealloc((PyObject *)(substring)); } while (0)
;
9049 return PyBool_FromLong(result);
9050}
9051
9052#include "stringlib/string_format.h"
9053
9054PyDoc_STRVAR(format__doc__,static char format__doc__[] = "S.format(*args, **kwargs) -> str\n\nReturn a formatted version of S, using substitutions from args and kwargs.\nThe substitutions are identified by braces ('{' and '}')."
9055 "S.format(*args, **kwargs) -> str\n\static char format__doc__[] = "S.format(*args, **kwargs) -> str\n\nReturn a formatted version of S, using substitutions from args and kwargs.\nThe substitutions are identified by braces ('{' and '}')."
9056\n\static char format__doc__[] = "S.format(*args, **kwargs) -> str\n\nReturn a formatted version of S, using substitutions from args and kwargs.\nThe substitutions are identified by braces ('{' and '}')."
9057Return a formatted version of S, using substitutions from args and kwargs.\n\static char format__doc__[] = "S.format(*args, **kwargs) -> str\n\nReturn a formatted version of S, using substitutions from args and kwargs.\nThe substitutions are identified by braces ('{' and '}')."
9058The substitutions are identified by braces ('{' and '}').")static char format__doc__[] = "S.format(*args, **kwargs) -> str\n\nReturn a formatted version of S, using substitutions from args and kwargs.\nThe substitutions are identified by braces ('{' and '}').";
9059
9060PyDoc_STRVAR(format_map__doc__,static char format_map__doc__[] = "S.format_map(mapping) -> str\n\nReturn a formatted version of S, using substitutions from mapping.\nThe substitutions are identified by braces ('{' and '}')."
9061 "S.format_map(mapping) -> str\n\static char format_map__doc__[] = "S.format_map(mapping) -> str\n\nReturn a formatted version of S, using substitutions from mapping.\nThe substitutions are identified by braces ('{' and '}')."
9062\n\static char format_map__doc__[] = "S.format_map(mapping) -> str\n\nReturn a formatted version of S, using substitutions from mapping.\nThe substitutions are identified by braces ('{' and '}')."
9063Return a formatted version of S, using substitutions from mapping.\n\static char format_map__doc__[] = "S.format_map(mapping) -> str\n\nReturn a formatted version of S, using substitutions from mapping.\nThe substitutions are identified by braces ('{' and '}')."
9064The substitutions are identified by braces ('{' and '}').")static char format_map__doc__[] = "S.format_map(mapping) -> str\n\nReturn a formatted version of S, using substitutions from mapping.\nThe substitutions are identified by braces ('{' and '}').";
9065
9066static PyObject *
9067unicode__format__(PyObject* self, PyObject* args)
9068{
9069 PyObject *format_spec;
9070
9071 if (!PyArg_ParseTuple_PyArg_ParseTuple_SizeT(args, "U:__format__", &format_spec))
9072 return NULL((void *)0);
9073
9074 return _PyUnicode_FormatAdvanced(self,
9075 PyUnicode_AS_UNICODE(format_spec)((__builtin_expect(!(((((((PyObject*)(format_spec))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 9075, "PyUnicode_Check(format_spec)"
) : (void)0),(((PyUnicodeObject *)(format_spec))->str))
,
9076 PyUnicode_GET_SIZE(format_spec)((__builtin_expect(!(((((((PyObject*)(format_spec))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 9076, "PyUnicode_Check(format_spec)"
) : (void)0),(((PyUnicodeObject *)(format_spec))->length))
);
9077}
9078
9079PyDoc_STRVAR(p_format__doc__,static char p_format__doc__[] = "S.__format__(format_spec) -> str\n\nReturn a formatted version of S as described by format_spec."
9080 "S.__format__(format_spec) -> str\n\static char p_format__doc__[] = "S.__format__(format_spec) -> str\n\nReturn a formatted version of S as described by format_spec."
9081\n\static char p_format__doc__[] = "S.__format__(format_spec) -> str\n\nReturn a formatted version of S as described by format_spec."
9082Return a formatted version of S as described by format_spec.")static char p_format__doc__[] = "S.__format__(format_spec) -> str\n\nReturn a formatted version of S as described by format_spec.";
9083
9084static PyObject *
9085unicode__sizeof__(PyUnicodeObject *v)
9086{
9087 return PyLong_FromSsize_t(sizeof(PyUnicodeObject) +
9088 sizeof(Py_UNICODE) * (v->length + 1));
9089}
9090
9091PyDoc_STRVAR(sizeof__doc__,static char sizeof__doc__[] = "S.__sizeof__() -> size of S in memory, in bytes"
9092 "S.__sizeof__() -> size of S in memory, in bytes")static char sizeof__doc__[] = "S.__sizeof__() -> size of S in memory, in bytes";
9093
9094static PyObject *
9095unicode_getnewargs(PyUnicodeObject *v)
9096{
9097 return Py_BuildValue_Py_BuildValue_SizeT("(u#)", v->str, v->length);
9098}
9099
9100static PyMethodDef unicode_methods[] = {
9101
9102 /* Order is according to common usage: often used methods should
9103 appear first, since lookup is done sequentially. */
9104
9105 {"encode", (PyCFunction) unicode_encode, METH_VARARGS0x0001 | METH_KEYWORDS0x0002, encode__doc__},
9106 {"replace", (PyCFunction) unicode_replace, METH_VARARGS0x0001, replace__doc__},
9107 {"split", (PyCFunction) unicode_split, METH_VARARGS0x0001, split__doc__},
9108 {"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS0x0001, rsplit__doc__},
9109 {"join", (PyCFunction) unicode_join, METH_O0x0008, join__doc__},
9110 {"capitalize", (PyCFunction) unicode_capitalize, METH_NOARGS0x0004, capitalize__doc__},
9111 {"title", (PyCFunction) unicode_title, METH_NOARGS0x0004, title__doc__},
9112 {"center", (PyCFunction) unicode_center, METH_VARARGS0x0001, center__doc__},
9113 {"count", (PyCFunction) unicode_count, METH_VARARGS0x0001, count__doc__},
9114 {"expandtabs", (PyCFunction) unicode_expandtabs, METH_VARARGS0x0001, expandtabs__doc__},
9115 {"find", (PyCFunction) unicode_find, METH_VARARGS0x0001, find__doc__},
9116 {"partition", (PyCFunction) unicode_partition, METH_O0x0008, partition__doc__},
9117 {"index", (PyCFunction) unicode_index, METH_VARARGS0x0001, index__doc__},
9118 {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS0x0001, ljust__doc__},
9119 {"lower", (PyCFunction) unicode_lower, METH_NOARGS0x0004, lower__doc__},
9120 {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS0x0001, lstrip__doc__},
9121 {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS0x0001, rfind__doc__},
9122 {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS0x0001, rindex__doc__},
9123 {"rjust", (PyCFunction) unicode_rjust, METH_VARARGS0x0001, rjust__doc__},
9124 {"rstrip", (PyCFunction) unicode_rstrip, METH_VARARGS0x0001, rstrip__doc__},
9125 {"rpartition", (PyCFunction) unicode_rpartition, METH_O0x0008, rpartition__doc__},
9126 {"splitlines", (PyCFunction) unicode_splitlines, METH_VARARGS0x0001, splitlines__doc__},
9127 {"strip", (PyCFunction) unicode_strip, METH_VARARGS0x0001, strip__doc__},
9128 {"swapcase", (PyCFunction) unicode_swapcase, METH_NOARGS0x0004, swapcase__doc__},
9129 {"translate", (PyCFunction) unicode_translate, METH_O0x0008, translate__doc__},
9130 {"upper", (PyCFunction) unicode_upper, METH_NOARGS0x0004, upper__doc__},
9131 {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS0x0001, startswith__doc__},
9132 {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS0x0001, endswith__doc__},
9133 {"islower", (PyCFunction) unicode_islower, METH_NOARGS0x0004, islower__doc__},
9134 {"isupper", (PyCFunction) unicode_isupper, METH_NOARGS0x0004, isupper__doc__},
9135 {"istitle", (PyCFunction) unicode_istitle, METH_NOARGS0x0004, istitle__doc__},
9136 {"isspace", (PyCFunction) unicode_isspace, METH_NOARGS0x0004, isspace__doc__},
9137 {"isdecimal", (PyCFunction) unicode_isdecimal, METH_NOARGS0x0004, isdecimal__doc__},
9138 {"isdigit", (PyCFunction) unicode_isdigit, METH_NOARGS0x0004, isdigit__doc__},
9139 {"isnumeric", (PyCFunction) unicode_isnumeric, METH_NOARGS0x0004, isnumeric__doc__},
9140 {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS0x0004, isalpha__doc__},
9141 {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS0x0004, isalnum__doc__},
9142 {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS0x0004, isidentifier__doc__},
9143 {"isprintable", (PyCFunction) unicode_isprintable, METH_NOARGS0x0004, isprintable__doc__},
9144 {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS0x0001, zfill__doc__},
9145 {"format", (PyCFunction) do_string_format, METH_VARARGS0x0001 | METH_KEYWORDS0x0002, format__doc__},
9146 {"format_map", (PyCFunction) do_string_format_map, METH_O0x0008, format_map__doc__},
9147 {"__format__", (PyCFunction) unicode__format__, METH_VARARGS0x0001, p_format__doc__},
9148 {"maketrans", (PyCFunction) unicode_maketrans,
9149 METH_VARARGS0x0001 | METH_STATIC0x0020, maketrans__doc__},
9150 {"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS0x0004, sizeof__doc__},
9151#if 0
9152 {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS0x0004, capwords__doc__},
9153#endif
9154
9155#if 0
9156 /* These methods are just used for debugging the implementation. */
9157 {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS0x0004},
9158 {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS0x0004},
9159#endif
9160
9161 {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS0x0004},
9162 {NULL((void *)0), NULL((void *)0)}
9163};
9164
9165static PyObject *
9166unicode_mod(PyObject *v, PyObject *w)
9167{
9168 if (!PyUnicode_Check(v)((((((PyObject*)(v))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
9169 Py_INCREF(Py_NotImplemented)( _Py_RefTotal++ , ((PyObject*)((&_Py_NotImplementedStruct
)))->ob_refcnt++)
;
9170 return Py_NotImplemented(&_Py_NotImplementedStruct);
9171 }
9172 return PyUnicode_FormatPyUnicodeUCS2_Format(v, w);
9173}
9174
9175static PyNumberMethods unicode_as_number = {
9176 0, /*nb_add*/
9177 0, /*nb_subtract*/
9178 0, /*nb_multiply*/
9179 unicode_mod, /*nb_remainder*/
9180};
9181
9182static PySequenceMethods unicode_as_sequence = {
9183 (lenfunc) unicode_length, /* sq_length */
9184 PyUnicode_ConcatPyUnicodeUCS2_Concat, /* sq_concat */
9185 (ssizeargfunc) unicode_repeat, /* sq_repeat */
9186 (ssizeargfunc) unicode_getitem, /* sq_item */
9187 0, /* sq_slice */
9188 0, /* sq_ass_item */
9189 0, /* sq_ass_slice */
9190 PyUnicode_ContainsPyUnicodeUCS2_Contains, /* sq_contains */
9191};
9192
9193static PyObject*
9194unicode_subscript(PyUnicodeObject* self, PyObject* item)
9195{
9196 if (PyIndex_Check(item)((item)->ob_type->tp_as_number != ((void *)0) &&
(item)->ob_type->tp_as_number->nb_index != ((void *
)0))
) {
9197 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
9198 if (i == -1 && PyErr_Occurred())
9199 return NULL((void *)0);
9200 if (i < 0)
9201 i += PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9201, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
;
9202 return unicode_getitem(self, i);
9203 } else if (PySlice_Check(item)((((PyObject*)(item))->ob_type) == &PySlice_Type)) {
9204 Py_ssize_t start, stop, step, slicelength, cur, i;
9205 Py_UNICODE* source_buf;
9206 Py_UNICODE* result_buf;
9207 PyObject* result;
9208
9209 if (PySlice_GetIndicesEx(item, PyUnicode_GET_SIZE(self)((__builtin_expect(!(((((((PyObject*)(self))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9209, "PyUnicode_Check(self)") :
(void)0),(((PyUnicodeObject *)(self))->length))
,
9210 &start, &stop, &step, &slicelength) < 0) {
9211 return NULL((void *)0);
9212 }
9213
9214 if (slicelength <= 0) {
9215 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(NULL((void *)0), 0);
9216 } else if (start == 0 && step == 1 && slicelength == self->length &&
9217 PyUnicode_CheckExact(self)((((PyObject*)(self))->ob_type) == &PyUnicode_Type)) {
9218 Py_INCREF(self)( _Py_RefTotal++ , ((PyObject*)(self))->ob_refcnt++);
9219 return (PyObject *)self;
9220 } else if (step == 1) {
9221 return PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(self->str + start, slicelength);
9222 } else {
9223 source_buf = PyUnicode_AS_UNICODE((PyObject*)self)((__builtin_expect(!(((((((PyObject*)((PyObject*)self))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 9223, "PyUnicode_Check((PyObject*)self)"
) : (void)0),(((PyUnicodeObject *)((PyObject*)self))->str)
)
;
9224 result_buf = (Py_UNICODE *)PyObject_MALLOC_PyObject_DebugMalloc(slicelength*
9225 sizeof(Py_UNICODE));
9226
9227 if (result_buf == NULL((void *)0))
9228 return PyErr_NoMemory();
9229
9230 for (cur = start, i = 0; i < slicelength; cur += step, i++) {
9231 result_buf[i] = source_buf[cur];
9232 }
9233
9234 result = PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(result_buf, slicelength);
9235 PyObject_FREE_PyObject_DebugFree(result_buf);
9236 return result;
9237 }
9238 } else {
9239 PyErr_SetString(PyExc_TypeError, "string indices must be integers");
9240 return NULL((void *)0);
9241 }
9242}
9243
9244static PyMappingMethods unicode_as_mapping = {
9245 (lenfunc)unicode_length, /* mp_length */
9246 (binaryfunc)unicode_subscript, /* mp_subscript */
9247 (objobjargproc)0, /* mp_ass_subscript */
9248};
9249
9250
9251/* Helpers for PyUnicode_Format() */
9252
9253static PyObject *
9254getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
9255{
9256 Py_ssize_t argidx = *p_argidx;
9257 if (argidx < arglen) {
9258 (*p_argidx)++;
9259 if (arglen < 0)
9260 return args;
9261 else
9262 return PyTuple_GetItem(args, argidx);
9263 }
9264 PyErr_SetString(PyExc_TypeError,
9265 "not enough arguments for format string");
9266 return NULL((void *)0);
9267}
9268
9269/* Returns a new reference to a PyUnicode object, or NULL on failure. */
9270
9271static PyObject *
9272formatfloat(PyObject *v, int flags, int prec, int type)
9273{
9274 char *p;
9275 PyObject *result;
9276 double x;
9277
9278 x = PyFloat_AsDouble(v);
9279 if (x == -1.0 && PyErr_Occurred())
9280 return NULL((void *)0);
9281
9282 if (prec < 0)
9283 prec = 6;
9284
9285 p = PyOS_double_to_string(x, type, prec,
9286 (flags & F_ALT(1<<3)) ? Py_DTSF_ALT0x04 : 0, NULL((void *)0));
9287 if (p == NULL((void *)0))
9288 return NULL((void *)0);
9289 result = PyUnicode_FromStringAndSizePyUnicodeUCS2_FromStringAndSize(p, strlen(p));
9290 PyMem_Free(p);
9291 return result;
9292}
9293
9294static PyObject*
9295formatlong(PyObject *val, int flags, int prec, int type)
9296{
9297 char *buf;
9298 int len;
9299 PyObject *str; /* temporary string object. */
9300 PyObject *result;
9301
9302 str = _PyBytes_FormatLong(val, flags, prec, type, &buf, &len);
9303 if (!str)
9304 return NULL((void *)0);
9305 result = PyUnicode_FromStringAndSizePyUnicodeUCS2_FromStringAndSize(buf, len);
9306 Py_DECREF(str)do { if (_Py_RefTotal-- , --((PyObject*)(str))->ob_refcnt !=
0) { if (((PyObject*)str)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9306, (PyObject *)(str)); } else _Py_Dealloc
((PyObject *)(str)); } while (0)
;
9307 return result;
9308}
9309
9310static int
9311formatchar(Py_UNICODE *buf,
9312 size_t buflen,
9313 PyObject *v)
9314{
9315 /* presume that the buffer is at least 3 characters long */
9316 if (PyUnicode_Check(v)((((((PyObject*)(v))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
9317 if (PyUnicode_GET_SIZE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 9317, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->length))
== 1) {
9318 buf[0] = PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 9318, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
[0];
9319 buf[1] = '\0';
9320 return 1;
9321 }
9322#ifndef Py_UNICODE_WIDE
9323 if (PyUnicode_GET_SIZE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 9323, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->length))
== 2) {
9324 /* Decode a valid surrogate pair */
9325 int c0 = PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 9325, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
[0];
9326 int c1 = PyUnicode_AS_UNICODE(v)((__builtin_expect(!(((((((PyObject*)(v))->ob_type))->tp_flags
& ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 9326, "PyUnicode_Check(v)") : (void)0),(((PyUnicodeObject *
)(v))->str))
[1];
9327 if (0xD800 <= c0 && c0 <= 0xDBFF &&
9328 0xDC00 <= c1 && c1 <= 0xDFFF) {
9329 buf[0] = c0;
9330 buf[1] = c1;
9331 buf[2] = '\0';
9332 return 2;
9333 }
9334 }
9335#endif
9336 goto onError;
9337 }
9338 else {
9339 /* Integer input truncated to a character */
9340 long x;
9341 x = PyLong_AsLong(v);
9342 if (x == -1 && PyErr_Occurred())
9343 goto onError;
9344
9345 if (x < 0 || x > 0x10ffff) {
9346 PyErr_SetString(PyExc_OverflowError,
9347 "%c arg not in range(0x110000)");
9348 return -1;
9349 }
9350
9351#ifndef Py_UNICODE_WIDE
9352 if (x > 0xffff) {
9353 x -= 0x10000;
9354 buf[0] = (Py_UNICODE)(0xD800 | (x >> 10));
9355 buf[1] = (Py_UNICODE)(0xDC00 | (x & 0x3FF));
9356 return 2;
9357 }
9358#endif
9359 buf[0] = (Py_UNICODE) x;
9360 buf[1] = '\0';
9361 return 1;
9362 }
9363
9364 onError:
9365 PyErr_SetString(PyExc_TypeError,
9366 "%c requires int or char");
9367 return -1;
9368}
9369
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
   FORMATBUFLEN is the length of the buffer in which chars are formatted.
   The expansion is parenthesized so it stays a single operand wherever
   the macro is used.
*/
#define FORMATBUFLEN ((size_t)10)
9374
9375PyObject *PyUnicode_FormatPyUnicodeUCS2_Format(PyObject *format,
9376 PyObject *args)
9377{
9378 Py_UNICODE *fmt, *res;
9379 Py_ssize_t fmtcnt, rescnt, reslen, arglen, argidx;
9380 int args_owned = 0;
9381 PyUnicodeObject *result = NULL((void *)0);
9382 PyObject *dict = NULL((void *)0);
9383 PyObject *uformat;
9384
9385 if (format == NULL((void *)0) || args == NULL((void *)0)) {
9386 PyErr_BadInternalCall()_PyErr_BadInternalCall("Objects/unicodeobject.c", 9386);
9387 return NULL((void *)0);
9388 }
9389 uformat = PyUnicode_FromObjectPyUnicodeUCS2_FromObject(format);
9390 if (uformat == NULL((void *)0))
9391 return NULL((void *)0);
9392 fmt = PyUnicode_AS_UNICODE(uformat)((__builtin_expect(!(((((((PyObject*)(uformat))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 9392, "PyUnicode_Check(uformat)"
) : (void)0),(((PyUnicodeObject *)(uformat))->str))
;
9393 fmtcnt = PyUnicode_GET_SIZE(uformat)((__builtin_expect(!(((((((PyObject*)(uformat))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 9393, "PyUnicode_Check(uformat)"
) : (void)0),(((PyUnicodeObject *)(uformat))->length))
;
9394
9395 reslen = rescnt = fmtcnt + 100;
9396 result = _PyUnicode_New(reslen);
9397 if (result == NULL((void *)0))
9398 goto onError;
9399 res = PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9399, "PyUnicode_Check(result)")
: (void)0),(((PyUnicodeObject *)(result))->str))
;
9400
9401 if (PyTuple_Check(args)((((((PyObject*)(args))->ob_type))->tp_flags & ((1L
<<26))) != 0)
) {
9402 arglen = PyTuple_Size(args);
9403 argidx = 0;
9404 }
9405 else {
9406 arglen = -1;
9407 argidx = -2;
9408 }
9409 if (Py_TYPE(args)(((PyObject*)(args))->ob_type)->tp_as_mapping && !PyTuple_Check(args)((((((PyObject*)(args))->ob_type))->tp_flags & ((1L
<<26))) != 0)
&&
9410 !PyUnicode_Check(args)((((((PyObject*)(args))->ob_type))->tp_flags & ((1L
<<28))) != 0)
)
9411 dict = args;
9412
9413 while (--fmtcnt >= 0) {
9414 if (*fmt != '%') {
9415 if (--rescnt < 0) {
9416 rescnt = fmtcnt + 100;
9417 reslen += rescnt;
9418 if (_PyUnicode_Resize(&result, reslen) < 0)
9419 goto onError;
9420 res = PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9420, "PyUnicode_Check(result)")
: (void)0),(((PyUnicodeObject *)(result))->str))
+ reslen - rescnt;
9421 --rescnt;
9422 }
9423 *res++ = *fmt++;
9424 }
9425 else {
9426 /* Got a format specifier */
9427 int flags = 0;
9428 Py_ssize_t width = -1;
9429 int prec = -1;
9430 Py_UNICODE c = '\0';
9431 Py_UNICODE fill;
9432 int isnumok;
9433 PyObject *v = NULL((void *)0);
9434 PyObject *temp = NULL((void *)0);
9435 Py_UNICODE *pbuf;
9436 Py_UNICODE sign;
9437 Py_ssize_t len;
9438 Py_UNICODE formatbuf[FORMATBUFLEN(size_t)10]; /* For formatchar() */
9439
9440 fmt++;
9441 if (*fmt == '(') {
9442 Py_UNICODE *keystart;
9443 Py_ssize_t keylen;
9444 PyObject *key;
9445 int pcount = 1;
9446
9447 if (dict == NULL((void *)0)) {
9448 PyErr_SetString(PyExc_TypeError,
9449 "format requires a mapping");
9450 goto onError;
9451 }
9452 ++fmt;
9453 --fmtcnt;
9454 keystart = fmt;
9455 /* Skip over balanced parentheses */
9456 while (pcount > 0 && --fmtcnt >= 0) {
9457 if (*fmt == ')')
9458 --pcount;
9459 else if (*fmt == '(')
9460 ++pcount;
9461 fmt++;
9462 }
9463 keylen = fmt - keystart - 1;
9464 if (fmtcnt < 0 || pcount > 0) {
9465 PyErr_SetString(PyExc_ValueError,
9466 "incomplete format key");
9467 goto onError;
9468 }
9469#if 0
9470 /* keys are converted to strings using UTF-8 and
9471 then looked up since Python uses strings to hold
9472 variables names etc. in its namespaces and we
9473 wouldn't want to break common idioms. */
9474 key = PyUnicode_EncodeUTF8PyUnicodeUCS2_EncodeUTF8(keystart,
9475 keylen,
9476 NULL((void *)0));
9477#else
9478 key = PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(keystart, keylen);
9479#endif
9480 if (key == NULL((void *)0))
9481 goto onError;
9482 if (args_owned) {
9483 Py_DECREF(args)do { if (_Py_RefTotal-- , --((PyObject*)(args))->ob_refcnt
!= 0) { if (((PyObject*)args)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9483, (PyObject *)(args)); } else
_Py_Dealloc((PyObject *)(args)); } while (0)
;
9484 args_owned = 0;
9485 }
9486 args = PyObject_GetItem(dict, key);
9487 Py_DECREF(key)do { if (_Py_RefTotal-- , --((PyObject*)(key))->ob_refcnt !=
0) { if (((PyObject*)key)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9487, (PyObject *)(key)); } else _Py_Dealloc
((PyObject *)(key)); } while (0)
;
9488 if (args == NULL((void *)0)) {
9489 goto onError;
9490 }
9491 args_owned = 1;
9492 arglen = -1;
9493 argidx = -2;
9494 }
9495 while (--fmtcnt >= 0) {
9496 switch (c = *fmt++) {
9497 case '-': flags |= F_LJUST(1<<0); continue;
9498 case '+': flags |= F_SIGN(1<<1); continue;
9499 case ' ': flags |= F_BLANK(1<<2); continue;
9500 case '#': flags |= F_ALT(1<<3); continue;
9501 case '0': flags |= F_ZERO(1<<4); continue;
9502 }
9503 break;
9504 }
9505 if (c == '*') {
9506 v = getnextarg(args, arglen, &argidx);
9507 if (v == NULL((void *)0))
9508 goto onError;
9509 if (!PyLong_Check(v)((((((PyObject*)(v))->ob_type))->tp_flags & ((1L<<
24))) != 0)
) {
9510 PyErr_SetString(PyExc_TypeError,
9511 "* wants int");
9512 goto onError;
9513 }
9514 width = PyLong_AsLong(v);
9515 if (width == -1 && PyErr_Occurred())
9516 goto onError;
9517 if (width < 0) {
9518 flags |= F_LJUST(1<<0);
9519 width = -width;
9520 }
9521 if (--fmtcnt >= 0)
9522 c = *fmt++;
9523 }
9524 else if (c >= '0' && c <= '9') {
9525 width = c - '0';
9526 while (--fmtcnt >= 0) {
9527 c = *fmt++;
9528 if (c < '0' || c > '9')
9529 break;
9530 if ((width*10) / 10 != width) {
9531 PyErr_SetString(PyExc_ValueError,
9532 "width too big");
9533 goto onError;
9534 }
9535 width = width*10 + (c - '0');
9536 }
9537 }
9538 if (c == '.') {
9539 prec = 0;
9540 if (--fmtcnt >= 0)
9541 c = *fmt++;
9542 if (c == '*') {
9543 v = getnextarg(args, arglen, &argidx);
9544 if (v == NULL((void *)0))
9545 goto onError;
9546 if (!PyLong_Check(v)((((((PyObject*)(v))->ob_type))->tp_flags & ((1L<<
24))) != 0)
) {
9547 PyErr_SetString(PyExc_TypeError,
9548 "* wants int");
9549 goto onError;
9550 }
9551 prec = PyLong_AsLong(v);
9552 if (prec == -1 && PyErr_Occurred())
9553 goto onError;
9554 if (prec < 0)
9555 prec = 0;
9556 if (--fmtcnt >= 0)
9557 c = *fmt++;
9558 }
9559 else if (c >= '0' && c <= '9') {
9560 prec = c - '0';
9561 while (--fmtcnt >= 0) {
9562 c = *fmt++;
9563 if (c < '0' || c > '9')
9564 break;
9565 if ((prec*10) / 10 != prec) {
9566 PyErr_SetString(PyExc_ValueError,
9567 "prec too big");
9568 goto onError;
9569 }
9570 prec = prec*10 + (c - '0');
9571 }
9572 }
9573 } /* prec */
9574 if (fmtcnt >= 0) {
9575 if (c == 'h' || c == 'l' || c == 'L') {
9576 if (--fmtcnt >= 0)
9577 c = *fmt++;
9578 }
9579 }
9580 if (fmtcnt < 0) {
9581 PyErr_SetString(PyExc_ValueError,
9582 "incomplete format");
9583 goto onError;
9584 }
9585 if (c != '%') {
9586 v = getnextarg(args, arglen, &argidx);
9587 if (v == NULL((void *)0))
9588 goto onError;
9589 }
9590 sign = 0;
9591 fill = ' ';
9592 switch (c) {
9593
9594 case '%':
9595 pbuf = formatbuf;
9596 /* presume that buffer length is at least 1 */
9597 pbuf[0] = '%';
9598 len = 1;
9599 break;
9600
9601 case 's':
9602 case 'r':
9603 case 'a':
9604 if (PyUnicode_CheckExact(v)((((PyObject*)(v))->ob_type) == &PyUnicode_Type) && c == 's') {
9605 temp = v;
9606 Py_INCREF(temp)( _Py_RefTotal++ , ((PyObject*)(temp))->ob_refcnt++);
9607 }
9608 else {
9609 if (c == 's')
9610 temp = PyObject_Str(v);
9611 else if (c == 'r')
9612 temp = PyObject_Repr(v);
9613 else
9614 temp = PyObject_ASCII(v);
9615 if (temp == NULL((void *)0))
9616 goto onError;
9617 if (PyUnicode_Check(temp)((((((PyObject*)(temp))->ob_type))->tp_flags & ((1L
<<28))) != 0)
)
9618 /* nothing to do */;
9619 else {
9620 Py_DECREF(temp)do { if (_Py_RefTotal-- , --((PyObject*)(temp))->ob_refcnt
!= 0) { if (((PyObject*)temp)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9620, (PyObject *)(temp)); } else
_Py_Dealloc((PyObject *)(temp)); } while (0)
;
9621 PyErr_SetString(PyExc_TypeError,
9622 "%s argument has non-string str()");
9623 goto onError;
9624 }
9625 }
9626 pbuf = PyUnicode_AS_UNICODE(temp)((__builtin_expect(!(((((((PyObject*)(temp))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9626, "PyUnicode_Check(temp)") :
(void)0),(((PyUnicodeObject *)(temp))->str))
;
9627 len = PyUnicode_GET_SIZE(temp)((__builtin_expect(!(((((((PyObject*)(temp))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9627, "PyUnicode_Check(temp)") :
(void)0),(((PyUnicodeObject *)(temp))->length))
;
9628 if (prec >= 0 && len > prec)
9629 len = prec;
9630 break;
9631
9632 case 'i':
9633 case 'd':
9634 case 'u':
9635 case 'o':
9636 case 'x':
9637 case 'X':
9638 if (c == 'i')
9639 c = 'd';
9640 isnumok = 0;
9641 if (PyNumber_Check(v)) {
9642 PyObject *iobj=NULL((void *)0);
9643
9644 if (PyLong_Check(v)((((((PyObject*)(v))->ob_type))->tp_flags & ((1L<<
24))) != 0)
) {
9645 iobj = v;
9646 Py_INCREF(iobj)( _Py_RefTotal++ , ((PyObject*)(iobj))->ob_refcnt++);
9647 }
9648 else {
9649 iobj = PyNumber_Long(v);
9650 }
9651 if (iobj!=NULL((void *)0)) {
9652 if (PyLong_Check(iobj)((((((PyObject*)(iobj))->ob_type))->tp_flags & ((1L
<<24))) != 0)
) {
9653 isnumok = 1;
9654 temp = formatlong(iobj, flags, prec, c);
9655 Py_DECREF(iobj)do { if (_Py_RefTotal-- , --((PyObject*)(iobj))->ob_refcnt
!= 0) { if (((PyObject*)iobj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9655, (PyObject *)(iobj)); } else
_Py_Dealloc((PyObject *)(iobj)); } while (0)
;
9656 if (!temp)
9657 goto onError;
9658 pbuf = PyUnicode_AS_UNICODE(temp)((__builtin_expect(!(((((((PyObject*)(temp))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9658, "PyUnicode_Check(temp)") :
(void)0),(((PyUnicodeObject *)(temp))->str))
;
9659 len = PyUnicode_GET_SIZE(temp)((__builtin_expect(!(((((((PyObject*)(temp))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9659, "PyUnicode_Check(temp)") :
(void)0),(((PyUnicodeObject *)(temp))->length))
;
9660 sign = 1;
9661 }
9662 else {
9663 Py_DECREF(iobj)do { if (_Py_RefTotal-- , --((PyObject*)(iobj))->ob_refcnt
!= 0) { if (((PyObject*)iobj)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9663, (PyObject *)(iobj)); } else
_Py_Dealloc((PyObject *)(iobj)); } while (0)
;
9664 }
9665 }
9666 }
9667 if (!isnumok) {
9668 PyErr_Format(PyExc_TypeError,
9669 "%%%c format: a number is required, "
9670 "not %.200s", (char)c, Py_TYPE(v)(((PyObject*)(v))->ob_type)->tp_name);
9671 goto onError;
9672 }
9673 if (flags & F_ZERO(1<<4))
9674 fill = '0';
9675 break;
9676
9677 case 'e':
9678 case 'E':
9679 case 'f':
9680 case 'F':
9681 case 'g':
9682 case 'G':
9683 temp = formatfloat(v, flags, prec, c);
9684 if (!temp)
9685 goto onError;
9686 pbuf = PyUnicode_AS_UNICODE(temp)((__builtin_expect(!(((((((PyObject*)(temp))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9686, "PyUnicode_Check(temp)") :
(void)0),(((PyUnicodeObject *)(temp))->str))
;
9687 len = PyUnicode_GET_SIZE(temp)((__builtin_expect(!(((((((PyObject*)(temp))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9687, "PyUnicode_Check(temp)") :
(void)0),(((PyUnicodeObject *)(temp))->length))
;
9688 sign = 1;
9689 if (flags & F_ZERO(1<<4))
9690 fill = '0';
9691 break;
9692
9693 case 'c':
9694 pbuf = formatbuf;
9695 len = formatchar(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), v);
9696 if (len < 0)
9697 goto onError;
9698 break;
9699
9700 default:
9701 PyErr_Format(PyExc_ValueError,
9702 "unsupported format character '%c' (0x%x) "
9703 "at index %zd",
9704 (31<=c && c<=126) ? (char)c : '?',
9705 (int)c,
9706 (Py_ssize_t)(fmt - 1 -
9707 PyUnicode_AS_UNICODE(uformat)((__builtin_expect(!(((((((PyObject*)(uformat))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 9707, "PyUnicode_Check(uformat)"
) : (void)0),(((PyUnicodeObject *)(uformat))->str))
));
9708 goto onError;
9709 }
9710 if (sign) {
9711 if (*pbuf == '-' || *pbuf == '+') {
9712 sign = *pbuf++;
9713 len--;
9714 }
9715 else if (flags & F_SIGN(1<<1))
9716 sign = '+';
9717 else if (flags & F_BLANK(1<<2))
9718 sign = ' ';
9719 else
9720 sign = 0;
9721 }
9722 if (width < len)
9723 width = len;
9724 if (rescnt - (sign != 0) < width) {
9725 reslen -= rescnt;
9726 rescnt = width + fmtcnt + 100;
9727 reslen += rescnt;
9728 if (reslen < 0) {
9729 Py_XDECREF(temp)do { if ((temp) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(temp))->ob_refcnt != 0) { if (((PyObject
*)temp)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 9729, (PyObject *)(temp)); } else _Py_Dealloc((PyObject *)(
temp)); } while (0); } while (0)
;
9730 PyErr_NoMemory();
9731 goto onError;
9732 }
9733 if (_PyUnicode_Resize(&result, reslen) < 0) {
9734 Py_XDECREF(temp)do { if ((temp) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(temp))->ob_refcnt != 0) { if (((PyObject
*)temp)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 9734, (PyObject *)(temp)); } else _Py_Dealloc((PyObject *)(
temp)); } while (0); } while (0)
;
9735 goto onError;
9736 }
9737 res = PyUnicode_AS_UNICODE(result)((__builtin_expect(!(((((((PyObject*)(result))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9737, "PyUnicode_Check(result)")
: (void)0),(((PyUnicodeObject *)(result))->str))
9738 + reslen - rescnt;
9739 }
9740 if (sign) {
9741 if (fill != ' ')
9742 *res++ = sign;
9743 rescnt--;
9744 if (width > len)
9745 width--;
9746 }
9747 if ((flags & F_ALT(1<<3)) && (c == 'x' || c == 'X' || c == 'o')) {
9748 assert(pbuf[0] == '0')(__builtin_expect(!(pbuf[0] == '0'), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9748, "pbuf[0] == '0'") : (void)
0)
;
9749 assert(pbuf[1] == c)(__builtin_expect(!(pbuf[1] == c), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9749, "pbuf[1] == c") : (void)0)
;
9750 if (fill != ' ') {
9751 *res++ = *pbuf++;
9752 *res++ = *pbuf++;
9753 }
9754 rescnt -= 2;
9755 width -= 2;
9756 if (width < 0)
9757 width = 0;
9758 len -= 2;
9759 }
9760 if (width > len && !(flags & F_LJUST(1<<0))) {
9761 do {
9762 --rescnt;
9763 *res++ = fill;
9764 } while (--width > len);
9765 }
9766 if (fill == ' ') {
9767 if (sign)
9768 *res++ = sign;
9769 if ((flags & F_ALT(1<<3)) && (c == 'x' || c == 'X' || c == 'o')) {
9770 assert(pbuf[0] == '0')(__builtin_expect(!(pbuf[0] == '0'), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9770, "pbuf[0] == '0'") : (void)
0)
;
9771 assert(pbuf[1] == c)(__builtin_expect(!(pbuf[1] == c), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9771, "pbuf[1] == c") : (void)0)
;
9772 *res++ = *pbuf++;
9773 *res++ = *pbuf++;
9774 }
9775 }
9776 Py_UNICODE_COPY(res, pbuf, len)((__builtin_object_size ((res), 0) != (size_t) -1) ? __builtin___memcpy_chk
((res), (pbuf), (len)*sizeof(Py_UNICODE), __builtin_object_size
((res), 0)) : __inline_memcpy_chk ((res), (pbuf), (len)*sizeof
(Py_UNICODE)))
;
9777 res += len;
9778 rescnt -= len;
9779 while (--width >= len) {
9780 --rescnt;
9781 *res++ = ' ';
9782 }
9783 if (dict && (argidx < arglen) && c != '%') {
9784 PyErr_SetString(PyExc_TypeError,
9785 "not all arguments converted during string formatting");
9786 Py_XDECREF(temp)do { if ((temp) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(temp))->ob_refcnt != 0) { if (((PyObject
*)temp)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 9786, (PyObject *)(temp)); } else _Py_Dealloc((PyObject *)(
temp)); } while (0); } while (0)
;
9787 goto onError;
9788 }
9789 Py_XDECREF(temp)do { if ((temp) == ((void *)0)) ; else do { if (_Py_RefTotal--
, --((PyObject*)(temp))->ob_refcnt != 0) { if (((PyObject
*)temp)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 9789, (PyObject *)(temp)); } else _Py_Dealloc((PyObject *)(
temp)); } while (0); } while (0)
;
9790 } /* '%' */
9791 } /* until end */
9792 if (argidx < arglen && !dict) {
9793 PyErr_SetString(PyExc_TypeError,
9794 "not all arguments converted during string formatting");
9795 goto onError;
9796 }
9797
9798 if (_PyUnicode_Resize(&result, reslen - rescnt) < 0)
9799 goto onError;
9800 if (args_owned) {
9801 Py_DECREF(args)do { if (_Py_RefTotal-- , --((PyObject*)(args))->ob_refcnt
!= 0) { if (((PyObject*)args)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9801, (PyObject *)(args)); } else
_Py_Dealloc((PyObject *)(args)); } while (0)
;
9802 }
9803 Py_DECREF(uformat)do { if (_Py_RefTotal-- , --((PyObject*)(uformat))->ob_refcnt
!= 0) { if (((PyObject*)uformat)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9803, (PyObject *)(uformat)); } else
_Py_Dealloc((PyObject *)(uformat)); } while (0)
;
9804 return (PyObject *)result;
9805
9806 onError:
9807 Py_XDECREF(result)do { if ((result) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(result))->ob_refcnt != 0) { if (((PyObject
*)result)->ob_refcnt < 0) _Py_NegativeRefcount("Objects/unicodeobject.c"
, 9807, (PyObject *)(result)); } else _Py_Dealloc((PyObject *
)(result)); } while (0); } while (0)
;
9808 Py_DECREF(uformat)do { if (_Py_RefTotal-- , --((PyObject*)(uformat))->ob_refcnt
!= 0) { if (((PyObject*)uformat)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9808, (PyObject *)(uformat)); } else
_Py_Dealloc((PyObject *)(uformat)); } while (0)
;
9809 if (args_owned) {
9810 Py_DECREF(args)do { if (_Py_RefTotal-- , --((PyObject*)(args))->ob_refcnt
!= 0) { if (((PyObject*)args)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9810, (PyObject *)(args)); } else
_Py_Dealloc((PyObject *)(args)); } while (0)
;
9811 }
9812 return NULL((void *)0);
9813}
9814
9815static PyObject *
9816unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
9817
9818static PyObject *
9819unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
9820{
9821 PyObject *x = NULL((void *)0);
9822 static char *kwlist[] = {"object", "encoding", "errors", 0};
9823 char *encoding = NULL((void *)0);
9824 char *errors = NULL((void *)0);
9825
9826 if (type != &PyUnicode_Type)
9827 return unicode_subtype_new(type, args, kwds);
9828 if (!PyArg_ParseTupleAndKeywords_PyArg_ParseTupleAndKeywords_SizeT(args, kwds, "|Oss:str",
9829 kwlist, &x, &encoding, &errors))
9830 return NULL((void *)0);
9831 if (x == NULL((void *)0))
9832 return (PyObject *)_PyUnicode_New(0);
9833 if (encoding == NULL((void *)0) && errors == NULL((void *)0))
9834 return PyObject_Str(x);
9835 else
9836 return PyUnicode_FromEncodedObjectPyUnicodeUCS2_FromEncodedObject(x, encoding, errors);
9837}
9838
9839static PyObject *
9840unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
9841{
9842 PyUnicodeObject *tmp, *pnew;
9843 Py_ssize_t n;
9844
9845 assert(PyType_IsSubtype(type, &PyUnicode_Type))(__builtin_expect(!(PyType_IsSubtype(type, &PyUnicode_Type
)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c", 9845
, "PyType_IsSubtype(type, &PyUnicode_Type)") : (void)0)
;
9846 tmp = (PyUnicodeObject *)unicode_new(&PyUnicode_Type, args, kwds);
9847 if (tmp == NULL((void *)0))
9848 return NULL((void *)0);
9849 assert(PyUnicode_Check(tmp))(__builtin_expect(!(((((((PyObject*)(tmp))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9849, "PyUnicode_Check(tmp)") : (
void)0)
;
9850 pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length);
9851 if (pnew == NULL((void *)0)) {
9852 Py_DECREF(tmp)do { if (_Py_RefTotal-- , --((PyObject*)(tmp))->ob_refcnt !=
0) { if (((PyObject*)tmp)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9852, (PyObject *)(tmp)); } else _Py_Dealloc
((PyObject *)(tmp)); } while (0)
;
9853 return NULL((void *)0);
9854 }
9855 pnew->str = (Py_UNICODE*) PyObject_MALLOC_PyObject_DebugMalloc(sizeof(Py_UNICODE) * (n+1));
9856 if (pnew->str == NULL((void *)0)) {
9857 _Py_ForgetReference((PyObject *)pnew);
9858 PyObject_Del_PyObject_DebugFree(pnew);
9859 Py_DECREF(tmp)do { if (_Py_RefTotal-- , --((PyObject*)(tmp))->ob_refcnt !=
0) { if (((PyObject*)tmp)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9859, (PyObject *)(tmp)); } else _Py_Dealloc
((PyObject *)(tmp)); } while (0)
;
9860 return PyErr_NoMemory();
9861 }
9862 Py_UNICODE_COPY(pnew->str, tmp->str, n+1)((__builtin_object_size ((pnew->str), 0) != (size_t) -1) ?
__builtin___memcpy_chk ((pnew->str), (tmp->str), (n+1)
*sizeof(Py_UNICODE), __builtin_object_size ((pnew->str), 0
)) : __inline_memcpy_chk ((pnew->str), (tmp->str), (n+1
)*sizeof(Py_UNICODE)))
;
9863 pnew->length = n;
9864 pnew->hash = tmp->hash;
9865 Py_DECREF(tmp)do { if (_Py_RefTotal-- , --((PyObject*)(tmp))->ob_refcnt !=
0) { if (((PyObject*)tmp)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9865, (PyObject *)(tmp)); } else _Py_Dealloc
((PyObject *)(tmp)); } while (0)
;
9866 return (PyObject *)pnew;
9867}
9868
9869PyDoc_STRVAR(unicode_doc,static char unicode_doc[] = "str(string[, encoding[, errors]]) -> str\n\nCreate a new string object from the given encoded string.\nencoding defaults to the current default string encoding.\nerrors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'."
9870 "str(string[, encoding[, errors]]) -> str\n\static char unicode_doc[] = "str(string[, encoding[, errors]]) -> str\n\nCreate a new string object from the given encoded string.\nencoding defaults to the current default string encoding.\nerrors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'."
9871\n\static char unicode_doc[] = "str(string[, encoding[, errors]]) -> str\n\nCreate a new string object from the given encoded string.\nencoding defaults to the current default string encoding.\nerrors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'."
9872Create a new string object from the given encoded string.\n\static char unicode_doc[] = "str(string[, encoding[, errors]]) -> str\n\nCreate a new string object from the given encoded string.\nencoding defaults to the current default string encoding.\nerrors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'."
9873encoding defaults to the current default string encoding.\n\static char unicode_doc[] = "str(string[, encoding[, errors]]) -> str\n\nCreate a new string object from the given encoded string.\nencoding defaults to the current default string encoding.\nerrors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'."
9874errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.")static char unicode_doc[] = "str(string[, encoding[, errors]]) -> str\n\nCreate a new string object from the given encoded string.\nencoding defaults to the current default string encoding.\nerrors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.";
9875
9876static PyObject *unicode_iter(PyObject *seq);
9877
/* Type object for str.  The slots below wire in the implementations defined
   in this file: unicode_dealloc, unicode_repr, unicode_hash, unicode_str,
   the number/sequence/mapping method tables, rich comparison, iteration
   (unicode_iter), the method table and the constructor (unicode_new).
   The flags mark the type as subclassable and as a unicode subclass. */
9878PyTypeObject PyUnicode_Type = {
9879 PyVarObject_HEAD_INIT(&PyType_Type, 0){ { 0, 0, 1, &PyType_Type }, 0 },
9880 "str", /* tp_name */
9881 sizeof(PyUnicodeObject), /* tp_basicsize */
9882 0, /* tp_itemsize */
9883 /* Slots */
9884 (destructor)unicode_dealloc, /* tp_dealloc */
9885 0, /* tp_print */
9886 0, /* tp_getattr */
9887 0, /* tp_setattr */
9888 0, /* tp_reserved */
9889 unicode_repr, /* tp_repr */
9890 &unicode_as_number, /* tp_as_number */
9891 &unicode_as_sequence, /* tp_as_sequence */
9892 &unicode_as_mapping, /* tp_as_mapping */
9893 (hashfunc) unicode_hash, /* tp_hash*/
9894 0, /* tp_call*/
9895 (reprfunc) unicode_str, /* tp_str */
9896 PyObject_GenericGetAttr, /* tp_getattro */
9897 0, /* tp_setattro */
9898 0, /* tp_as_buffer */
9899 Py_TPFLAGS_DEFAULT( 0 | (1L<<18) | 0) | Py_TPFLAGS_BASETYPE(1L<<10) |
9900 Py_TPFLAGS_UNICODE_SUBCLASS(1L<<28), /* tp_flags */
9901 unicode_doc, /* tp_doc */
9902 0, /* tp_traverse */
9903 0, /* tp_clear */
9904 PyUnicode_RichComparePyUnicodeUCS2_RichCompare, /* tp_richcompare */
9905 0, /* tp_weaklistoffset */
9906 unicode_iter, /* tp_iter */
9907 0, /* tp_iternext */
9908 unicode_methods, /* tp_methods */
9909 0, /* tp_members */
9910 0, /* tp_getset */
9911 &PyBaseObject_Type, /* tp_base */
9912 0, /* tp_dict */
9913 0, /* tp_descr_get */
9914 0, /* tp_descr_set */
9915 0, /* tp_dictoffset */
9916 0, /* tp_init */
9917 0, /* tp_alloc */
9918 unicode_new, /* tp_new */
9919 PyObject_Del_PyObject_DebugFree, /* tp_free */
9920};
9921
9922/* Initialize the Unicode implementation */
9923
9924void _PyUnicode_Init_PyUnicodeUCS2_Init(void)
9925{
9926 int i;
9927
9928 /* XXX - move this array to unicodectype.c ? */
9929 Py_UNICODE linebreak[] = {
9930 0x000A, /* LINE FEED */
9931 0x000D, /* CARRIAGE RETURN */
9932 0x001C, /* FILE SEPARATOR */
9933 0x001D, /* GROUP SEPARATOR */
9934 0x001E, /* RECORD SEPARATOR */
9935 0x0085, /* NEXT LINE */
9936 0x2028, /* LINE SEPARATOR */
9937 0x2029, /* PARAGRAPH SEPARATOR */
9938 };
9939
9940 /* Init the implementation */
9941 free_list = NULL((void *)0);
9942 numfree = 0;
9943 unicode_empty = _PyUnicode_New(0);
9944 if (!unicode_empty)
9945 return;
9946
9947 for (i = 0; i < 256; i++)
9948 unicode_latin1[i] = NULL((void *)0);
9949 if (PyType_Ready(&PyUnicode_Type) < 0)
9950 Py_FatalError("Can't initialize 'unicode'");
9951
9952 /* initialize the linebreak bloom filter */
9953 bloom_linebreak = make_bloom_mask(
9954 linebreak, sizeof(linebreak) / sizeof(linebreak[0])
9955 );
9956
9957 PyType_Ready(&EncodingMapType);
9958}
9959
9960/* Finalize the Unicode implementation */
9961
9962int
9963PyUnicode_ClearFreeListPyUnicodeUCS2_ClearFreelist(void)
9964{
9965 int freelist_size = numfree;
9966 PyUnicodeObject *u;
9967
9968 for (u = free_list; u != NULL((void *)0);) {
9969 PyUnicodeObject *v = u;
9970 u = *(PyUnicodeObject **)u;
9971 if (v->str)
9972 PyObject_DEL_PyObject_DebugFree(v->str);
9973 Py_XDECREF(v->defenc)do { if ((v->defenc) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(v->defenc))->ob_refcnt != 0) { if (
((PyObject*)v->defenc)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9973, (PyObject *)(v->defenc))
; } else _Py_Dealloc((PyObject *)(v->defenc)); } while (0)
; } while (0)
;
9974 PyObject_Del_PyObject_DebugFree(v);
9975 numfree--;
9976 }
9977 free_list = NULL((void *)0);
9978 assert(numfree == 0)(__builtin_expect(!(numfree == 0), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 9978, "numfree == 0") : (void)0)
;
9979 return freelist_size;
9980}
9981
9982void
9983_PyUnicode_Fini_PyUnicodeUCS2_Fini(void)
9984{
9985 int i;
9986
9987 Py_XDECREF(unicode_empty)do { if ((unicode_empty) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(unicode_empty))->ob_refcnt != 0) { if (
((PyObject*)unicode_empty)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 9987, (PyObject *)(unicode_empty)
); } else _Py_Dealloc((PyObject *)(unicode_empty)); } while (
0); } while (0)
;
9988 unicode_empty = NULL((void *)0);
9989
9990 for (i = 0; i < 256; i++) {
9991 if (unicode_latin1[i]) {
9992 Py_DECREF(unicode_latin1[i])do { if (_Py_RefTotal-- , --((PyObject*)(unicode_latin1[i]))->
ob_refcnt != 0) { if (((PyObject*)unicode_latin1[i])->ob_refcnt
< 0) _Py_NegativeRefcount("Objects/unicodeobject.c", 9992
, (PyObject *)(unicode_latin1[i])); } else _Py_Dealloc((PyObject
*)(unicode_latin1[i])); } while (0)
;
9993 unicode_latin1[i] = NULL((void *)0);
9994 }
9995 }
9996 (void)PyUnicode_ClearFreeListPyUnicodeUCS2_ClearFreelist();
9997}
9998
9999void
10000PyUnicode_InternInPlace(PyObject **p)
10001{
10002 register PyUnicodeObject *s = (PyUnicodeObject *)(*p);
10003 PyObject *t;
10004 if (s == NULL((void *)0) || !PyUnicode_Check(s)((((((PyObject*)(s))->ob_type))->tp_flags & ((1L<<
28))) != 0)
)
10005 Py_FatalError(
10006 "PyUnicode_InternInPlace: unicode strings only please!");
10007 /* If it's a subclass, we don't really know what putting
10008 it in the interned dict might do. */
10009 if (!PyUnicode_CheckExact(s)((((PyObject*)(s))->ob_type) == &PyUnicode_Type))
10010 return;
10011 if (PyUnicode_CHECK_INTERNED(s)(((PyUnicodeObject *)(s))->state))
10012 return;
10013 if (interned == NULL((void *)0)) {
10014 interned = PyDict_New();
10015 if (interned == NULL((void *)0)) {
10016 PyErr_Clear(); /* Don't leave an exception */
10017 return;
10018 }
10019 }
10020 /* It might be that the GetItem call fails even
10021 though the key is present in the dictionary,
10022 namely when this happens during a stack overflow. */
10023 Py_ALLOW_RECURSIONdo { unsigned char _old = PyThreadState_Get()->recursion_critical
; PyThreadState_Get()->recursion_critical = 1;
10024 t = PyDict_GetItem(interned, (PyObject *)s);
10025 Py_END_ALLOW_RECURSIONPyThreadState_Get()->recursion_critical = _old; } while(0)
;
10026
10027 if (t) {
10028 Py_INCREF(t)( _Py_RefTotal++ , ((PyObject*)(t))->ob_refcnt++);
10029 Py_DECREF(*p)do { if (_Py_RefTotal-- , --((PyObject*)(*p))->ob_refcnt !=
0) { if (((PyObject*)*p)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 10029, (PyObject *)(*p)); } else _Py_Dealloc
((PyObject *)(*p)); } while (0)
;
10030 *p = t;
10031 return;
10032 }
10033
10034 PyThreadState_GET()PyThreadState_Get()->recursion_critical = 1;
10035 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
10036 PyErr_Clear();
10037 PyThreadState_GET()PyThreadState_Get()->recursion_critical = 0;
10038 return;
10039 }
10040 PyThreadState_GET()PyThreadState_Get()->recursion_critical = 0;
10041 /* The two references in interned are not counted by refcnt.
10042 The deallocator will take care of this */
10043 Py_REFCNT(s)(((PyObject*)(s))->ob_refcnt) -= 2;
10044 PyUnicode_CHECK_INTERNED(s)(((PyUnicodeObject *)(s))->state) = SSTATE_INTERNED_MORTAL1;
10045}
10046
10047void
10048PyUnicode_InternImmortal(PyObject **p)
10049{
10050 PyUnicode_InternInPlace(p);
10051 if (PyUnicode_CHECK_INTERNED(*p)(((PyUnicodeObject *)(*p))->state) != SSTATE_INTERNED_IMMORTAL2) {
10052 PyUnicode_CHECK_INTERNED(*p)(((PyUnicodeObject *)(*p))->state) = SSTATE_INTERNED_IMMORTAL2;
10053 Py_INCREF(*p)( _Py_RefTotal++ , ((PyObject*)(*p))->ob_refcnt++);
10054 }
10055}
10056
10057PyObject *
10058PyUnicode_InternFromString(const char *cp)
10059{
10060 PyObject *s = PyUnicode_FromStringPyUnicodeUCS2_FromString(cp);
10061 if (s == NULL((void *)0))
10062 return NULL((void *)0);
10063 PyUnicode_InternInPlace(&s);
10064 return s;
10065}
10066
10067void _Py_ReleaseInternedUnicodeStrings(void)
10068{
10069 PyObject *keys;
10070 PyUnicodeObject *s;
10071 Py_ssize_t i, n;
10072 Py_ssize_t immortal_size = 0, mortal_size = 0;
10073
10074 if (interned == NULL((void *)0) || !PyDict_Check(interned)((((((PyObject*)(interned))->ob_type))->tp_flags & (
(1L<<29))) != 0)
)
10075 return;
10076 keys = PyDict_Keys(interned);
10077 if (keys == NULL((void *)0) || !PyList_Check(keys)((((((PyObject*)(keys))->ob_type))->tp_flags & ((1L
<<25))) != 0)
) {
10078 PyErr_Clear();
10079 return;
10080 }
10081
10082 /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak
10083 detector, interned unicode strings are not forcibly deallocated;
10084 rather, we give them their stolen references back, and then clear
10085 and DECREF the interned dict. */
10086
10087 n = PyList_GET_SIZE(keys)(((PyVarObject*)(keys))->ob_size);
10088 fprintf(stderr__stderrp, "releasing %" PY_FORMAT_SIZE_T"l" "d interned strings\n",
10089 n);
10090 for (i = 0; i < n; i++) {
10091 s = (PyUnicodeObject *) PyList_GET_ITEM(keys, i)(((PyListObject *)(keys))->ob_item[i]);
10092 switch (s->state) {
10093 case SSTATE_NOT_INTERNED0:
10094 /* XXX Shouldn't happen */
10095 break;
10096 case SSTATE_INTERNED_IMMORTAL2:
10097 Py_REFCNT(s)(((PyObject*)(s))->ob_refcnt) += 1;
10098 immortal_size += s->length;
10099 break;
10100 case SSTATE_INTERNED_MORTAL1:
10101 Py_REFCNT(s)(((PyObject*)(s))->ob_refcnt) += 2;
10102 mortal_size += s->length;
10103 break;
10104 default:
10105 Py_FatalError("Inconsistent interned string state.");
10106 }
10107 s->state = SSTATE_NOT_INTERNED0;
10108 }
10109 fprintf(stderr__stderrp, "total size of all interned strings: "
10110 "%" PY_FORMAT_SIZE_T"l" "d/%" PY_FORMAT_SIZE_T"l" "d "
10111 "mortal/immortal\n", mortal_size, immortal_size);
10112 Py_DECREF(keys)do { if (_Py_RefTotal-- , --((PyObject*)(keys))->ob_refcnt
!= 0) { if (((PyObject*)keys)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 10112, (PyObject *)(keys)); } else
_Py_Dealloc((PyObject *)(keys)); } while (0)
;
10113 PyDict_Clear(interned);
10114 Py_DECREF(interned)do { if (_Py_RefTotal-- , --((PyObject*)(interned))->ob_refcnt
!= 0) { if (((PyObject*)interned)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 10114, (PyObject *)(interned)); }
else _Py_Dealloc((PyObject *)(interned)); } while (0)
;
10115 interned = NULL((void *)0);
10116}
10117
10118
10119/********************* Unicode Iterator **************************/
10120
/* State of a str iterator: the string being walked and the index of the
   next character to hand out (see unicodeiter_next). */
10121typedef struct {
10122 PyObject_HEADPyObject ob_base;
10123 Py_ssize_t it_index; /* index into it_seq of the next item to return */
10124 PyUnicodeObject *it_seq; /* Set to NULL when iterator is exhausted */
10125} unicodeiterobject;
10126
10127static void
10128unicodeiter_dealloc(unicodeiterobject *it)
10129{
10130 _PyObject_GC_UNTRACK(it)do { PyGC_Head *g = ((PyGC_Head *)(it)-1); (__builtin_expect(
!(g->gc.gc_refs != (-2)), 0) ? __assert_rtn(__func__, "Objects/unicodeobject.c"
, 10130, "g->gc.gc_refs != _PyGC_REFS_UNTRACKED") : (void)
0); g->gc.gc_refs = (-2); g->gc.gc_prev->gc.gc_next =
g->gc.gc_next; g->gc.gc_next->gc.gc_prev = g->gc
.gc_prev; g->gc.gc_next = ((void *)0); } while (0);
;
10131 Py_XDECREF(it->it_seq)do { if ((it->it_seq) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(it->it_seq))->ob_refcnt != 0) { if (
((PyObject*)it->it_seq)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 10131, (PyObject *)(it->it_seq
)); } else _Py_Dealloc((PyObject *)(it->it_seq)); } while (
0); } while (0)
;
10132 PyObject_GC_Del(it);
10133}
10134
10135static int
10136unicodeiter_traverse(unicodeiterobject *it, visitproc visit, void *arg)
10137{
10138 Py_VISIT(it->it_seq)do { if (it->it_seq) { int vret = visit((PyObject *)(it->
it_seq), arg); if (vret) return vret; } } while (0)
;
10139 return 0;
10140}
10141
10142static PyObject *
10143unicodeiter_next(unicodeiterobject *it)
10144{
10145 PyUnicodeObject *seq;
10146 PyObject *item;
10147
10148 assert(it != NULL)(__builtin_expect(!(it != ((void *)0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 10148, "it != NULL") : (void)0)
;
10149 seq = it->it_seq;
10150 if (seq == NULL((void *)0))
10151 return NULL((void *)0);
10152 assert(PyUnicode_Check(seq))(__builtin_expect(!(((((((PyObject*)(seq))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 10152, "PyUnicode_Check(seq)") :
(void)0)
;
10153
10154 if (it->it_index < PyUnicode_GET_SIZE(seq)((__builtin_expect(!(((((((PyObject*)(seq))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 10154, "PyUnicode_Check(seq)") :
(void)0),(((PyUnicodeObject *)(seq))->length))
) {
10155 item = PyUnicode_FromUnicodePyUnicodeUCS2_FromUnicode(
10156 PyUnicode_AS_UNICODE(seq)((__builtin_expect(!(((((((PyObject*)(seq))->ob_type))->
tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn(__func__
, "Objects/unicodeobject.c", 10156, "PyUnicode_Check(seq)") :
(void)0),(((PyUnicodeObject *)(seq))->str))
+it->it_index, 1);
10157 if (item != NULL((void *)0))
10158 ++it->it_index;
10159 return item;
10160 }
10161
10162 Py_DECREF(seq)do { if (_Py_RefTotal-- , --((PyObject*)(seq))->ob_refcnt !=
0) { if (((PyObject*)seq)->ob_refcnt < 0) _Py_NegativeRefcount
("Objects/unicodeobject.c", 10162, (PyObject *)(seq)); } else
_Py_Dealloc((PyObject *)(seq)); } while (0)
;
10163 it->it_seq = NULL((void *)0);
10164 return NULL((void *)0);
10165}
10166
10167static PyObject *
10168unicodeiter_len(unicodeiterobject *it)
10169{
10170 Py_ssize_t len = 0;
10171 if (it->it_seq)
10172 len = PyUnicode_GET_SIZE(it->it_seq)((__builtin_expect(!(((((((PyObject*)(it->it_seq))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 10172, "PyUnicode_Check(it->it_seq)"
) : (void)0),(((PyUnicodeObject *)(it->it_seq))->length
))
- it->it_index;
10173 return PyLong_FromSsize_t(len);
10174}
10175
10176PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).")static char length_hint_doc[] = "Private method returning an estimate of len(list(it)).";
10177
/* Method table of the str iterator type: only __length_hint__, which takes
   no arguments and is implemented by unicodeiter_len. */
10178static PyMethodDef unicodeiter_methods[] = {
10179 {"__length_hint__", (PyCFunction)unicodeiter_len, METH_NOARGS0x0004,
10180 length_hint_doc},
10181 {NULL((void *)0), NULL((void *)0)} /* sentinel */
10182};
10183
/* Type object for str iterators ("str_iterator").  It is GC-enabled
   (Py_TPFLAGS_HAVE_GC with unicodeiter_traverse), returns itself from
   tp_iter and produces items through unicodeiter_next. */
10184PyTypeObject PyUnicodeIter_Type = {
10185 PyVarObject_HEAD_INIT(&PyType_Type, 0){ { 0, 0, 1, &PyType_Type }, 0 },
10186 "str_iterator", /* tp_name */
10187 sizeof(unicodeiterobject), /* tp_basicsize */
10188 0, /* tp_itemsize */
10189 /* methods */
10190 (destructor)unicodeiter_dealloc, /* tp_dealloc */
10191 0, /* tp_print */
10192 0, /* tp_getattr */
10193 0, /* tp_setattr */
10194 0, /* tp_reserved */
10195 0, /* tp_repr */
10196 0, /* tp_as_number */
10197 0, /* tp_as_sequence */
10198 0, /* tp_as_mapping */
10199 0, /* tp_hash */
10200 0, /* tp_call */
10201 0, /* tp_str */
10202 PyObject_GenericGetAttr, /* tp_getattro */
10203 0, /* tp_setattro */
10204 0, /* tp_as_buffer */
10205 Py_TPFLAGS_DEFAULT( 0 | (1L<<18) | 0) | Py_TPFLAGS_HAVE_GC(1L<<14),/* tp_flags */
10206 0, /* tp_doc */
10207 (traverseproc)unicodeiter_traverse, /* tp_traverse */
10208 0, /* tp_clear */
10209 0, /* tp_richcompare */
10210 0, /* tp_weaklistoffset */
10211 PyObject_SelfIter, /* tp_iter */
10212 (iternextfunc)unicodeiter_next, /* tp_iternext */
10213 unicodeiter_methods, /* tp_methods */
10214 0, /* slot following tp_methods; remaining slots are implicitly zero */
10215};
10216
10217static PyObject *
10218unicode_iter(PyObject *seq)
10219{
10220 unicodeiterobject *it;
10221
10222 if (!PyUnicode_Check(seq)((((((PyObject*)(seq))->ob_type))->tp_flags & ((1L<<
28))) != 0)
) {
10223 PyErr_BadInternalCall()_PyErr_BadInternalCall("Objects/unicodeobject.c", 10223);
10224 return NULL((void *)0);
10225 }
10226 it = PyObject_GC_New(unicodeiterobject, &PyUnicodeIter_Type)( (unicodeiterobject *) _PyObject_GC_New(&PyUnicodeIter_Type
) )
;
10227 if (it == NULL((void *)0))
10228 return NULL((void *)0);
10229 it->it_index = 0;
10230 Py_INCREF(seq)( _Py_RefTotal++ , ((PyObject*)(seq))->ob_refcnt++);
10231 it->it_seq = (PyUnicodeObject *)seq;
10232 _PyObject_GC_TRACK(it)do { PyGC_Head *g = ((PyGC_Head *)(it)-1); if (g->gc.gc_refs
!= (-2)) Py_FatalError("GC object already tracked"); g->gc
.gc_refs = (-3); g->gc.gc_next = _PyGC_generation0; g->
gc.gc_prev = _PyGC_generation0->gc.gc_prev; g->gc.gc_prev
->gc.gc_next = g; _PyGC_generation0->gc.gc_prev = g; } while
(0);
;
10233 return (PyObject *)it;
10234}
10235
/* Return the number of Py_UNICODE elements in `u` that precede the
   terminating 0 element.

   The count is kept in a size_t (the declared return type) so that it
   cannot overflow the way an int counter could on very long buffers. */
size_t
Py_UNICODE_strlen(const Py_UNICODE *u)
{
    size_t res = 0;
    while (*u++)
        res++;
    return res;
}
10244
/* Copy the 0-terminated buffer `s2`, terminator included, into `s1`.
   The caller must provide enough room in `s1`.  Returns `s1`. */
Py_UNICODE*
Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2)
{
    Py_UNICODE *dst = s1;

    for (;;) {
        Py_UNICODE ch = *s2++;
        *dst++ = ch;
        if (!ch)
            break;
    }
    return s1;
}
10252
/* Copy at most `n` elements of the 0-terminated buffer `s2` into `s1`,
   stopping after the terminator has been copied.

   No element beyond s1[n-1] is ever written, and nothing is written when
   n is 0.  As with C strncpy(), the result is not 0-terminated when `s2`
   holds `n` or more elements before its terminator; unlike strncpy(), the
   remainder of `s1` is not padded.  Returns `s1`. */
Py_UNICODE*
Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
{
    Py_UNICODE *u = s1;

    /* Test the budget BEFORE storing, so the bound is never exceeded. */
    while (n-- > 0) {
        if ((*u++ = *s2++) == 0)
            break;
    }
    return s1;
}
10262
10263Py_UNICODE*
10264Py_UNICODE_strcat(Py_UNICODE *s1, const Py_UNICODE *s2)
10265{
10266 Py_UNICODE *u1 = s1;
10267 u1 += Py_UNICODE_strlen(u1);
10268 Py_UNICODE_strcpy(u1, s2);
10269 return s1;
10270}
10271
/* Compare two 0-terminated Py_UNICODE buffers element by element.

   Returns -1, 0 or +1 when `s1` sorts before, equal to, or after `s2`.
   A buffer that is a proper prefix of the other sorts first. */
int
Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
{
    /* Skip the common prefix; equality with a non-zero *s1 implies that
       *s2 is non-zero as well. */
    while (*s1 && *s1 == *s2) {
        s1++;
        s2++;
    }

    if (!*s1)
        return *s2 ? -1 : 0;    /* s1 ended: shorter or identical */
    if (!*s2)
        return 1;               /* s2 ended first */
    return (*s1 < *s2) ? -1 : +1;
}
10285
/* Compare at most `n` elements of two 0-terminated Py_UNICODE buffers.

   Returns -1 or +1 at the first differing element, and 0 when the buffers
   agree up to a shared terminator or over the first `n` elements. */
int
Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
{
    while (n != 0) {
        const Py_UNICODE a = *s1++;
        const Py_UNICODE b = *s2++;

        if (a != b)
            return (a < b) ? -1 : +1;
        if (a == '\0')
            break;              /* both buffers ended together */
        n--;
    }
    return 0;
}
10302
/* Return a pointer to the first element of the 0-terminated buffer `s`
   that equals `c`, or NULL when there is none.

   The terminator itself is never examined, so searching for 0 yields
   NULL. */
Py_UNICODE*
Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c)
{
    while (*s) {
        if (*s == c)
            return (Py_UNICODE*)s;
        s++;
    }
    return NULL;
}
10312
/* Return a pointer to the last element of the 0-terminated buffer `s`
   that equals `c`, or NULL when there is none.

   The scan starts just before the terminator, so searching for 0 yields
   NULL. */
Py_UNICODE*
Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c)
{
    const Py_UNICODE *p = s;

    /* Advance to the terminator ... */
    while (*p)
        p++;
    /* ... then walk backwards to the start. */
    for (; p != s; ) {
        --p;
        if (*p == c)
            return (Py_UNICODE*)p;
    }
    return NULL;
}
10325
10326Py_UNICODE*
10327PyUnicode_AsUnicodeCopy(PyObject *object)
10328{
10329 PyUnicodeObject *unicode = (PyUnicodeObject *)object;
10330 Py_UNICODE *copy;
10331 Py_ssize_t size;
10332
10333 /* Ensure we won't overflow the size. */
10334 if (PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 10334, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
> ((PY_SSIZE_T_MAX((Py_ssize_t)(((size_t)-1)>>1)) / sizeof(Py_UNICODE)) - 1)) {
10335 PyErr_NoMemory();
10336 return NULL((void *)0);
10337 }
10338 size = PyUnicode_GET_SIZE(unicode)((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type))
->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 10338, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->length))
+ 1; /* copy the nul character */
10339 size *= sizeof(Py_UNICODE);
10340 copy = PyMem_Malloc(size);
10341 if (copy == NULL((void *)0)) {
10342 PyErr_NoMemory();
10343 return NULL((void *)0);
10344 }
10345 memcpy(copy, PyUnicode_AS_UNICODE(unicode), size)((__builtin_object_size (copy, 0) != (size_t) -1) ? __builtin___memcpy_chk
(copy, ((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 10345, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str)), size,
__builtin_object_size (copy, 0)) : __inline_memcpy_chk (copy
, ((__builtin_expect(!(((((((PyObject*)(unicode))->ob_type
))->tp_flags & ((1L<<28))) != 0)), 0) ? __assert_rtn
(__func__, "Objects/unicodeobject.c", 10345, "PyUnicode_Check(unicode)"
) : (void)0),(((PyUnicodeObject *)(unicode))->str)), size)
)
;
10346 return copy;
10347}
10348
10349/* A _string module, to export formatter_parser and formatter_field_name_split
10350 to the string.Formatter class implemented in Python. */
10351
/* Method table of the _string module.  Both entries take a single object
   argument (METH_O) and forward to helpers named formatter_field_name_split
   and formatter_parser, which are defined outside this section. */
10352static PyMethodDef _string_methods[] = {
10353 {"formatter_field_name_split", (PyCFunction) formatter_field_name_split,
10354 METH_O0x0008, PyDoc_STR("split the argument as a field name")"split the argument as a field name"},
10355 {"formatter_parser", (PyCFunction) formatter_parser,
10356 METH_O0x0008, PyDoc_STR("parse the argument as a format string")"parse the argument as a format string"},
10357 {NULL((void *)0), NULL((void *)0)} /* sentinel */
10358};
10359
/* Module definition for "_string": name, docstring and the method table
   above.  NOTE(review): the 0 and the trailing NULLs are presumably the
   per-module state size and the unused optional hooks -- confirm against
   the PyModuleDef field order. */
10360static struct PyModuleDef _string_module = {
10361 PyModuleDef_HEAD_INIT{ { 0, 0, 1, ((void *)0) }, ((void *)0), 0, ((void *)0), },
10362 "_string",
10363 PyDoc_STR("string helper module")"string helper module",
10364 0,
10365 _string_methods,
10366 NULL((void *)0),
10367 NULL((void *)0),
10368 NULL((void *)0),
10369 NULL((void *)0)
10370};
10371
/* Module initialization function for _string: creates the module object
   from _string_module and returns whatever PyModule_Create() returns. */
10372PyMODINIT_FUNCPyObject*
10373PyInit__string(void)
10374{
10375 return PyModule_Create(&_string_module)PyModule_Create2TraceRefs(&_string_module, 1013);
10376}
10377
10378
10379#ifdef __cplusplus
10380}
10381#endif