| LEFT | RIGHT |
| 1 /* | 1 /* |
| 2 | 2 |
| 3 Unicode implementation based on original code by Fredrik Lundh, | 3 Unicode implementation based on original code by Fredrik Lundh, |
| 4 modified by Marc-Andre Lemburg <mal@lemburg.com> according to the | 4 modified by Marc-Andre Lemburg <mal@lemburg.com> according to the |
| 5 Unicode Integration Proposal (see file Misc/unicode.txt). | 5 Unicode Integration Proposal (see file Misc/unicode.txt). |
| 6 | 6 |
| 7 Major speed upgrades to the method implementations at the Reykjavik | 7 Major speed upgrades to the method implementations at the Reykjavik |
| 8 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. | 8 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. |
| 9 | 9 |
| 10 Copyright (c) Corporation for National Research Initiatives. | 10 Copyright (c) Corporation for National Research Initiatives. |
| (...skipping 23 matching lines...) Expand all Loading... |
| 34 ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 34 ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 35 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | 35 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 36 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT | 36 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT |
| 37 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 37 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 38 -------------------------------------------------------------------- | 38 -------------------------------------------------------------------- |
| 39 | 39 |
| 40 */ | 40 */ |
| 41 | 41 |
| 42 #define PY_SSIZE_T_CLEAN | 42 #define PY_SSIZE_T_CLEAN |
| 43 #include "Python.h" | 43 #include "Python.h" |
| 44 #include "bytes_methods.h" |
| 45 |
| 46 #include "unicodeobject.h" |
| 44 #include "ucnhash.h" | 47 #include "ucnhash.h" |
| 45 | 48 |
| 46 #ifdef MS_WINDOWS | 49 #ifdef MS_WINDOWS |
| 47 #include <windows.h> | 50 #include <windows.h> |
| 48 #endif | 51 #endif |
| 49 | 52 |
| 50 /* Limit for the Unicode object free list */ | 53 /* Limit for the Unicode object free list */ |
| 51 | 54 |
| 52 #define PyUnicode_MAXFREELIST 1024 | 55 #define PyUnicode_MAXFREELIST 1024 |
| 53 | 56 |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 104 static PyUnicodeObject *free_list; | 107 static PyUnicodeObject *free_list; |
| 105 static int numfree; | 108 static int numfree; |
| 106 | 109 |
| 107 /* The empty Unicode object is shared to improve performance. */ | 110 /* The empty Unicode object is shared to improve performance. */ |
| 108 static PyUnicodeObject *unicode_empty; | 111 static PyUnicodeObject *unicode_empty; |
| 109 | 112 |
| 110 /* Single character Unicode strings in the Latin-1 range are being | 113 /* Single character Unicode strings in the Latin-1 range are being |
| 111 shared as well. */ | 114 shared as well. */ |
| 112 static PyUnicodeObject *unicode_latin1[256]; | 115 static PyUnicodeObject *unicode_latin1[256]; |
| 113 | 116 |
| 117 /* Default encoding to use and assume when NULL is passed as encoding |
| 118 parameter; it is fixed to "utf-8". Always use the |
| 119 PyUnicode_GetDefaultEncoding() API to access this global. |
| 120 |
| 121 Don't forget to alter Py_FileSystemDefaultEncoding if you change the |
| 122 hard coded default! |
| 123 */ |
| 124 static const char unicode_default_encoding[] = "utf-8"; |
| 125 |
| 114 /* Fast detection of the most frequent whitespace characters */ | 126 /* Fast detection of the most frequent whitespace characters */ |
| 115 const unsigned char _Py_ascii_whitespace[] = { | 127 const unsigned char _Py_ascii_whitespace[] = { |
| 116 0, 0, 0, 0, 0, 0, 0, 0, | 128 0, 0, 0, 0, 0, 0, 0, 0, |
| 117 /* case 0x0009: * CHARACTER TABULATION */ | 129 /* case 0x0009: * HORIZONTAL TABULATION */ |
| 118 /* case 0x000A: * LINE FEED */ | 130 /* case 0x000A: * LINE FEED */ |
| 119 /* case 0x000B: * LINE TABULATION */ | 131 /* case 0x000B: * VERTICAL TABULATION */ |
| 120 /* case 0x000C: * FORM FEED */ | 132 /* case 0x000C: * FORM FEED */ |
| 121 /* case 0x000D: * CARRIAGE RETURN */ | 133 /* case 0x000D: * CARRIAGE RETURN */ |
| 122 0, 1, 1, 1, 1, 1, 0, 0, | 134 0, 1, 1, 1, 1, 1, 0, 0, |
| 123 0, 0, 0, 0, 0, 0, 0, 0, | 135 0, 0, 0, 0, 0, 0, 0, 0, |
| 124 /* case 0x001C: * FILE SEPARATOR */ | 136 /* case 0x001C: * FILE SEPARATOR */ |
| 125 /* case 0x001D: * GROUP SEPARATOR */ | 137 /* case 0x001D: * GROUP SEPARATOR */ |
| 126 /* case 0x001E: * RECORD SEPARATOR */ | 138 /* case 0x001E: * RECORD SEPARATOR */ |
| 127 /* case 0x001F: * UNIT SEPARATOR */ | 139 /* case 0x001F: * UNIT SEPARATOR */ |
| 128 0, 0, 0, 0, 1, 1, 1, 1, | 140 0, 0, 0, 0, 1, 1, 1, 1, |
| 129 /* case 0x0020: * SPACE */ | 141 /* case 0x0020: * SPACE */ |
| 130 1, 0, 0, 0, 0, 0, 0, 0, | 142 1, 0, 0, 0, 0, 0, 0, 0, |
| 131 0, 0, 0, 0, 0, 0, 0, 0, | 143 0, 0, 0, 0, 0, 0, 0, 0, |
| 132 0, 0, 0, 0, 0, 0, 0, 0, | 144 0, 0, 0, 0, 0, 0, 0, 0, |
| 133 0, 0, 0, 0, 0, 0, 0, 0, | 145 0, 0, 0, 0, 0, 0, 0, 0, |
| 134 | 146 |
| 135 0, 0, 0, 0, 0, 0, 0, 0, | 147 0, 0, 0, 0, 0, 0, 0, 0, |
| 136 0, 0, 0, 0, 0, 0, 0, 0, | 148 0, 0, 0, 0, 0, 0, 0, 0, |
| 137 0, 0, 0, 0, 0, 0, 0, 0, | 149 0, 0, 0, 0, 0, 0, 0, 0, |
| 138 0, 0, 0, 0, 0, 0, 0, 0, | 150 0, 0, 0, 0, 0, 0, 0, 0, |
| 139 0, 0, 0, 0, 0, 0, 0, 0, | 151 0, 0, 0, 0, 0, 0, 0, 0, |
| 140 0, 0, 0, 0, 0, 0, 0, 0, | 152 0, 0, 0, 0, 0, 0, 0, 0, |
| 141 0, 0, 0, 0, 0, 0, 0, 0, | 153 0, 0, 0, 0, 0, 0, 0, 0, |
| 142 0, 0, 0, 0, 0, 0, 0, 0 | 154 0, 0, 0, 0, 0, 0, 0, 0 |
| 143 }; | 155 }; |
| 144 | 156 |
| 145 static PyObject * | 157 static PyObject *unicode_encode_call_errorhandler(const char *errors, |
| 146 unicode_encode_call_errorhandler(const char *errors, | |
| 147 PyObject **errorHandler,const char *encoding, const char *reason, | 158 PyObject **errorHandler,const char *encoding, const char *reason, |
| 148 const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, | 159 const Py_UNICODE *unicode, Py_ssize_t size, PyObject **exceptionObject, |
| 149 Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos); | 160 Py_ssize_t startpos, Py_ssize_t endpos, Py_ssize_t *newpos); |
| 150 | 161 |
| 151 static void | 162 static void raise_encode_exception(PyObject **exceptionObject, |
| 152 raise_encode_exception(PyObject **exceptionObject, | 163 const char *encoding, |
| 153 » » const char *encoding, | 164 const Py_UNICODE *unicode, Py_ssize_t size, |
| 154 » » const Py_UNICODE *unicode, Py_ssize_t size, | 165 Py_ssize_t startpos, Py_ssize_t endpos, |
| 155 » » Py_ssize_t startpos, Py_ssize_t endpos, | 166 const char *reason); |
| 156 » » const char *reason); | |
| 157 | 167 |
| 158 /* Same for linebreaks */ | 168 /* Same for linebreaks */ |
| 159 static unsigned char ascii_linebreak[] = { | 169 static unsigned char ascii_linebreak[] = { |
| 160 0, 0, 0, 0, 0, 0, 0, 0, | 170 0, 0, 0, 0, 0, 0, 0, 0, |
| 161 /* 0x000A, * LINE FEED */ | 171 /* 0x000A, * LINE FEED */ |
| 162 /* 0x000B, * LINE TABULATION */ | |
| 163 /* 0x000C, * FORM FEED */ | |
| 164 /* 0x000D, * CARRIAGE RETURN */ | 172 /* 0x000D, * CARRIAGE RETURN */ |
| 165 0, 0, 1, 1, 1, 1, 0, 0, | 173 0, 0, 1, 0, 0, 1, 0, 0, |
| 166 0, 0, 0, 0, 0, 0, 0, 0, | 174 0, 0, 0, 0, 0, 0, 0, 0, |
| 167 /* 0x001C, * FILE SEPARATOR */ | 175 /* 0x001C, * FILE SEPARATOR */ |
| 168 /* 0x001D, * GROUP SEPARATOR */ | 176 /* 0x001D, * GROUP SEPARATOR */ |
| 169 /* 0x001E, * RECORD SEPARATOR */ | 177 /* 0x001E, * RECORD SEPARATOR */ |
| 170 0, 0, 0, 0, 1, 1, 1, 0, | 178 0, 0, 0, 0, 1, 1, 1, 0, |
| 171 0, 0, 0, 0, 0, 0, 0, 0, | 179 0, 0, 0, 0, 0, 0, 0, 0, |
| 172 0, 0, 0, 0, 0, 0, 0, 0, | 180 0, 0, 0, 0, 0, 0, 0, 0, |
| 173 0, 0, 0, 0, 0, 0, 0, 0, | 181 0, 0, 0, 0, 0, 0, 0, 0, |
| 174 0, 0, 0, 0, 0, 0, 0, 0, | 182 0, 0, 0, 0, 0, 0, 0, 0, |
| 175 | 183 |
| (...skipping 21 matching lines...) Expand all Loading... |
| 197 } | 205 } |
| 198 | 206 |
| 199 /* --- Bloom Filters ----------------------------------------------------- */ | 207 /* --- Bloom Filters ----------------------------------------------------- */ |
| 200 | 208 |
| 201 /* stuff to implement simple "bloom filters" for Unicode characters. | 209 /* stuff to implement simple "bloom filters" for Unicode characters. |
| 202 to keep things simple, we use a single bitmask, using the least 5 | 210 to keep things simple, we use a single bitmask, using the least 5 |
| 203 bits from each unicode characters as the bit index. */ | 211 bits from each unicode characters as the bit index. */ |
| 204 | 212 |
| 205 /* the linebreak mask is set up by Unicode_Init below */ | 213 /* the linebreak mask is set up by Unicode_Init below */ |
| 206 | 214 |
| 207 #if LONG_BIT >= 128 | |
| 208 #define BLOOM_WIDTH 128 | |
| 209 #elif LONG_BIT >= 64 | |
| 210 #define BLOOM_WIDTH 64 | |
| 211 #elif LONG_BIT >= 32 | |
| 212 #define BLOOM_WIDTH 32 | |
| 213 #else | |
| 214 #error "LONG_BIT is smaller than 32" | |
| 215 #endif | |
| 216 | |
| 217 #define BLOOM_MASK unsigned long | 215 #define BLOOM_MASK unsigned long |
| 218 | 216 |
| 219 static BLOOM_MASK bloom_linebreak; | 217 static BLOOM_MASK bloom_linebreak; |
| 220 | 218 |
| 221 #define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1))))) | 219 #define BLOOM(mask, ch) ((mask & (1 << ((ch) & 0x1F)))) |
| 222 #define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1))))) | |
| 223 | 220 |
| 224 #define BLOOM_LINEBREAK(ch) \ | 221 #define BLOOM_LINEBREAK(ch) \ |
| 225 ((ch) < 128U ? ascii_linebreak[(ch)] : \ | 222 ((ch) < 128U ? ascii_linebreak[(ch)] : \ |
| 226 (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch))) | 223 (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch))) |
| 227 | 224 |
| 228 Py_LOCAL_INLINE(BLOOM_MASK) | 225 Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len) |
| 229 make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len) | |
| 230 { | 226 { |
| 231 /* calculate simple bloom-style bitmask for a given unicode string */ | 227 /* calculate simple bloom-style bitmask for a given unicode string */ |
| 232 | 228 |
| 233 BLOOM_MASK mask; | 229 long mask; |
| 234 Py_ssize_t i; | 230 Py_ssize_t i; |
| 235 | 231 |
| 236 mask = 0; | 232 mask = 0; |
| 237 for (i = 0; i < len; i++) | 233 for (i = 0; i < len; i++) |
| 238 BLOOM_ADD(mask, ptr[i]); | 234 mask |= (1 << (ptr[i] & 0x1F)); |
| 239 | 235 |
| 240 return mask; | 236 return mask; |
| 241 } | 237 } |
| 242 | 238 |
| 243 Py_LOCAL_INLINE(int) | 239 Py_LOCAL_INLINE(int) unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen) |
| 244 unicode_member(Py_UNICODE chr, Py_UNICODE* set, Py_ssize_t setlen) | |
| 245 { | 240 { |
| 246 Py_ssize_t i; | 241 Py_ssize_t i; |
| 247 | 242 |
| 248 for (i = 0; i < setlen; i++) | 243 for (i = 0; i < setlen; i++) |
| 249 if (set[i] == chr) | 244 if (set[i] == chr) |
| 250 return 1; | 245 return 1; |
| 251 | 246 |
| 252 return 0; | 247 return 0; |
| 253 } | 248 } |
| 254 | 249 |
| 255 #define BLOOM_MEMBER(mask, chr, set, setlen) \ | 250 #define BLOOM_MEMBER(mask, chr, set, setlen) \ |
| 256 BLOOM(mask, chr) && unicode_member(chr, set, setlen) | 251 BLOOM(mask, chr) && unicode_member(chr, set, setlen) |
| 257 | 252 |
| 258 /* --- Unicode Object ----------------------------------------------------- */ | 253 /* --- Unicode Object ----------------------------------------------------- */ |
| 259 | 254 |
| 260 static int | 255 static |
| 261 unicode_resize(register PyUnicodeObject *unicode, | 256 int unicode_resize(register PyUnicodeObject *unicode, |
| 262 » Py_ssize_t length) | 257 Py_ssize_t length) |
| 263 { | 258 { |
| 264 void *oldstr; | 259 void *oldstr; |
| 265 | 260 |
| 266 /* Shortcut if there's nothing much to do. */ | 261 /* Shortcut if there's nothing much to do. */ |
| 267 if (unicode->length == length) | 262 if (unicode->length == length) |
| 268 goto reset; | 263 goto reset; |
| 269 | 264 |
| 270 /* Resizing shared object (unicode_empty or single character | 265 /* Resizing shared object (unicode_empty or single character |
| 271 objects) in-place is not allowed. Use PyUnicode_Resize() | 266 objects) in-place is not allowed. Use PyUnicode_Resize() |
| 272 instead ! */ | 267 instead ! */ |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 308 | 303 |
| 309 /* We allocate one more byte to make sure the string is | 304 /* We allocate one more byte to make sure the string is |
| 310 Ux0000 terminated; some code (e.g. new_identifier) | 305 Ux0000 terminated; some code (e.g. new_identifier) |
| 311 relies on that. | 306 relies on that. |
| 312 | 307 |
| 313 XXX This allocator could further be enhanced by assuring that the | 308 XXX This allocator could further be enhanced by assuring that the |
| 314 free list never reduces its size below 1. | 309 free list never reduces its size below 1. |
| 315 | 310 |
| 316 */ | 311 */ |
| 317 | 312 |
| 318 static PyUnicodeObject * | 313 static |
| 319 _PyUnicode_New(Py_ssize_t length) | 314 PyUnicodeObject *_PyUnicode_New(Py_ssize_t length) |
| 320 { | 315 { |
| 321 register PyUnicodeObject *unicode; | 316 register PyUnicodeObject *unicode; |
| 322 | 317 |
| 323 /* Optimization for empty strings */ | 318 /* Optimization for empty strings */ |
| 324 if (length == 0 && unicode_empty != NULL) { | 319 if (length == 0 && unicode_empty != NULL) { |
| 325 Py_INCREF(unicode_empty); | 320 Py_INCREF(unicode_empty); |
| 326 return unicode_empty; | 321 return unicode_empty; |
| 327 } | 322 } |
| 328 | 323 |
| 329 /* Ensure we won't overflow the size. */ | 324 /* Ensure we won't overflow the size. */ |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 380 return unicode; | 375 return unicode; |
| 381 | 376 |
| 382 onError: | 377 onError: |
| 383 /* XXX UNREF/NEWREF interface should be more symmetrical */ | 378 /* XXX UNREF/NEWREF interface should be more symmetrical */ |
| 384 _Py_DEC_REFTOTAL; | 379 _Py_DEC_REFTOTAL; |
| 385 _Py_ForgetReference((PyObject *)unicode); | 380 _Py_ForgetReference((PyObject *)unicode); |
| 386 PyObject_Del(unicode); | 381 PyObject_Del(unicode); |
| 387 return NULL; | 382 return NULL; |
| 388 } | 383 } |
| 389 | 384 |
| 390 static void | 385 static |
| 391 unicode_dealloc(register PyUnicodeObject *unicode) | 386 void unicode_dealloc(register PyUnicodeObject *unicode) |
| 392 { | 387 { |
| 393 switch (PyUnicode_CHECK_INTERNED(unicode)) { | 388 switch (PyUnicode_CHECK_INTERNED(unicode)) { |
| 394 case SSTATE_NOT_INTERNED: | 389 case SSTATE_NOT_INTERNED: |
| 395 break; | 390 break; |
| 396 | 391 |
| 397 case SSTATE_INTERNED_MORTAL: | 392 case SSTATE_INTERNED_MORTAL: |
| 398 /* revive dead object temporarily for DelItem */ | 393 /* revive dead object temporarily for DelItem */ |
| 399 Py_REFCNT(unicode) = 3; | 394 Py_REFCNT(unicode) = 3; |
| 400 if (PyDict_DelItem(interned, (PyObject *)unicode) != 0) | 395 if (PyDict_DelItem(interned, (PyObject *)unicode) != 0) |
| 401 Py_FatalError( | 396 Py_FatalError( |
| (...skipping 23 matching lines...) Expand all Loading... |
| 425 free_list = unicode; | 420 free_list = unicode; |
| 426 numfree++; | 421 numfree++; |
| 427 } | 422 } |
| 428 else { | 423 else { |
| 429 PyObject_DEL(unicode->str); | 424 PyObject_DEL(unicode->str); |
| 430 Py_XDECREF(unicode->defenc); | 425 Py_XDECREF(unicode->defenc); |
| 431 Py_TYPE(unicode)->tp_free((PyObject *)unicode); | 426 Py_TYPE(unicode)->tp_free((PyObject *)unicode); |
| 432 } | 427 } |
| 433 } | 428 } |
| 434 | 429 |
| 435 static int | 430 static |
| 436 _PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length) | 431 int _PyUnicode_Resize(PyUnicodeObject **unicode, Py_ssize_t length) |
| 437 { | 432 { |
| 438 register PyUnicodeObject *v; | 433 register PyUnicodeObject *v; |
| 439 | 434 |
| 440 /* Argument checks */ | 435 /* Argument checks */ |
| 441 if (unicode == NULL) { | 436 if (unicode == NULL) { |
| 442 PyErr_BadInternalCall(); | 437 PyErr_BadInternalCall(); |
| 443 return -1; | 438 return -1; |
| 444 } | 439 } |
| 445 v = *unicode; | 440 v = *unicode; |
| 446 if (v == NULL || !PyUnicode_Check(v) || Py_REFCNT(v) != 1 || length < 0) { | 441 if (v == NULL || !PyUnicode_Check(v) || Py_REFCNT(v) != 1 || length < 0) { |
| (...skipping 14 matching lines...) Expand all Loading... |
| 461 Py_DECREF(*unicode); | 456 Py_DECREF(*unicode); |
| 462 *unicode = w; | 457 *unicode = w; |
| 463 return 0; | 458 return 0; |
| 464 } | 459 } |
| 465 | 460 |
| 466 /* Note that we don't have to modify *unicode for unshared Unicode | 461 /* Note that we don't have to modify *unicode for unshared Unicode |
| 467 objects, since we can modify them in-place. */ | 462 objects, since we can modify them in-place. */ |
| 468 return unicode_resize(v, length); | 463 return unicode_resize(v, length); |
| 469 } | 464 } |
| 470 | 465 |
| 471 int | 466 int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length) |
| 472 PyUnicode_Resize(PyObject **unicode, Py_ssize_t length) | |
| 473 { | 467 { |
| 474 return _PyUnicode_Resize((PyUnicodeObject **)unicode, length); | 468 return _PyUnicode_Resize((PyUnicodeObject **)unicode, length); |
| 475 } | 469 } |
| 476 | 470 |
| 477 PyObject * | 471 PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u, |
| 478 PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) | 472 Py_ssize_t size) |
| 479 { | 473 { |
| 480 PyUnicodeObject *unicode; | 474 PyUnicodeObject *unicode; |
| 481 | 475 |
| 482 /* If the Unicode data is known at construction time, we can apply | 476 /* If the Unicode data is known at construction time, we can apply |
| 483 some optimizations which share commonly used objects. */ | 477 some optimizations which share commonly used objects. */ |
| 484 if (u != NULL) { | 478 if (u != NULL) { |
| 485 | 479 |
| 486 /* Optimization for empty strings */ | 480 /* Optimization for empty strings */ |
| 487 if (size == 0 && unicode_empty != NULL) { | 481 if (size == 0 && unicode_empty != NULL) { |
| 488 Py_INCREF(unicode_empty); | 482 Py_INCREF(unicode_empty); |
| (...skipping 20 matching lines...) Expand all Loading... |
| 509 if (!unicode) | 503 if (!unicode) |
| 510 return NULL; | 504 return NULL; |
| 511 | 505 |
| 512 /* Copy the Unicode data into the new object */ | 506 /* Copy the Unicode data into the new object */ |
| 513 if (u != NULL) | 507 if (u != NULL) |
| 514 Py_UNICODE_COPY(unicode->str, u, size); | 508 Py_UNICODE_COPY(unicode->str, u, size); |
| 515 | 509 |
| 516 return (PyObject *)unicode; | 510 return (PyObject *)unicode; |
| 517 } | 511 } |
| 518 | 512 |
| 519 PyObject * | 513 PyObject *PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) |
| 520 PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) | |
| 521 { | 514 { |
| 522 PyUnicodeObject *unicode; | 515 PyUnicodeObject *unicode; |
| 523 | 516 |
| 524 if (size < 0) { | 517 if (size < 0) { |
| 525 PyErr_SetString(PyExc_SystemError, | 518 PyErr_SetString(PyExc_SystemError, |
| 526 "Negative size passed to PyUnicode_FromStringAndSize"); | 519 "Negative size passed to PyUnicode_FromStringAndSize"); |
| 527 return NULL; | 520 return NULL; |
| 528 } | 521 } |
| 529 | 522 |
| 530 /* If the Unicode data is known at construction time, we can apply | 523 /* If the Unicode data is known at construction time, we can apply |
| (...skipping 26 matching lines...) Expand all Loading... |
| 557 return PyUnicode_DecodeUTF8(u, size, NULL); | 550 return PyUnicode_DecodeUTF8(u, size, NULL); |
| 558 } | 551 } |
| 559 | 552 |
| 560 unicode = _PyUnicode_New(size); | 553 unicode = _PyUnicode_New(size); |
| 561 if (!unicode) | 554 if (!unicode) |
| 562 return NULL; | 555 return NULL; |
| 563 | 556 |
| 564 return (PyObject *)unicode; | 557 return (PyObject *)unicode; |
| 565 } | 558 } |
| 566 | 559 |
| 567 PyObject * | 560 PyObject *PyUnicode_FromString(const char *u) |
| 568 PyUnicode_FromString(const char *u) | |
| 569 { | 561 { |
| 570 size_t size = strlen(u); | 562 size_t size = strlen(u); |
| 571 if (size > PY_SSIZE_T_MAX) { | 563 if (size > PY_SSIZE_T_MAX) { |
| 572 PyErr_SetString(PyExc_OverflowError, "input too long"); | 564 PyErr_SetString(PyExc_OverflowError, "input too long"); |
| 573 return NULL; | 565 return NULL; |
| 574 } | 566 } |
| 575 | 567 |
| 576 return PyUnicode_FromStringAndSize(u, size); | 568 return PyUnicode_FromStringAndSize(u, size); |
| 577 } | 569 } |
| 578 | 570 |
| 579 #ifdef HAVE_WCHAR_H | 571 #ifdef HAVE_WCHAR_H |
| 580 | 572 |
| 581 #if (Py_UNICODE_SIZE == 2) && defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) | 573 #if (Py_UNICODE_SIZE == 2) && defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) |
| 582 # define CONVERT_WCHAR_TO_SURROGATES | 574 # define CONVERT_WCHAR_TO_SURROGATES |
| 583 #endif | 575 #endif |
| 584 | 576 |
| 585 #ifdef CONVERT_WCHAR_TO_SURROGATES | 577 #ifdef CONVERT_WCHAR_TO_SURROGATES |
| 586 | 578 |
| 587 /* Here sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2, so we need | 579 /* Here sizeof(wchar_t) is 4 but Py_UNICODE_SIZE == 2, so we need |
| 588 to convert from UTF32 to UTF16. */ | 580 to convert from UTF32 to UTF16. */ |
| 589 | 581 |
| 590 PyObject * | 582 PyObject *PyUnicode_FromWideChar(register const wchar_t *w, |
| 591 PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) | 583 Py_ssize_t size) |
| 592 { | 584 { |
| 593 PyUnicodeObject *unicode; | 585 PyUnicodeObject *unicode; |
| 594 register Py_ssize_t i; | 586 register Py_ssize_t i; |
| 595 Py_ssize_t alloc; | 587 Py_ssize_t alloc; |
| 596 const wchar_t *orig_w; | 588 const wchar_t *orig_w; |
| 597 | 589 |
| 598 if (w == NULL) { | 590 if (w == NULL) { |
| 599 if (size == 0) | 591 if (size == 0) |
| 600 return PyUnicode_FromStringAndSize(NULL, 0); | 592 return PyUnicode_FromStringAndSize(NULL, 0); |
| 601 PyErr_BadInternalCall(); | 593 PyErr_BadInternalCall(); |
| (...skipping 29 matching lines...) Expand all Loading... |
| 631 } | 623 } |
| 632 else | 624 else |
| 633 *u++ = *w++; | 625 *u++ = *w++; |
| 634 } | 626 } |
| 635 } | 627 } |
| 636 return (PyObject *)unicode; | 628 return (PyObject *)unicode; |
| 637 } | 629 } |
| 638 | 630 |
| 639 #else | 631 #else |
| 640 | 632 |
| 641 PyObject * | 633 PyObject *PyUnicode_FromWideChar(register const wchar_t *w, |
| 642 PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size) | 634 Py_ssize_t size) |
| 643 { | 635 { |
| 644 PyUnicodeObject *unicode; | 636 PyUnicodeObject *unicode; |
| 645 | 637 |
| 646 if (w == NULL) { | 638 if (w == NULL) { |
| 647 if (size == 0) | 639 if (size == 0) |
| 648 return PyUnicode_FromStringAndSize(NULL, 0); | 640 return PyUnicode_FromStringAndSize(NULL, 0); |
| 649 PyErr_BadInternalCall(); | 641 PyErr_BadInternalCall(); |
| 650 return NULL; | 642 return NULL; |
| 651 } | 643 } |
| 652 | 644 |
| 653 if (size == -1) { | 645 if (size == -1) { |
| 654 size = wcslen(w); | 646 size = wcslen(w); |
| 655 } | 647 } |
| 656 | 648 |
| 657 unicode = _PyUnicode_New(size); | 649 unicode = _PyUnicode_New(size); |
| 658 if (!unicode) | 650 if (!unicode) |
| 659 return NULL; | 651 return NULL; |
| 660 | 652 |
| 661 /* Copy the wchar_t data into the new object */ | 653 /* Copy the wchar_t data into the new object */ |
| 662 #if Py_UNICODE_SIZE == SIZEOF_WCHAR_T | 654 #ifdef HAVE_USABLE_WCHAR_T |
| 663 memcpy(unicode->str, w, size * sizeof(wchar_t)); | 655 memcpy(unicode->str, w, size * sizeof(wchar_t)); |
| 664 #else | 656 #else |
| 665 { | 657 { |
| 666 register Py_UNICODE *u; | 658 register Py_UNICODE *u; |
| 667 register Py_ssize_t i; | 659 register Py_ssize_t i; |
| 668 u = PyUnicode_AS_UNICODE(unicode); | 660 u = PyUnicode_AS_UNICODE(unicode); |
| 669 for (i = size; i > 0; i--) | 661 for (i = size; i > 0; i--) |
| 670 *u++ = *w++; | 662 *u++ = *w++; |
| 671 } | 663 } |
| 672 #endif | 664 #endif |
| 673 | 665 |
| 674 return (PyObject *)unicode; | 666 return (PyObject *)unicode; |
| 675 } | 667 } |
| 676 | 668 |
| 677 #endif /* CONVERT_WCHAR_TO_SURROGATES */ | 669 #endif /* CONVERT_WCHAR_TO_SURROGATES */ |
| 678 | 670 |
| 679 #undef CONVERT_WCHAR_TO_SURROGATES | 671 #undef CONVERT_WCHAR_TO_SURROGATES |
| 680 | 672 |
| 681 static void | 673 static void |
| 682 makefmt(char *fmt, int longflag, int longlongflag, int size_tflag, | 674 makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c) |
| 683 int zeropad, int width, int precision, char c) | |
| 684 { | 675 { |
| 685 *fmt++ = '%'; | 676 *fmt++ = '%'; |
| 686 if (width) { | 677 if (width) { |
| 687 if (zeropad) | 678 if (zeropad) |
| 688 *fmt++ = '0'; | 679 *fmt++ = '0'; |
| 689 fmt += sprintf(fmt, "%d", width); | 680 fmt += sprintf(fmt, "%d", width); |
| 690 } | 681 } |
| 691 if (precision) | 682 if (precision) |
| 692 fmt += sprintf(fmt, ".%d", precision); | 683 fmt += sprintf(fmt, ".%d", precision); |
| 693 if (longflag) | 684 if (longflag) |
| 694 *fmt++ = 'l'; | 685 *fmt++ = 'l'; |
| 695 else if (longlongflag) { | |
| 696 /* longlongflag should only ever be nonzero on machines with | |
| 697 HAVE_LONG_LONG defined */ | |
| 698 #ifdef HAVE_LONG_LONG | |
| 699 char *f = PY_FORMAT_LONG_LONG; | |
| 700 while (*f) | |
| 701 *fmt++ = *f++; | |
| 702 #else | |
| 703 /* we shouldn't ever get here */ | |
| 704 assert(0); | |
| 705 *fmt++ = 'l'; | |
| 706 #endif | |
| 707 } | |
| 708 else if (size_tflag) { | 686 else if (size_tflag) { |
| 709 char *f = PY_FORMAT_SIZE_T; | 687 char *f = PY_FORMAT_SIZE_T; |
| 710 while (*f) | 688 while (*f) |
| 711 *fmt++ = *f++; | 689 *fmt++ = *f++; |
| 712 } | 690 } |
| 713 *fmt++ = c; | 691 *fmt++ = c; |
| 714 *fmt = '\0'; | 692 *fmt = '\0'; |
| 715 } | 693 } |
| 716 | 694 |
| 717 /* helper for PyUnicode_FromFormatV() */ | |
| 718 | |
| 719 static const char* | |
| 720 parse_format_flags(const char *f, | |
| 721 int *p_width, int *p_precision, | |
| 722 int *p_longflag, int *p_longlongflag, int *p_size_tflag) | |
| 723 { | |
| 724 int width, precision, longflag, longlongflag, size_tflag; | |
| 725 | |
| 726 /* parse the width.precision part, e.g. "%2.5s" => width=2, precision=5 */ | |
| 727 f++; | |
| 728 width = 0; | |
| 729 while (Py_ISDIGIT((unsigned)*f)) | |
| 730 width = (width*10) + *f++ - '0'; | |
| 731 precision = 0; | |
| 732 if (*f == '.') { | |
| 733 f++; | |
| 734 while (Py_ISDIGIT((unsigned)*f)) | |
| 735 precision = (precision*10) + *f++ - '0'; | |
| 736 if (*f == '%') { | |
| 737 /* "%.3%s" => f points to "3" */ | |
| 738 f--; | |
| 739 } | |
| 740 } | |
| 741 if (*f == '\0') { | |
| 742 /* bogus format "%.1" => go backward, f points to "1" */ | |
| 743 f--; | |
| 744 } | |
| 745 if (p_width != NULL) | |
| 746 *p_width = width; | |
| 747 if (p_precision != NULL) | |
| 748 *p_precision = precision; | |
| 749 | |
| 750 /* Handle %ld, %lu, %lld and %llu. */ | |
| 751 longflag = 0; | |
| 752 longlongflag = 0; | |
| 753 size_tflag = 0; | |
| 754 | |
| 755 if (*f == 'l') { | |
| 756 if (f[1] == 'd' || f[1] == 'u' || f[1] == 'i') { | |
| 757 longflag = 1; | |
| 758 ++f; | |
| 759 } | |
| 760 #ifdef HAVE_LONG_LONG | |
| 761 else if (f[1] == 'l' && | |
| 762 (f[2] == 'd' || f[2] == 'u' || f[2] == 'i')) { | |
| 763 longlongflag = 1; | |
| 764 f += 2; | |
| 765 } | |
| 766 #endif | |
| 767 } | |
| 768 /* handle the size_t flag. */ | |
| 769 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u' || f[1] == 'i')) { | |
| 770 size_tflag = 1; | |
| 771 ++f; | |
| 772 } | |
| 773 if (p_longflag != NULL) | |
| 774 *p_longflag = longflag; | |
| 775 if (p_longlongflag != NULL) | |
| 776 *p_longlongflag = longlongflag; | |
| 777 if (p_size_tflag != NULL) | |
| 778 *p_size_tflag = size_tflag; | |
| 779 return f; | |
| 780 } | |
| 781 | |
| 782 #define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;} | 695 #define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;} |
| 783 | |
| 784 /* size of fixed-size buffer for formatting single arguments */ | |
| 785 #define ITEM_BUFFER_LEN 21 | |
| 786 /* maximum number of characters required for output of %ld. 21 characters | |
| 787 allows for 64-bit integers (in decimal) and an optional sign. */ | |
| 788 #define MAX_LONG_CHARS 21 | |
| 789 /* maximum number of characters required for output of %lld. | |
| 790 We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits, | |
| 791 plus 1 for the sign. 53/22 is an upper bound for log10(256). */ | |
| 792 #define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22) | |
| 793 | 696 |
| 794 PyObject * | 697 PyObject * |
| 795 PyUnicode_FromFormatV(const char *format, va_list vargs) | 698 PyUnicode_FromFormatV(const char *format, va_list vargs) |
| 796 { | 699 { |
| 797 va_list count; | 700 va_list count; |
| 798 Py_ssize_t callcount = 0; | 701 Py_ssize_t callcount = 0; |
| 799 PyObject **callresults = NULL; | 702 PyObject **callresults = NULL; |
| 800 PyObject **callresult = NULL; | 703 PyObject **callresult = NULL; |
| 801 Py_ssize_t n = 0; | 704 Py_ssize_t n = 0; |
| 802 int width = 0; | 705 int width = 0; |
| 803 int precision = 0; | 706 int precision = 0; |
| 804 int zeropad; | 707 int zeropad; |
| 805 const char* f; | 708 const char* f; |
| 806 Py_UNICODE *s; | 709 Py_UNICODE *s; |
| 807 PyObject *string; | 710 PyObject *string; |
| 808 /* used by sprintf */ | 711 /* used by sprintf */ |
| 809 char buffer[ITEM_BUFFER_LEN+1]; | 712 char buffer[21]; |
| 810 /* use abuffer instead of buffer, if we need more space | 713 /* use abuffer instead of buffer, if we need more space |
| 811 * (which can happen if there's a format specifier with width). */ | 714 * (which can happen if there's a format specifier with width). */ |
| 812 char *abuffer = NULL; | 715 char *abuffer = NULL; |
| 813 char *realbuffer; | 716 char *realbuffer; |
| 814 Py_ssize_t abuffersize = 0; | 717 Py_ssize_t abuffersize = 0; |
| 815 char fmt[61]; /* should be enough for %0width.precisionlld */ | 718 char fmt[60]; /* should be enough for %0width.precisionld */ |
| 816 const char *copy; | 719 const char *copy; |
| 817 | 720 |
| 818 Py_VA_COPY(count, vargs); | 721 #ifdef VA_LIST_IS_ARRAY |
| 722 Py_MEMCPY(count, vargs, sizeof(va_list)); |
| 723 #else |
| 724 #ifdef __va_copy |
| 725 __va_copy(count, vargs); |
| 726 #else |
| 727 count = vargs; |
| 728 #endif |
| 729 #endif |
| 819 /* step 1: count the number of %S/%R/%A/%s format specifications | 730 /* step 1: count the number of %S/%R/%A/%s format specifications |
| 820 * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/ | 731 * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/ |
| 821 * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the | 732 * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the |
| 822 * result in an array) */ | 733 * result in an array) */ |
| 823 for (f = format; *f; f++) { | 734 for (f = format; *f; f++) { |
| 824 if (*f == '%') { | 735 if (*f == '%') { |
| 825 /* skip width or width.precision (eg. "1.2" of "%1.2f") */ | 736 if (*(f+1)=='%') |
| 826 f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL); | 737 continue; |
| 827 if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V') | 738 if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A') |
| 828 ++callcount; | 739 ++callcount; |
| 829 } | 740 while (ISDIGIT((unsigned)*f)) |
| 830 else if (128 <= (unsigned char)*f) { | 741 width = (width*10) + *f++ - '0'; |
| 831 PyErr_Format(PyExc_ValueError, | 742 while (*++f && *f != '%' && !ISALPHA((unsigned)*f)) |
| 832 "PyUnicode_FromFormatV() expects an ASCII-encoded format " | 743 ; |
| 833 "string, got a non-ASCII byte: 0x%02x", | 744 if (*f == 's') |
| 834 (unsigned char)*f); | 745 ++callcount; |
| 835 return NULL; | |
| 836 } | 746 } |
| 837 } | 747 } |
| 838 /* step 2: allocate memory for the results of | 748 /* step 2: allocate memory for the results of |
| 839 * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */ | 749 * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */ |
| 840 if (callcount) { | 750 if (callcount) { |
| 841 callresults = PyObject_Malloc(sizeof(PyObject *)*callcount); | 751 callresults = PyObject_Malloc(sizeof(PyObject *)*callcount); |
| 842 if (!callresults) { | 752 if (!callresults) { |
| 843 PyErr_NoMemory(); | 753 PyErr_NoMemory(); |
| 844 return NULL; | 754 return NULL; |
| 845 } | 755 } |
| 846 callresult = callresults; | 756 callresult = callresults; |
| 847 } | 757 } |
| 848 /* step 3: figure out how large a buffer we need */ | 758 /* step 3: figure out how large a buffer we need */ |
| 849 for (f = format; *f; f++) { | 759 for (f = format; *f; f++) { |
| 850 if (*f == '%') { | 760 if (*f == '%') { |
| 851 #ifdef HAVE_LONG_LONG | 761 const char* p = f; |
| 852 int longlongflag; | 762 width = 0; |
| 853 #endif | 763 while (ISDIGIT((unsigned)*f)) |
| 854 const char* p; | 764 width = (width*10) + *f++ - '0'; |
| 855 | 765 while (*++f && *f != '%' && !ISALPHA((unsigned)*f)) |
| 856 p = f; | 766 ; |
| 857 f = parse_format_flags(f, &width, NULL, | 767 |
| 858 NULL, &longlongflag, NULL); | 768 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since |
| 769 * they don't affect the amount of space we reserve. |
| 770 */ |
| 771 if ((*f == 'l' || *f == 'z') && |
| 772 (f[1] == 'd' || f[1] == 'u')) |
| 773 ++f; |
| 859 | 774 |
| 860 switch (*f) { | 775 switch (*f) { |
| 861 case 'c': | 776 case 'c': |
| 862 { | |
| 863 #ifndef Py_UNICODE_WIDE | |
| 864 int ordinal = va_arg(count, int); | |
| 865 if (ordinal > 0xffff) | |
| 866 n += 2; | |
| 867 else | |
| 868 n++; | |
| 869 #else | |
| 870 (void)va_arg(count, int); | 777 (void)va_arg(count, int); |
| 871 n++; | 778 /* fall through... */ |
| 872 #endif | |
| 873 break; | |
| 874 } | |
| 875 case '%': | 779 case '%': |
| 876 n++; | 780 n++; |
| 877 break; | 781 break; |
| 878 case 'd': case 'u': case 'i': case 'x': | 782 case 'd': case 'u': case 'i': case 'x': |
| 879 (void) va_arg(count, int); | 783 (void) va_arg(count, int); |
| 880 #ifdef HAVE_LONG_LONG | 784 /* 20 bytes is enough to hold a 64-bit |
| 881 if (longlongflag) { | 785 integer. Decimal takes the most space. |
| 882 if (width < MAX_LONG_LONG_CHARS) | 786 This isn't enough for octal. |
| 883 width = MAX_LONG_LONG_CHARS; | 787 If a width is specified we need more |
| 884 } | 788 (which we allocate later). */ |
| 885 else | 789 if (width < 20) |
| 886 #endif | 790 width = 20; |
| 887 /* MAX_LONG_CHARS is enough to hold a 64-bit integer, | |
| 888 including sign. Decimal takes the most space. This | |
| 889 isn't enough for octal. If a width is specified we | |
| 890 need more (which we allocate later). */ | |
| 891 if (width < MAX_LONG_CHARS) | |
| 892 width = MAX_LONG_CHARS; | |
| 893 n += width; | 791 n += width; |
| 894 /* XXX should allow for large precision here too. */ | |
| 895 if (abuffersize < width) | 792 if (abuffersize < width) |
| 896 abuffersize = width; | 793 abuffersize = width; |
| 897 break; | 794 break; |
| 898 case 's': | 795 case 's': |
| 899 { | 796 { |
| 900 /* UTF-8 */ | 797 /* UTF-8 */ |
| 901 const char *s = va_arg(count, const char*); | 798 const char *s = va_arg(count, const char*); |
| 902 PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace"); | 799 PyObject *str = PyUnicode_DecodeUTF8(s, strlen(s), "replace"); |
| 903 if (!str) | 800 if (!str) |
| 904 goto fail; | 801 goto fail; |
| 905 n += PyUnicode_GET_SIZE(str); | 802 n += PyUnicode_GET_SIZE(str); |
| 906 /* Remember the str and switch to the next slot */ | 803 /* Remember the str and switch to the next slot */ |
| 907 *callresult++ = str; | 804 *callresult++ = str; |
| 908 break; | 805 break; |
| 909 } | 806 } |
| 910 case 'U': | 807 case 'U': |
| 911 { | 808 { |
| 912 PyObject *obj = va_arg(count, PyObject *); | 809 PyObject *obj = va_arg(count, PyObject *); |
| 913 assert(obj && PyUnicode_Check(obj)); | 810 assert(obj && PyUnicode_Check(obj)); |
| 914 n += PyUnicode_GET_SIZE(obj); | 811 n += PyUnicode_GET_SIZE(obj); |
| 915 break; | 812 break; |
| 916 } | 813 } |
| 917 case 'V': | 814 case 'V': |
| 918 { | 815 { |
| 919 PyObject *obj = va_arg(count, PyObject *); | 816 PyObject *obj = va_arg(count, PyObject *); |
| 920 const char *str = va_arg(count, const char *); | 817 const char *str = va_arg(count, const char *); |
| 921 PyObject *str_obj; | |
| 922 assert(obj || str); | 818 assert(obj || str); |
| 923 assert(!obj || PyUnicode_Check(obj)); | 819 assert(!obj || PyUnicode_Check(obj)); |
| 924 if (obj) { | 820 if (obj) |
| 925 n += PyUnicode_GET_SIZE(obj); | 821 n += PyUnicode_GET_SIZE(obj); |
| 926 *callresult++ = NULL; | 822 else |
| 927 } | 823 n += strlen(str); |
| 928 else { | |
| 929 str_obj = PyUnicode_DecodeUTF8(str, strlen(str), "replace"); | |
| 930 if (!str_obj) | |
| 931 goto fail; | |
| 932 n += PyUnicode_GET_SIZE(str_obj); | |
| 933 *callresult++ = str_obj; | |
| 934 } | |
| 935 break; | 824 break; |
| 936 } | 825 } |
| 937 case 'S': | 826 case 'S': |
| 938 { | 827 { |
| 939 PyObject *obj = va_arg(count, PyObject *); | 828 PyObject *obj = va_arg(count, PyObject *); |
| 940 PyObject *str; | 829 PyObject *str; |
| 941 assert(obj); | 830 assert(obj); |
| 942 str = PyObject_Str(obj); | 831 str = PyObject_Str(obj); |
| 943 if (!str) | 832 if (!str) |
| 944 goto fail; | 833 goto fail; |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 989 string. (we cannot just skip the | 878 string. (we cannot just skip the |
| 990 code, since there's no way to know | 879 code, since there's no way to know |
| 991 what's in the argument list) */ | 880 what's in the argument list) */ |
| 992 n += strlen(p); | 881 n += strlen(p); |
| 993 goto expand; | 882 goto expand; |
| 994 } | 883 } |
| 995 } else | 884 } else |
| 996 n++; | 885 n++; |
| 997 } | 886 } |
| 998 expand: | 887 expand: |
| 999 if (abuffersize > ITEM_BUFFER_LEN) { | 888 if (abuffersize > 20) { |
| 1000 /* add 1 for sprintf's trailing null byte */ | 889 abuffer = PyObject_Malloc(abuffersize); |
| 1001 abuffer = PyObject_Malloc(abuffersize + 1); | |
| 1002 if (!abuffer) { | 890 if (!abuffer) { |
| 1003 PyErr_NoMemory(); | 891 PyErr_NoMemory(); |
| 1004 goto fail; | 892 goto fail; |
| 1005 } | 893 } |
| 1006 realbuffer = abuffer; | 894 realbuffer = abuffer; |
| 1007 } | 895 } |
| 1008 else | 896 else |
| 1009 realbuffer = buffer; | 897 realbuffer = buffer; |
| 1010 /* step 4: fill the buffer */ | 898 /* step 4: fill the buffer */ |
| 1011 /* Since we've analyzed how much space we need for the worst case, | 899 /* Since we've analyzed how much space we need for the worst case, |
| 1012 we don't have to resize the string. | 900 we don't have to resize the string. |
| 1013 There can be no errors beyond this point. */ | 901 There can be no errors beyond this point. */ |
| 1014 string = PyUnicode_FromUnicode(NULL, n); | 902 string = PyUnicode_FromUnicode(NULL, n); |
| 1015 if (!string) | 903 if (!string) |
| 1016 goto fail; | 904 goto fail; |
| 1017 | 905 |
| 1018 s = PyUnicode_AS_UNICODE(string); | 906 s = PyUnicode_AS_UNICODE(string); |
| 1019 callresult = callresults; | 907 callresult = callresults; |
| 1020 | 908 |
| 1021 for (f = format; *f; f++) { | 909 for (f = format; *f; f++) { |
| 1022 if (*f == '%') { | 910 if (*f == '%') { |
| 1023 const char* p; | 911 const char* p = f++; |
| 1024 int longflag; | 912 int longflag = 0; |
| 1025 int longlongflag; | 913 int size_tflag = 0; |
| 1026 int size_tflag; | 914 zeropad = (*f == '0'); |
| 1027 | 915 /* parse the width.precision part */ |
| 1028 p = f; | 916 width = 0; |
| 1029 zeropad = (f[1] == '0'); | 917 while (ISDIGIT((unsigned)*f)) |
| 1030 f = parse_format_flags(f, &width, &precision, | 918 width = (width*10) + *f++ - '0'; |
| 1031 &longflag, &longlongflag, &size_tflag); | 919 precision = 0; |
| 920 if (*f == '.') { |
| 921 f++; |
| 922 while (ISDIGIT((unsigned)*f)) |
| 923 precision = (precision*10) + *f++ - '0'; |
| 924 } |
| 925 /* handle the long flag, but only for %ld and %lu. |
| 926 others can be added when necessary. */ |
| 927 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { |
| 928 longflag = 1; |
| 929 ++f; |
| 930 } |
| 931 /* handle the size_t flag. */ |
| 932 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { |
| 933 size_tflag = 1; |
| 934 ++f; |
| 935 } |
| 1032 | 936 |
| 1033 switch (*f) { | 937 switch (*f) { |
| 1034 case 'c': | 938 case 'c': |
| 1035 { | 939 *s++ = va_arg(vargs, int); |
| 1036 int ordinal = va_arg(vargs, int); | |
| 1037 #ifndef Py_UNICODE_WIDE | |
| 1038 if (ordinal > 0xffff) { | |
| 1039 ordinal -= 0x10000; | |
| 1040 *s++ = 0xD800 | (ordinal >> 10); | |
| 1041 *s++ = 0xDC00 | (ordinal & 0x3FF); | |
| 1042 } else | |
| 1043 #endif | |
| 1044 *s++ = ordinal; | |
| 1045 break; | 940 break; |
| 1046 } | |
| 1047 case 'i': | |
| 1048 case 'd': | 941 case 'd': |
| 1049 makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, | 942 makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'd
'); |
| 1050 width, precision, *f); | |
| 1051 if (longflag) | 943 if (longflag) |
| 1052 sprintf(realbuffer, fmt, va_arg(vargs, long)); | 944 sprintf(realbuffer, fmt, va_arg(vargs, long)); |
| 1053 #ifdef HAVE_LONG_LONG | |
| 1054 else if (longlongflag) | |
| 1055 sprintf(realbuffer, fmt, va_arg(vargs, PY_LONG_LONG)); | |
| 1056 #endif | |
| 1057 else if (size_tflag) | 945 else if (size_tflag) |
| 1058 sprintf(realbuffer, fmt, va_arg(vargs, Py_ssize_t)); | 946 sprintf(realbuffer, fmt, va_arg(vargs, Py_ssize_t)); |
| 1059 else | 947 else |
| 1060 sprintf(realbuffer, fmt, va_arg(vargs, int)); | 948 sprintf(realbuffer, fmt, va_arg(vargs, int)); |
| 1061 appendstring(realbuffer); | 949 appendstring(realbuffer); |
| 1062 break; | 950 break; |
| 1063 case 'u': | 951 case 'u': |
| 1064 makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, | 952 makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'u
'); |
| 1065 width, precision, 'u'); | |
| 1066 if (longflag) | 953 if (longflag) |
| 1067 sprintf(realbuffer, fmt, va_arg(vargs, unsigned long)); | 954 sprintf(realbuffer, fmt, va_arg(vargs, unsigned long)); |
| 1068 #ifdef HAVE_LONG_LONG | |
| 1069 else if (longlongflag) | |
| 1070 sprintf(realbuffer, fmt, va_arg(vargs, | |
| 1071 unsigned PY_LONG_LONG)); | |
| 1072 #endif | |
| 1073 else if (size_tflag) | 955 else if (size_tflag) |
| 1074 sprintf(realbuffer, fmt, va_arg(vargs, size_t)); | 956 sprintf(realbuffer, fmt, va_arg(vargs, size_t)); |
| 1075 else | 957 else |
| 1076 sprintf(realbuffer, fmt, va_arg(vargs, unsigned int)); | 958 sprintf(realbuffer, fmt, va_arg(vargs, unsigned int)); |
| 1077 appendstring(realbuffer); | 959 appendstring(realbuffer); |
| 1078 break; | 960 break; |
| 961 case 'i': |
| 962 makefmt(fmt, 0, 0, zeropad, width, precision, 'i'); |
| 963 sprintf(realbuffer, fmt, va_arg(vargs, int)); |
| 964 appendstring(realbuffer); |
| 965 break; |
| 1079 case 'x': | 966 case 'x': |
| 1080 makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x'); | 967 makefmt(fmt, 0, 0, zeropad, width, precision, 'x'); |
| 1081 sprintf(realbuffer, fmt, va_arg(vargs, int)); | 968 sprintf(realbuffer, fmt, va_arg(vargs, int)); |
| 1082 appendstring(realbuffer); | 969 appendstring(realbuffer); |
| 1083 break; | 970 break; |
| 1084 case 's': | 971 case 's': |
| 1085 { | 972 { |
| 1086 /* unused, since we already have the result */ | 973 /* unused, since we already have the result */ |
| 1087 (void) va_arg(vargs, char *); | 974 (void) va_arg(vargs, char *); |
| 1088 Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult), | 975 Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult), |
| 1089 PyUnicode_GET_SIZE(*callresult)); | 976 PyUnicode_GET_SIZE(*callresult)); |
| 1090 s += PyUnicode_GET_SIZE(*callresult); | 977 s += PyUnicode_GET_SIZE(*callresult); |
| 1091 /* We're done with the unicode()/repr() => forget it */ | 978 /* We're done with the unicode()/repr() => forget it */ |
| 1092 Py_DECREF(*callresult); | 979 Py_DECREF(*callresult); |
| 1093 /* switch to next unicode()/repr() result */ | 980 /* switch to next unicode()/repr() result */ |
| 1094 ++callresult; | 981 ++callresult; |
| 1095 break; | 982 break; |
| 1096 } | 983 } |
| 1097 case 'U': | 984 case 'U': |
| 1098 { | 985 { |
| 1099 PyObject *obj = va_arg(vargs, PyObject *); | 986 PyObject *obj = va_arg(vargs, PyObject *); |
| 1100 Py_ssize_t size = PyUnicode_GET_SIZE(obj); | 987 Py_ssize_t size = PyUnicode_GET_SIZE(obj); |
| 1101 Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size); | 988 Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size); |
| 1102 s += size; | 989 s += size; |
| 1103 break; | 990 break; |
| 1104 } | 991 } |
| 1105 case 'V': | 992 case 'V': |
| 1106 { | 993 { |
| 1107 PyObject *obj = va_arg(vargs, PyObject *); | 994 PyObject *obj = va_arg(vargs, PyObject *); |
| 1108 va_arg(vargs, const char *); | 995 const char *str = va_arg(vargs, const char *); |
| 1109 if (obj) { | 996 if (obj) { |
| 1110 Py_ssize_t size = PyUnicode_GET_SIZE(obj); | 997 Py_ssize_t size = PyUnicode_GET_SIZE(obj); |
| 1111 Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size); | 998 Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size); |
| 1112 s += size; | 999 s += size; |
| 1113 } else { | 1000 } else { |
| 1114 Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult), | 1001 appendstring(str); |
| 1115 PyUnicode_GET_SIZE(*callresult)); | |
| 1116 s += PyUnicode_GET_SIZE(*callresult); | |
| 1117 Py_DECREF(*callresult); | |
| 1118 } | 1002 } |
| 1119 ++callresult; | |
| 1120 break; | 1003 break; |
| 1121 } | 1004 } |
| 1122 case 'S': | 1005 case 'S': |
| 1123 case 'R': | 1006 case 'R': |
| 1124 case 'A': | |
| 1125 { | 1007 { |
| 1126 Py_UNICODE *ucopy; | 1008 Py_UNICODE *ucopy; |
| 1127 Py_ssize_t usize; | 1009 Py_ssize_t usize; |
| 1128 Py_ssize_t upos; | 1010 Py_ssize_t upos; |
| 1129 /* unused, since we already have the result */ | 1011 /* unused, since we already have the result */ |
| 1130 (void) va_arg(vargs, PyObject *); | 1012 (void) va_arg(vargs, PyObject *); |
| 1131 ucopy = PyUnicode_AS_UNICODE(*callresult); | 1013 ucopy = PyUnicode_AS_UNICODE(*callresult); |
| 1132 usize = PyUnicode_GET_SIZE(*callresult); | 1014 usize = PyUnicode_GET_SIZE(*callresult); |
| 1133 for (upos = 0; upos<usize;) | 1015 for (upos = 0; upos<usize;) |
| 1134 *s++ = ucopy[upos++]; | 1016 *s++ = ucopy[upos++]; |
| (...skipping 15 matching lines...) Expand all Loading... |
| 1150 } | 1032 } |
| 1151 appendstring(buffer); | 1033 appendstring(buffer); |
| 1152 break; | 1034 break; |
| 1153 case '%': | 1035 case '%': |
| 1154 *s++ = '%'; | 1036 *s++ = '%'; |
| 1155 break; | 1037 break; |
| 1156 default: | 1038 default: |
| 1157 appendstring(p); | 1039 appendstring(p); |
| 1158 goto end; | 1040 goto end; |
| 1159 } | 1041 } |
| 1160 } | 1042 } else |
| 1161 else | |
| 1162 *s++ = *f; | 1043 *s++ = *f; |
| 1163 } | 1044 } |
| 1164 | 1045 |
| 1165 end: | 1046 end: |
| 1166 if (callresults) | 1047 if (callresults) |
| 1167 PyObject_Free(callresults); | 1048 PyObject_Free(callresults); |
| 1168 if (abuffer) | 1049 if (abuffer) |
| 1169 PyObject_Free(abuffer); | 1050 PyObject_Free(abuffer); |
| 1170 PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string)); | 1051 PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string)); |
| 1171 return string; | 1052 return string; |
| 1172 fail: | 1053 fail: |
| 1173 if (callresults) { | 1054 if (callresults) { |
| 1174 PyObject **callresult2 = callresults; | 1055 PyObject **callresult2 = callresults; |
| 1175 while (callresult2 < callresult) { | 1056 while (callresult2 < callresult) { |
| 1176 Py_XDECREF(*callresult2); | 1057 Py_DECREF(*callresult2); |
| 1177 ++callresult2; | 1058 ++callresult2; |
| 1178 } | 1059 } |
| 1179 PyObject_Free(callresults); | 1060 PyObject_Free(callresults); |
| 1180 } | 1061 } |
| 1181 if (abuffer) | 1062 if (abuffer) |
| 1182 PyObject_Free(abuffer); | 1063 PyObject_Free(abuffer); |
| 1183 return NULL; | 1064 return NULL; |
| 1184 } | 1065 } |
| 1185 | 1066 |
| 1186 #undef appendstring | 1067 #undef appendstring |
| 1187 | 1068 |
| 1188 PyObject * | 1069 PyObject * |
| 1189 PyUnicode_FromFormat(const char *format, ...) | 1070 PyUnicode_FromFormat(const char *format, ...) |
| 1190 { | 1071 { |
| 1191 PyObject* ret; | 1072 PyObject* ret; |
| 1192 va_list vargs; | 1073 va_list vargs; |
| 1193 | 1074 |
| 1194 #ifdef HAVE_STDARG_PROTOTYPES | 1075 #ifdef HAVE_STDARG_PROTOTYPES |
| 1195 va_start(vargs, format); | 1076 va_start(vargs, format); |
| 1196 #else | 1077 #else |
| 1197 va_start(vargs); | 1078 va_start(vargs); |
| 1198 #endif | 1079 #endif |
| 1199 ret = PyUnicode_FromFormatV(format, vargs); | 1080 ret = PyUnicode_FromFormatV(format, vargs); |
| 1200 va_end(vargs); | 1081 va_end(vargs); |
| 1201 return ret; | 1082 return ret; |
| 1202 } | 1083 } |
| 1203 | 1084 |
| 1204 /* Helper function for PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(): | 1085 Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode, |
| 1205 convert a Unicode object to a wide character string. | 1086 wchar_t *w, |
| 1206 | 1087 Py_ssize_t size) |
| 1207 - If w is NULL: return the number of wide characters (including the nul | |
| 1208 character) required to convert the unicode object. Ignore size argument. | |
| 1209 | |
| 1210 - Otherwise: return the number of wide characters (excluding the nul | |
| 1211 character) written into w. Write at most size wide characters (including | |
| 1212 the nul character). */ | |
| 1213 static Py_ssize_t | |
| 1214 unicode_aswidechar(PyUnicodeObject *unicode, | |
| 1215 wchar_t *w, | |
| 1216 Py_ssize_t size) | |
| 1217 { | |
| 1218 #if Py_UNICODE_SIZE == SIZEOF_WCHAR_T | |
| 1219 Py_ssize_t res; | |
| 1220 if (w != NULL) { | |
| 1221 res = PyUnicode_GET_SIZE(unicode); | |
| 1222 if (size > res) | |
| 1223 size = res + 1; | |
| 1224 else | |
| 1225 res = size; | |
| 1226 memcpy(w, unicode->str, size * sizeof(wchar_t)); | |
| 1227 return res; | |
| 1228 } | |
| 1229 else | |
| 1230 return PyUnicode_GET_SIZE(unicode) + 1; | |
| 1231 #elif Py_UNICODE_SIZE == 2 && SIZEOF_WCHAR_T == 4 | |
| 1232 register const Py_UNICODE *u; | |
| 1233 const Py_UNICODE *uend; | |
| 1234 const wchar_t *worig, *wend; | |
| 1235 Py_ssize_t nchar; | |
| 1236 | |
| 1237 u = PyUnicode_AS_UNICODE(unicode); | |
| 1238 uend = u + PyUnicode_GET_SIZE(unicode); | |
| 1239 if (w != NULL) { | |
| 1240 worig = w; | |
| 1241 wend = w + size; | |
| 1242 while (u != uend && w != wend) { | |
| 1243 if (0xD800 <= u[0] && u[0] <= 0xDBFF | |
| 1244 && 0xDC00 <= u[1] && u[1] <= 0xDFFF) | |
| 1245 { | |
| 1246 *w = (((u[0] & 0x3FF) << 10) | (u[1] & 0x3FF)) + 0x10000; | |
| 1247 u += 2; | |
| 1248 } | |
| 1249 else { | |
| 1250 *w = *u; | |
| 1251 u++; | |
| 1252 } | |
| 1253 w++; | |
| 1254 } | |
| 1255 if (w != wend) | |
| 1256 *w = L'\0'; | |
| 1257 return w - worig; | |
| 1258 } | |
| 1259 else { | |
| 1260 nchar = 1; /* nul character at the end */ | |
| 1261 while (u != uend) { | |
| 1262 if (0xD800 <= u[0] && u[0] <= 0xDBFF | |
| 1263 && 0xDC00 <= u[1] && u[1] <= 0xDFFF) | |
| 1264 u += 2; | |
| 1265 else | |
| 1266 u++; | |
| 1267 nchar++; | |
| 1268 } | |
| 1269 } | |
| 1270 return nchar; | |
| 1271 #elif Py_UNICODE_SIZE == 4 && SIZEOF_WCHAR_T == 2 | |
| 1272 register Py_UNICODE *u, *uend, ordinal; | |
| 1273 register Py_ssize_t i; | |
| 1274 wchar_t *worig, *wend; | |
| 1275 Py_ssize_t nchar; | |
| 1276 | |
| 1277 u = PyUnicode_AS_UNICODE(unicode); | |
| 1278 uend = u + PyUnicode_GET_SIZE(u); | |
| 1279 if (w != NULL) { | |
| 1280 worig = w; | |
| 1281 wend = w + size; | |
| 1282 while (u != uend && w != wend) { | |
| 1283 ordinal = *u; | |
| 1284 if (ordinal > 0xffff) { | |
| 1285 ordinal -= 0x10000; | |
| 1286 *w++ = 0xD800 | (ordinal >> 10); | |
| 1287 *w++ = 0xDC00 | (ordinal & 0x3FF); | |
| 1288 } | |
| 1289 else | |
| 1290 *w++ = ordinal; | |
| 1291 u++; | |
| 1292 } | |
| 1293 if (w != wend) | |
| 1294 *w = 0; | |
| 1295 return w - worig; | |
| 1296 } | |
| 1297 else { | |
| 1298 nchar = 1; /* nul character */ | |
| 1299 while (u != uend) { | |
| 1300 if (*u > 0xffff) | |
| 1301 nchar += 2; | |
| 1302 else | |
| 1303 nchar++; | |
| 1304 u++; | |
| 1305 } | |
| 1306 return nchar; | |
| 1307 } | |
| 1308 #else | |
| 1309 # error "unsupported wchar_t and Py_UNICODE sizes, see issue #8670" | |
| 1310 #endif | |
| 1311 } | |
| 1312 | |
| 1313 Py_ssize_t | |
| 1314 PyUnicode_AsWideChar(PyObject *unicode, | |
| 1315 wchar_t *w, | |
| 1316 Py_ssize_t size) | |
| 1317 { | 1088 { |
| 1318 if (unicode == NULL) { | 1089 if (unicode == NULL) { |
| 1319 PyErr_BadInternalCall(); | 1090 PyErr_BadInternalCall(); |
| 1320 return -1; | 1091 return -1; |
| 1321 } | 1092 } |
| 1322 return unicode_aswidechar((PyUnicodeObject*)unicode, w, size); | 1093 |
| 1323 } | 1094 /* If possible, try to copy the 0-termination as well */ |
| 1324 | 1095 if (size > PyUnicode_GET_SIZE(unicode)) |
| 1325 wchar_t* | 1096 size = PyUnicode_GET_SIZE(unicode) + 1; |
| 1326 PyUnicode_AsWideCharString(PyObject *unicode, | 1097 |
| 1327 Py_ssize_t *size) | 1098 #ifdef HAVE_USABLE_WCHAR_T |
| 1328 { | 1099 memcpy(w, unicode->str, size * sizeof(wchar_t)); |
| 1329 wchar_t* buffer; | 1100 #else |
| 1330 Py_ssize_t buflen; | 1101 { |
| 1331 | 1102 register Py_UNICODE *u; |
| 1332 if (unicode == NULL) { | 1103 register Py_ssize_t i; |
| 1333 PyErr_BadInternalCall(); | 1104 u = PyUnicode_AS_UNICODE(unicode); |
| 1334 return NULL; | 1105 for (i = size; i > 0; i--) |
| 1335 } | 1106 *w++ = *u++; |
| 1336 | 1107 } |
| 1337 buflen = unicode_aswidechar((PyUnicodeObject *)unicode, NULL, 0); | |
| 1338 if (PY_SSIZE_T_MAX / sizeof(wchar_t) < buflen) { | |
| 1339 PyErr_NoMemory(); | |
| 1340 return NULL; | |
| 1341 } | |
| 1342 | |
| 1343 buffer = PyMem_MALLOC(buflen * sizeof(wchar_t)); | |
| 1344 if (buffer == NULL) { | |
| 1345 PyErr_NoMemory(); | |
| 1346 return NULL; | |
| 1347 } | |
| 1348 buflen = unicode_aswidechar((PyUnicodeObject *)unicode, buffer, buflen); | |
| 1349 if (size != NULL) | |
| 1350 *size = buflen; | |
| 1351 return buffer; | |
| 1352 } | |
| 1353 | |
| 1354 #endif | 1108 #endif |
| 1355 | 1109 |
| 1356 PyObject * | 1110 if (size > PyUnicode_GET_SIZE(unicode)) |
| 1357 PyUnicode_FromOrdinal(int ordinal) | 1111 return PyUnicode_GET_SIZE(unicode); |
| 1112 else |
| 1113 return size; |
| 1114 } |
| 1115 |
| 1116 #endif |
| 1117 |
| 1118 PyObject *PyUnicode_FromOrdinal(int ordinal) |
| 1358 { | 1119 { |
| 1359 Py_UNICODE s[2]; | 1120 Py_UNICODE s[2]; |
| 1360 | 1121 |
| 1361 if (ordinal < 0 || ordinal > 0x10ffff) { | 1122 if (ordinal < 0 || ordinal > 0x10ffff) { |
| 1362 PyErr_SetString(PyExc_ValueError, | 1123 PyErr_SetString(PyExc_ValueError, |
| 1363 "chr() arg not in range(0x110000)"); | 1124 "chr() arg not in range(0x110000)"); |
| 1364 return NULL; | 1125 return NULL; |
| 1365 } | 1126 } |
| 1366 | 1127 |
| 1367 #ifndef Py_UNICODE_WIDE | 1128 #ifndef Py_UNICODE_WIDE |
| 1368 if (ordinal > 0xffff) { | 1129 if (ordinal > 0xffff) { |
| 1369 ordinal -= 0x10000; | 1130 ordinal -= 0x10000; |
| 1370 s[0] = 0xD800 | (ordinal >> 10); | 1131 s[0] = 0xD800 | (ordinal >> 10); |
| 1371 s[1] = 0xDC00 | (ordinal & 0x3FF); | 1132 s[1] = 0xDC00 | (ordinal & 0x3FF); |
| 1372 return PyUnicode_FromUnicode(s, 2); | 1133 return PyUnicode_FromUnicode(s, 2); |
| 1373 } | 1134 } |
| 1374 #endif | 1135 #endif |
| 1375 | 1136 |
| 1376 s[0] = (Py_UNICODE)ordinal; | 1137 s[0] = (Py_UNICODE)ordinal; |
| 1377 return PyUnicode_FromUnicode(s, 1); | 1138 return PyUnicode_FromUnicode(s, 1); |
| 1378 } | 1139 } |
| 1379 | 1140 |
| 1380 PyObject * | 1141 PyObject *PyUnicode_FromObject(register PyObject *obj) |
| 1381 PyUnicode_FromObject(register PyObject *obj) | |
| 1382 { | 1142 { |
| 1383 /* XXX Perhaps we should make this API an alias of | 1143 /* XXX Perhaps we should make this API an alias of |
| 1384 PyObject_Str() instead ?! */ | 1144 PyObject_Str() instead ?! */ |
| 1385 if (PyUnicode_CheckExact(obj)) { | 1145 if (PyUnicode_CheckExact(obj)) { |
| 1386 Py_INCREF(obj); | 1146 Py_INCREF(obj); |
| 1387 return obj; | 1147 return obj; |
| 1388 } | 1148 } |
| 1389 if (PyUnicode_Check(obj)) { | 1149 if (PyUnicode_Check(obj)) { |
| 1390 /* For a Unicode subtype that's not a Unicode object, | 1150 /* For a Unicode subtype that's not a Unicode object, |
| 1391 return a true Unicode object with the same data. */ | 1151 return a true Unicode object with the same data. */ |
| 1392 return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj), | 1152 return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj), |
| 1393 PyUnicode_GET_SIZE(obj)); | 1153 PyUnicode_GET_SIZE(obj)); |
| 1394 } | 1154 } |
| 1395 PyErr_Format(PyExc_TypeError, | 1155 PyErr_Format(PyExc_TypeError, |
| 1396 "Can't convert '%.100s' object to str implicitly", | 1156 "Can't convert '%.100s' object to str implicitly", |
| 1397 Py_TYPE(obj)->tp_name); | 1157 Py_TYPE(obj)->tp_name); |
| 1398 return NULL; | 1158 return NULL; |
| 1399 } | 1159 } |
| 1400 | 1160 |
| 1401 PyObject * | 1161 PyObject *PyUnicode_FromEncodedObject(register PyObject *obj, |
| 1402 PyUnicode_FromEncodedObject(register PyObject *obj, | 1162 const char *encoding, |
| 1403 » » » const char *encoding, | 1163 const char *errors) |
| 1404 » » » const char *errors) | |
| 1405 { | 1164 { |
| 1406 Py_buffer buffer; | 1165 Py_buffer buffer; |
| 1407 PyObject *v; | 1166 PyObject *v; |
| 1408 | 1167 |
| 1409 if (obj == NULL) { | 1168 if (obj == NULL) { |
| 1410 PyErr_BadInternalCall(); | 1169 PyErr_BadInternalCall(); |
| 1411 return NULL; | 1170 return NULL; |
| 1412 } | 1171 } |
| 1413 | 1172 |
| 1414 /* Decoding bytes objects is the most common case and should be fast */ | 1173 /* Decoding bytes objects is the most common case and should be fast */ |
| (...skipping 29 matching lines...) Expand all Loading... |
| 1444 Py_INCREF(unicode_empty); | 1203 Py_INCREF(unicode_empty); |
| 1445 v = (PyObject *) unicode_empty; | 1204 v = (PyObject *) unicode_empty; |
| 1446 } | 1205 } |
| 1447 else | 1206 else |
| 1448 v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors); | 1207 v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors); |
| 1449 | 1208 |
| 1450 PyBuffer_Release(&buffer); | 1209 PyBuffer_Release(&buffer); |
| 1451 return v; | 1210 return v; |
| 1452 } | 1211 } |
| 1453 | 1212 |
| 1454 /* Convert encoding to lower case and replace '_' with '-' in order to | 1213 PyObject *PyUnicode_Decode(const char *s, |
| 1455 catch e.g. UTF_8. Return 0 on error (encoding is longer than lower_len-1), | 1214 Py_ssize_t size, |
| 1456 1 on success. */ | 1215 const char *encoding, |
| 1457 static int | 1216 const char *errors) |
| 1458 normalize_encoding(const char *encoding, | 1217 { |
| 1459 char *lower, | 1218 PyObject *buffer = NULL, *unicode; |
| 1460 size_t lower_len) | 1219 Py_buffer info; |
| 1461 { | 1220 char lower[20]; /* Enough for any encoding name we recognize */ |
| 1221 char *l; |
| 1462 const char *e; | 1222 const char *e; |
| 1463 char *l; | 1223 |
| 1464 char *l_end; | 1224 if (encoding == NULL) |
| 1465 | 1225 encoding = PyUnicode_GetDefaultEncoding(); |
| 1226 |
| 1227 /* Convert encoding to lower case and replace '_' with '-' in order to |
| 1228 catch e.g. UTF_8 */ |
| 1466 e = encoding; | 1229 e = encoding; |
| 1467 l = lower; | 1230 l = lower; |
| 1468 l_end = &lower[lower_len - 1]; | 1231 while (*e && l < &lower[(sizeof lower) - 2]) { |
| 1469 while (*e) { | 1232 if (ISUPPER(*e)) { |
| 1470 if (l == l_end) | 1233 *l++ = TOLOWER(*e++); |
| 1471 return 0; | |
| 1472 if (Py_ISUPPER(*e)) { | |
| 1473 *l++ = Py_TOLOWER(*e++); | |
| 1474 } | 1234 } |
| 1475 else if (*e == '_') { | 1235 else if (*e == '_') { |
| 1476 *l++ = '-'; | 1236 *l++ = '-'; |
| 1477 e++; | 1237 e++; |
| 1478 } | 1238 } |
| 1479 else { | 1239 else { |
| 1480 *l++ = *e++; | 1240 *l++ = *e++; |
| 1481 } | 1241 } |
| 1482 } | 1242 } |
| 1483 *l = '\0'; | 1243 *l = '\0'; |
| 1484 return 1; | 1244 |
| 1485 } | 1245 /* Shortcuts for common default encodings */ |
| 1486 | 1246 if (strcmp(lower, "utf-8") == 0) |
| 1487 PyObject * | |
| 1488 PyUnicode_Decode(const char *s, | |
| 1489 » » Py_ssize_t size, | |
| 1490 » » const char *encoding, | |
| 1491 » » const char *errors) | |
| 1492 { | |
| 1493 PyObject *buffer = NULL, *unicode; | |
| 1494 Py_buffer info; | |
| 1495 char lower[11]; /* Enough for any encoding shortcut */ | |
| 1496 | |
| 1497 if (encoding == NULL) | |
| 1498 return PyUnicode_DecodeUTF8(s, size, errors); | 1247 return PyUnicode_DecodeUTF8(s, size, errors); |
| 1499 | 1248 else if ((strcmp(lower, "latin-1") == 0) || |
| 1500 /* Shortcuts for common default encodings */ | 1249 (strcmp(lower, "iso-8859-1") == 0)) |
| 1501 if (normalize_encoding(encoding, lower, sizeof(lower))) { | 1250 return PyUnicode_DecodeLatin1(s, size, errors); |
| 1502 if ((strcmp(lower, "utf-8") == 0) || | |
| 1503 (strcmp(lower, "utf8") == 0)) | |
| 1504 return PyUnicode_DecodeUTF8(s, size, errors); | |
| 1505 else if ((strcmp(lower, "latin-1") == 0) || | |
| 1506 (strcmp(lower, "latin1") == 0) || | |
| 1507 (strcmp(lower, "iso-8859-1") == 0)) | |
| 1508 return PyUnicode_DecodeLatin1(s, size, errors); | |
| 1509 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) | 1251 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) |
| 1510 else if (strcmp(lower, "mbcs") == 0) | 1252 else if (strcmp(lower, "mbcs") == 0) |
| 1511 return PyUnicode_DecodeMBCS(s, size, errors); | 1253 return PyUnicode_DecodeMBCS(s, size, errors); |
| 1512 #endif | 1254 #endif |
| 1513 else if (strcmp(lower, "ascii") == 0) | 1255 else if (strcmp(lower, "ascii") == 0) |
| 1514 return PyUnicode_DecodeASCII(s, size, errors); | 1256 return PyUnicode_DecodeASCII(s, size, errors); |
| 1515 else if (strcmp(lower, "utf-16") == 0) | 1257 else if (strcmp(lower, "utf-16") == 0) |
| 1516 return PyUnicode_DecodeUTF16(s, size, errors, 0); | 1258 return PyUnicode_DecodeUTF16(s, size, errors, 0); |
| 1517 else if (strcmp(lower, "utf-32") == 0) | 1259 else if (strcmp(lower, "utf-32") == 0) |
| 1518 return PyUnicode_DecodeUTF32(s, size, errors, 0); | 1260 return PyUnicode_DecodeUTF32(s, size, errors, 0); |
| 1519 } | |
| 1520 | 1261 |
| 1521 /* Decode via the codec registry */ | 1262 /* Decode via the codec registry */ |
| 1522 buffer = NULL; | 1263 buffer = NULL; |
| 1523 if (PyBuffer_FillInfo(&info, NULL, (void *)s, size, 1, PyBUF_FULL_RO) < 0) | 1264 if (PyBuffer_FillInfo(&info, NULL, (void *)s, size, 1, PyBUF_FULL_RO) < 0) |
| 1524 goto onError; | 1265 goto onError; |
| 1525 buffer = PyMemoryView_FromBuffer(&info); | 1266 buffer = PyMemoryView_FromBuffer(&info); |
| 1526 if (buffer == NULL) | 1267 if (buffer == NULL) |
| 1527 goto onError; | 1268 goto onError; |
| 1528 unicode = PyCodec_Decode(buffer, encoding, errors); | 1269 unicode = PyCodec_Decode(buffer, encoding, errors); |
| 1529 if (unicode == NULL) | 1270 if (unicode == NULL) |
| 1530 goto onError; | 1271 goto onError; |
| 1531 if (!PyUnicode_Check(unicode)) { | 1272 if (!PyUnicode_Check(unicode)) { |
| 1532 PyErr_Format(PyExc_TypeError, | 1273 PyErr_Format(PyExc_TypeError, |
| 1533 "decoder did not return a str object (type=%.400s)", | 1274 "decoder did not return a str object (type=%.400s)", |
| 1534 Py_TYPE(unicode)->tp_name); | 1275 Py_TYPE(unicode)->tp_name); |
| 1535 Py_DECREF(unicode); | 1276 Py_DECREF(unicode); |
| 1536 goto onError; | 1277 goto onError; |
| 1537 } | 1278 } |
| 1538 Py_DECREF(buffer); | 1279 Py_DECREF(buffer); |
| 1539 return unicode; | 1280 return unicode; |
| 1540 | 1281 |
| 1541 onError: | 1282 onError: |
| 1542 Py_XDECREF(buffer); | 1283 Py_XDECREF(buffer); |
| 1543 return NULL; | 1284 return NULL; |
| 1544 } | 1285 } |
| 1545 | 1286 |
| 1546 PyObject * | 1287 PyObject *PyUnicode_AsDecodedObject(PyObject *unicode, |
| 1547 PyUnicode_AsDecodedObject(PyObject *unicode, | 1288 const char *encoding, |
| 1548 » » » const char *encoding, | 1289 const char *errors) |
| 1549 » » » const char *errors) | |
| 1550 { | 1290 { |
| 1551 PyObject *v; | 1291 PyObject *v; |
| 1552 | 1292 |
| 1553 if (!PyUnicode_Check(unicode)) { | 1293 if (!PyUnicode_Check(unicode)) { |
| 1554 PyErr_BadArgument(); | 1294 PyErr_BadArgument(); |
| 1555 goto onError; | 1295 goto onError; |
| 1556 } | 1296 } |
| 1557 | 1297 |
| 1558 if (encoding == NULL) | 1298 if (encoding == NULL) |
| 1559 encoding = PyUnicode_GetDefaultEncoding(); | 1299 encoding = PyUnicode_GetDefaultEncoding(); |
| 1560 | 1300 |
| 1561 /* Decode via the codec registry */ | 1301 /* Decode via the codec registry */ |
| 1562 v = PyCodec_Decode(unicode, encoding, errors); | 1302 v = PyCodec_Decode(unicode, encoding, errors); |
| 1563 if (v == NULL) | 1303 if (v == NULL) |
| 1564 goto onError; | 1304 goto onError; |
| 1565 return v; | 1305 return v; |
| 1566 | 1306 |
| 1567 onError: | 1307 onError: |
| 1568 return NULL; | 1308 return NULL; |
| 1569 } | 1309 } |
| 1570 | 1310 |
| 1571 PyObject * | 1311 PyObject *PyUnicode_AsDecodedUnicode(PyObject *unicode, |
| 1572 PyUnicode_AsDecodedUnicode(PyObject *unicode, | 1312 const char *encoding, |
| 1573 » » » const char *encoding, | 1313 const char *errors) |
| 1574 » » » const char *errors) | |
| 1575 { | 1314 { |
| 1576 PyObject *v; | 1315 PyObject *v; |
| 1577 | 1316 |
| 1578 if (!PyUnicode_Check(unicode)) { | 1317 if (!PyUnicode_Check(unicode)) { |
| 1579 PyErr_BadArgument(); | 1318 PyErr_BadArgument(); |
| 1580 goto onError; | 1319 goto onError; |
| 1581 } | 1320 } |
| 1582 | 1321 |
| 1583 if (encoding == NULL) | 1322 if (encoding == NULL) |
| 1584 encoding = PyUnicode_GetDefaultEncoding(); | 1323 encoding = PyUnicode_GetDefaultEncoding(); |
| 1585 | 1324 |
| 1586 /* Decode via the codec registry */ | 1325 /* Decode via the codec registry */ |
| 1587 v = PyCodec_Decode(unicode, encoding, errors); | 1326 v = PyCodec_Decode(unicode, encoding, errors); |
| 1588 if (v == NULL) | 1327 if (v == NULL) |
| 1589 goto onError; | 1328 goto onError; |
| 1590 if (!PyUnicode_Check(v)) { | 1329 if (!PyUnicode_Check(v)) { |
| 1591 PyErr_Format(PyExc_TypeError, | 1330 PyErr_Format(PyExc_TypeError, |
| 1592 "decoder did not return a str object (type=%.400s)", | 1331 "decoder did not return a str object (type=%.400s)", |
| 1593 Py_TYPE(v)->tp_name); | 1332 Py_TYPE(v)->tp_name); |
| 1594 Py_DECREF(v); | 1333 Py_DECREF(v); |
| 1595 goto onError; | 1334 goto onError; |
| 1596 } | 1335 } |
| 1597 return v; | 1336 return v; |
| 1598 | 1337 |
| 1599 onError: | 1338 onError: |
| 1600 return NULL; | 1339 return NULL; |
| 1601 } | 1340 } |
| 1602 | 1341 |
| 1603 PyObject * | 1342 PyObject *PyUnicode_Encode(const Py_UNICODE *s, |
| 1604 PyUnicode_Encode(const Py_UNICODE *s, | 1343 Py_ssize_t size, |
| 1605 » » Py_ssize_t size, | 1344 const char *encoding, |
| 1606 » » const char *encoding, | 1345 const char *errors) |
| 1607 » » const char *errors) | |
| 1608 { | 1346 { |
| 1609 PyObject *v, *unicode; | 1347 PyObject *v, *unicode; |
| 1610 | 1348 |
| 1611 unicode = PyUnicode_FromUnicode(s, size); | 1349 unicode = PyUnicode_FromUnicode(s, size); |
| 1612 if (unicode == NULL) | 1350 if (unicode == NULL) |
| 1613 return NULL; | 1351 return NULL; |
| 1614 v = PyUnicode_AsEncodedString(unicode, encoding, errors); | 1352 v = PyUnicode_AsEncodedString(unicode, encoding, errors); |
| 1615 Py_DECREF(unicode); | 1353 Py_DECREF(unicode); |
| 1616 return v; | 1354 return v; |
| 1617 } | 1355 } |
| 1618 | 1356 |
| 1619 PyObject * | 1357 PyObject *PyUnicode_AsEncodedObject(PyObject *unicode, |
| 1620 PyUnicode_AsEncodedObject(PyObject *unicode, | 1358 const char *encoding, |
| 1621 » » » const char *encoding, | 1359 const char *errors) |
| 1622 » » » const char *errors) | |
| 1623 { | 1360 { |
| 1624 PyObject *v; | 1361 PyObject *v; |
| 1625 | 1362 |
| 1626 if (!PyUnicode_Check(unicode)) { | 1363 if (!PyUnicode_Check(unicode)) { |
| 1627 PyErr_BadArgument(); | 1364 PyErr_BadArgument(); |
| 1628 goto onError; | 1365 goto onError; |
| 1629 } | 1366 } |
| 1630 | 1367 |
| 1631 if (encoding == NULL) | 1368 if (encoding == NULL) |
| 1632 encoding = PyUnicode_GetDefaultEncoding(); | 1369 encoding = PyUnicode_GetDefaultEncoding(); |
| 1633 | 1370 |
| 1634 /* Encode via the codec registry */ | 1371 /* Encode via the codec registry */ |
| 1635 v = PyCodec_Encode(unicode, encoding, errors); | 1372 v = PyCodec_Encode(unicode, encoding, errors); |
| 1636 if (v == NULL) | 1373 if (v == NULL) |
| 1637 goto onError; | 1374 goto onError; |
| 1638 return v; | 1375 return v; |
| 1639 | 1376 |
| 1640 onError: | 1377 onError: |
| 1641 return NULL; | 1378 return NULL; |
| 1642 } | 1379 } |
| 1643 | 1380 |
| 1644 PyObject * | 1381 PyObject *PyUnicode_AsEncodedString(PyObject *unicode, |
| 1645 PyUnicode_EncodeFSDefault(PyObject *unicode) | 1382 const char *encoding, |
| 1646 { | 1383 const char *errors) |
| 1647 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) | |
| 1648 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), | |
| 1649 PyUnicode_GET_SIZE(unicode), | |
| 1650 NULL); | |
| 1651 #elif defined(__APPLE__) | |
| 1652 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), | |
| 1653 PyUnicode_GET_SIZE(unicode), | |
| 1654 "surrogateescape"); | |
| 1655 #else | |
| 1656 if (Py_FileSystemDefaultEncoding) { | |
| 1657 return PyUnicode_AsEncodedString(unicode, | |
| 1658 Py_FileSystemDefaultEncoding, | |
| 1659 "surrogateescape"); | |
| 1660 } | |
| 1661 else { | |
| 1662 /* locale encoding with surrogateescape */ | |
| 1663 wchar_t *wchar; | |
| 1664 char *bytes; | |
| 1665 PyObject *bytes_obj; | |
| 1666 size_t error_pos; | |
| 1667 | |
| 1668 wchar = PyUnicode_AsWideCharString(unicode, NULL); | |
| 1669 if (wchar == NULL) | |
| 1670 return NULL; | |
| 1671 bytes = _Py_wchar2char(wchar, &error_pos); | |
| 1672 if (bytes == NULL) { | |
| 1673 if (error_pos != (size_t)-1) { | |
| 1674 char *errmsg = strerror(errno); | |
| 1675 PyObject *exc = NULL; | |
| 1676 if (errmsg == NULL) | |
| 1677 errmsg = "Py_wchar2char() failed"; | |
| 1678 raise_encode_exception(&exc, | |
| 1679 "filesystemencoding", | |
| 1680 PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode), | |
| 1681 error_pos, error_pos+1, | |
| 1682 errmsg); | |
| 1683 Py_XDECREF(exc); | |
| 1684 } | |
| 1685 else | |
| 1686 PyErr_NoMemory(); | |
| 1687 PyMem_Free(wchar); | |
| 1688 return NULL; | |
| 1689 } | |
| 1690 PyMem_Free(wchar); | |
| 1691 | |
| 1692 bytes_obj = PyBytes_FromString(bytes); | |
| 1693 PyMem_Free(bytes); | |
| 1694 return bytes_obj; | |
| 1695 } | |
| 1696 #endif | |
| 1697 } | |
| 1698 | |
| 1699 PyObject * | |
| 1700 PyUnicode_AsEncodedString(PyObject *unicode, | |
| 1701 » » » const char *encoding, | |
| 1702 » » » const char *errors) | |
| 1703 { | 1384 { |
| 1704 PyObject *v; | 1385 PyObject *v; |
| 1705 char lower[11]; /* Enough for any encoding shortcut */ | |
| 1706 | 1386 |
| 1707 if (!PyUnicode_Check(unicode)) { | 1387 if (!PyUnicode_Check(unicode)) { |
| 1708 PyErr_BadArgument(); | 1388 PyErr_BadArgument(); |
| 1709 return NULL; | 1389 return NULL; |
| 1710 } | 1390 } |
| 1711 | 1391 |
| 1712 if (encoding == NULL) { | 1392 if (encoding == NULL) |
| 1713 if (errors == NULL || strcmp(errors, "strict") == 0) | 1393 encoding = PyUnicode_GetDefaultEncoding(); |
| 1394 |
| 1395 /* Shortcuts for common default encodings */ |
| 1396 if (errors == NULL) { |
| 1397 if (strcmp(encoding, "utf-8") == 0) |
| 1714 return PyUnicode_AsUTF8String(unicode); | 1398 return PyUnicode_AsUTF8String(unicode); |
| 1715 else | 1399 else if (strcmp(encoding, "latin-1") == 0) |
| 1716 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), | 1400 return PyUnicode_AsLatin1String(unicode); |
| 1717 PyUnicode_GET_SIZE(unicode), | |
| 1718 errors); | |
| 1719 } | |
| 1720 | |
| 1721 /* Shortcuts for common default encodings */ | |
| 1722 if (normalize_encoding(encoding, lower, sizeof(lower))) { | |
| 1723 if ((strcmp(lower, "utf-8") == 0) || | |
| 1724 (strcmp(lower, "utf8") == 0)) | |
| 1725 { | |
| 1726 if (errors == NULL || strcmp(errors, "strict") == 0) | |
| 1727 return PyUnicode_AsUTF8String(unicode); | |
| 1728 else | |
| 1729 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), | |
| 1730 PyUnicode_GET_SIZE(unicode), | |
| 1731 errors); | |
| 1732 } | |
| 1733 else if ((strcmp(lower, "latin-1") == 0) || | |
| 1734 (strcmp(lower, "latin1") == 0) || | |
| 1735 (strcmp(lower, "iso-8859-1") == 0)) | |
| 1736 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode), | |
| 1737 PyUnicode_GET_SIZE(unicode), | |
| 1738 errors); | |
| 1739 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) | 1401 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) |
| 1740 else if (strcmp(lower, "mbcs") == 0) | 1402 else if (strcmp(encoding, "mbcs") == 0) |
| 1741 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), | 1403 return PyUnicode_AsMBCSString(unicode); |
| 1742 PyUnicode_GET_SIZE(unicode), | |
| 1743 errors); | |
| 1744 #endif | 1404 #endif |
| 1745 else if (strcmp(lower, "ascii") == 0) | 1405 else if (strcmp(encoding, "ascii") == 0) |
| 1746 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode), | 1406 return PyUnicode_AsASCIIString(unicode); |
| 1747 PyUnicode_GET_SIZE(unicode), | 1407 /* During bootstrap, we may need to find the encodings |
| 1748 errors); | 1408 package, to load the file system encoding, and require the |
| 1409 file system encoding in order to load the encodings |
| 1410 package. |
| 1411 |
| 1412 Break out of this dependency by assuming that the path to |
| 1413 the encodings module is ASCII-only. XXX could try wcstombs |
| 1414 instead, if the file system encoding is the locale's |
| 1415 encoding. */ |
| 1416 else if (Py_FileSystemDefaultEncoding && |
| 1417 strcmp(encoding, Py_FileSystemDefaultEncoding) == 0 && |
| 1418 !PyThreadState_GET()->interp->codecs_initialized) |
| 1419 return PyUnicode_AsASCIIString(unicode); |
| 1749 } | 1420 } |
| 1750 | 1421 |
| 1751 /* Encode via the codec registry */ | 1422 /* Encode via the codec registry */ |
| 1752 v = PyCodec_Encode(unicode, encoding, errors); | 1423 v = PyCodec_Encode(unicode, encoding, errors); |
| 1753 if (v == NULL) | 1424 if (v == NULL) |
| 1754 return NULL; | 1425 return NULL; |
| 1755 | 1426 |
| 1756 /* The normal path */ | 1427 /* The normal path */ |
| 1757 if (PyBytes_Check(v)) | 1428 if (PyBytes_Check(v)) |
| 1758 return v; | 1429 return v; |
| 1759 | 1430 |
| 1760 /* If the codec returns a buffer, raise a warning and convert to bytes */ | 1431 /* If the codec returns a buffer, raise a warning and convert to bytes */ |
| 1761 if (PyByteArray_Check(v)) { | 1432 if (PyByteArray_Check(v)) { |
| 1762 int error; | 1433 char msg[100]; |
| 1763 PyObject *b; | 1434 PyObject *b; |
| 1764 | 1435 PyOS_snprintf(msg, sizeof(msg), |
| 1765 error = PyErr_WarnFormat(PyExc_RuntimeWarning, 1, | 1436 "encoder %s returned buffer instead of bytes", |
| 1766 "encoder %s returned bytearray instead of bytes", | 1437 encoding); |
| 1767 encoding); | 1438 if (PyErr_WarnEx(PyExc_RuntimeWarning, msg, 1) < 0) { |
| 1768 if (error) { | |
| 1769 Py_DECREF(v); | 1439 Py_DECREF(v); |
| 1770 return NULL; | 1440 return NULL; |
| 1771 } | 1441 } |
| 1772 | 1442 |
| 1773 b = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v)); | 1443 b = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v)); |
| 1774 Py_DECREF(v); | 1444 Py_DECREF(v); |
| 1775 return b; | 1445 return b; |
| 1776 } | 1446 } |
| 1777 | 1447 |
| 1778 PyErr_Format(PyExc_TypeError, | 1448 PyErr_Format(PyExc_TypeError, |
| 1779 "encoder did not return a bytes object (type=%.400s)", | 1449 "encoder did not return a bytes object (type=%.400s)", |
| 1780 Py_TYPE(v)->tp_name); | 1450 Py_TYPE(v)->tp_name); |
| 1781 Py_DECREF(v); | 1451 Py_DECREF(v); |
| 1782 return NULL; | 1452 return NULL; |
| 1783 } | 1453 } |
| 1784 | 1454 |
| 1785 PyObject * | 1455 PyObject *PyUnicode_AsEncodedUnicode(PyObject *unicode, |
| 1786 PyUnicode_AsEncodedUnicode(PyObject *unicode, | 1456 const char *encoding, |
| 1787 » » » const char *encoding, | 1457 const char *errors) |
| 1788 » » » const char *errors) | |
| 1789 { | 1458 { |
| 1790 PyObject *v; | 1459 PyObject *v; |
| 1791 | 1460 |
| 1792 if (!PyUnicode_Check(unicode)) { | 1461 if (!PyUnicode_Check(unicode)) { |
| 1793 PyErr_BadArgument(); | 1462 PyErr_BadArgument(); |
| 1794 goto onError; | 1463 goto onError; |
| 1795 } | 1464 } |
| 1796 | 1465 |
| 1797 if (encoding == NULL) | 1466 if (encoding == NULL) |
| 1798 encoding = PyUnicode_GetDefaultEncoding(); | 1467 encoding = PyUnicode_GetDefaultEncoding(); |
| 1799 | 1468 |
| 1800 /* Encode via the codec registry */ | 1469 /* Encode via the codec registry */ |
| 1801 v = PyCodec_Encode(unicode, encoding, errors); | 1470 v = PyCodec_Encode(unicode, encoding, errors); |
| 1802 if (v == NULL) | 1471 if (v == NULL) |
| 1803 goto onError; | 1472 goto onError; |
| 1804 if (!PyUnicode_Check(v)) { | 1473 if (!PyUnicode_Check(v)) { |
| 1805 PyErr_Format(PyExc_TypeError, | 1474 PyErr_Format(PyExc_TypeError, |
| 1806 "encoder did not return an str object (type=%.400s)", | 1475 "encoder did not return an str object (type=%.400s)", |
| 1807 Py_TYPE(v)->tp_name); | 1476 Py_TYPE(v)->tp_name); |
| 1808 Py_DECREF(v); | 1477 Py_DECREF(v); |
| 1809 goto onError; | 1478 goto onError; |
| 1810 } | 1479 } |
| 1811 return v; | 1480 return v; |
| 1812 | 1481 |
| 1813 onError: | 1482 onError: |
| 1814 return NULL; | 1483 return NULL; |
| 1815 } | 1484 } |
| 1816 | 1485 |
| 1817 PyObject * | 1486 PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode, |
| 1818 _PyUnicode_AsDefaultEncodedString(PyObject *unicode) | 1487 const char *errors) |
| 1819 { | 1488 { |
| 1820 PyObject *v = ((PyUnicodeObject *)unicode)->defenc; | 1489 PyObject *v = ((PyUnicodeObject *)unicode)->defenc; |
| 1821 if (v) | 1490 if (v) |
| 1822 return v; | 1491 return v; |
| 1492 if (errors != NULL) |
| 1493 Py_FatalError("non-NULL encoding in _PyUnicode_AsDefaultEncodedString"); |
| 1823 v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), | 1494 v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), |
| 1824 PyUnicode_GET_SIZE(unicode), | 1495 PyUnicode_GET_SIZE(unicode), |
| 1825 NULL); | 1496 NULL); |
| 1826 if (!v) | 1497 if (!v) |
| 1827 return NULL; | 1498 return NULL; |
| 1828 ((PyUnicodeObject *)unicode)->defenc = v; | 1499 ((PyUnicodeObject *)unicode)->defenc = v; |
| 1829 return v; | 1500 return v; |
| 1830 } | 1501 } |
| 1831 | 1502 |
| 1832 PyObject* | 1503 PyObject* |
| 1833 PyUnicode_DecodeFSDefault(const char *s) { | 1504 PyUnicode_DecodeFSDefault(const char *s) { |
| 1834 Py_ssize_t size = (Py_ssize_t)strlen(s); | 1505 Py_ssize_t size = (Py_ssize_t)strlen(s); |
| 1835 return PyUnicode_DecodeFSDefaultAndSize(s, size); | 1506 return PyUnicode_DecodeFSDefaultAndSize(s, size); |
| 1836 } | 1507 } |
| 1837 | 1508 |
| 1838 PyObject* | 1509 PyObject* |
| 1839 PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) | 1510 PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) |
| 1840 { | 1511 { |
| 1841 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) | |
| 1842 return PyUnicode_DecodeMBCS(s, size, NULL); | |
| 1843 #elif defined(__APPLE__) | |
| 1844 return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); | |
| 1845 #else | |
| 1846 /* During the early bootstrapping process, Py_FileSystemDefaultEncoding | 1512 /* During the early bootstrapping process, Py_FileSystemDefaultEncoding |
| 1847 can be undefined. If it is case, decode using UTF-8. The following assume
s | 1513 can be undefined. If it is case, decode using UTF-8. The following assume
s |
| 1848 that Py_FileSystemDefaultEncoding is set to a built-in encoding during th
e | 1514 that Py_FileSystemDefaultEncoding is set to a built-in encoding during th
e |
| 1849 bootstrapping process where the codecs aren't ready yet. | 1515 bootstrapping process where the codecs aren't ready yet. |
| 1850 */ | 1516 */ |
| 1851 if (Py_FileSystemDefaultEncoding) { | 1517 if (Py_FileSystemDefaultEncoding) { |
| 1518 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) |
| 1519 if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) { |
| 1520 return PyUnicode_DecodeMBCS(s, size, "replace"); |
| 1521 } |
| 1522 #elif defined(__APPLE__) |
| 1523 if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) { |
| 1524 return PyUnicode_DecodeUTF8(s, size, "replace"); |
| 1525 } |
| 1526 #endif |
| 1852 return PyUnicode_Decode(s, size, | 1527 return PyUnicode_Decode(s, size, |
| 1853 Py_FileSystemDefaultEncoding, | 1528 Py_FileSystemDefaultEncoding, |
| 1854 "surrogateescape"); | 1529 "replace"); |
| 1855 } | 1530 } |
| 1856 else { | 1531 else { |
| 1857 /* locale encoding with surrogateescape */ | 1532 return PyUnicode_DecodeUTF8(s, size, "replace"); |
| 1858 wchar_t *wchar; | 1533 } |
| 1859 PyObject *unicode; | 1534 } |
| 1860 size_t len; | 1535 |
| 1861 | 1536 /* Convert the argument to a bytes object, according to the file |
| 1862 if (s[size] != '\0' || size != strlen(s)) { | 1537 system encoding */ |
| 1863 PyErr_SetString(PyExc_TypeError, "embedded NUL character"); | |
| 1864 return NULL; | |
| 1865 } | |
| 1866 | |
| 1867 wchar = _Py_char2wchar(s, &len); | |
| 1868 if (wchar == NULL) | |
| 1869 return PyErr_NoMemory(); | |
| 1870 | |
| 1871 unicode = PyUnicode_FromWideChar(wchar, len); | |
| 1872 PyMem_Free(wchar); | |
| 1873 return unicode; | |
| 1874 } | |
| 1875 #endif | |
| 1876 } | |
| 1877 | |
| 1878 | 1538 |
| 1879 int | 1539 int |
| 1880 PyUnicode_FSConverter(PyObject* arg, void* addr) | 1540 PyUnicode_FSConverter(PyObject* arg, void* addr) |
| 1881 { | 1541 { |
| 1882 PyObject *output = NULL; | 1542 PyObject *output = NULL; |
| 1883 Py_ssize_t size; | 1543 Py_ssize_t size; |
| 1884 void *data; | 1544 void *data; |
| 1885 if (arg == NULL) { | 1545 if (arg == NULL) { |
| 1886 Py_DECREF(*(PyObject**)addr); | 1546 Py_DECREF(*(PyObject**)addr); |
| 1887 return 1; | 1547 return 1; |
| 1888 } | 1548 } |
| 1889 if (PyBytes_Check(arg)) { | 1549 if (PyBytes_Check(arg) || PyByteArray_Check(arg)) { |
| 1890 output = arg; | 1550 output = arg; |
| 1891 Py_INCREF(output); | 1551 Py_INCREF(output); |
| 1892 } | 1552 } |
| 1893 else { | 1553 else { |
| 1894 arg = PyUnicode_FromObject(arg); | 1554 arg = PyUnicode_FromObject(arg); |
| 1895 if (!arg) | 1555 if (!arg) |
| 1896 return 0; | 1556 return 0; |
| 1897 output = PyUnicode_EncodeFSDefault(arg); | 1557 output = PyUnicode_AsEncodedObject(arg, |
| 1558 Py_FileSystemDefaultEncoding, |
| 1559 "surrogateescape"); |
| 1898 Py_DECREF(arg); | 1560 Py_DECREF(arg); |
| 1899 if (!output) | 1561 if (!output) |
| 1900 return 0; | 1562 return 0; |
| 1901 if (!PyBytes_Check(output)) { | 1563 if (!PyBytes_Check(output)) { |
| 1902 Py_DECREF(output); | 1564 Py_DECREF(output); |
| 1903 PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes"); | 1565 PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes"); |
| 1904 return 0; | 1566 return 0; |
| 1905 } | 1567 } |
| 1906 } | 1568 } |
| 1907 size = PyBytes_GET_SIZE(output); | 1569 if (PyBytes_Check(output)) { |
| 1908 data = PyBytes_AS_STRING(output); | 1570 size = PyBytes_GET_SIZE(output); |
| 1571 data = PyBytes_AS_STRING(output); |
| 1572 } |
| 1573 else { |
| 1574 size = PyByteArray_GET_SIZE(output); |
| 1575 data = PyByteArray_AS_STRING(output); |
| 1576 } |
| 1909 if (size != strlen(data)) { | 1577 if (size != strlen(data)) { |
| 1910 PyErr_SetString(PyExc_TypeError, "embedded NUL character"); | 1578 PyErr_SetString(PyExc_TypeError, "embedded NUL character"); |
| 1911 Py_DECREF(output); | 1579 Py_DECREF(output); |
| 1912 return 0; | 1580 return 0; |
| 1913 } | 1581 } |
| 1914 *(PyObject**)addr = output; | 1582 *(PyObject**)addr = output; |
| 1915 return Py_CLEANUP_SUPPORTED; | 1583 return Py_CLEANUP_SUPPORTED; |
| 1916 } | 1584 } |
| 1917 | 1585 |
| 1918 | 1586 |
| 1919 int | |
| 1920 PyUnicode_FSDecoder(PyObject* arg, void* addr) | |
| 1921 { | |
| 1922 PyObject *output = NULL; | |
| 1923 Py_ssize_t size; | |
| 1924 void *data; | |
| 1925 if (arg == NULL) { | |
| 1926 Py_DECREF(*(PyObject**)addr); | |
| 1927 return 1; | |
| 1928 } | |
| 1929 if (PyUnicode_Check(arg)) { | |
| 1930 output = arg; | |
| 1931 Py_INCREF(output); | |
| 1932 } | |
| 1933 else { | |
| 1934 arg = PyBytes_FromObject(arg); | |
| 1935 if (!arg) | |
| 1936 return 0; | |
| 1937 output = PyUnicode_DecodeFSDefaultAndSize(PyBytes_AS_STRING(arg), | |
| 1938 PyBytes_GET_SIZE(arg)); | |
| 1939 Py_DECREF(arg); | |
| 1940 if (!output) | |
| 1941 return 0; | |
| 1942 if (!PyUnicode_Check(output)) { | |
| 1943 Py_DECREF(output); | |
| 1944 PyErr_SetString(PyExc_TypeError, "decoder failed to return unicode")
; | |
| 1945 return 0; | |
| 1946 } | |
| 1947 } | |
| 1948 size = PyUnicode_GET_SIZE(output); | |
| 1949 data = PyUnicode_AS_UNICODE(output); | |
| 1950 if (size != Py_UNICODE_strlen(data)) { | |
| 1951 PyErr_SetString(PyExc_TypeError, "embedded NUL character"); | |
| 1952 Py_DECREF(output); | |
| 1953 return 0; | |
| 1954 } | |
| 1955 *(PyObject**)addr = output; | |
| 1956 return Py_CLEANUP_SUPPORTED; | |
| 1957 } | |
| 1958 | |
| 1959 | |
| 1960 char* | 1587 char* |
| 1961 _PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize) | 1588 _PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize) |
| 1962 { | 1589 { |
| 1963 PyObject *bytes; | 1590 PyObject *bytes; |
| 1964 if (!PyUnicode_Check(unicode)) { | 1591 if (!PyUnicode_Check(unicode)) { |
| 1965 PyErr_BadArgument(); | 1592 PyErr_BadArgument(); |
| 1966 return NULL; | 1593 return NULL; |
| 1967 } | 1594 } |
| 1968 bytes = _PyUnicode_AsDefaultEncodedString(unicode); | 1595 bytes = _PyUnicode_AsDefaultEncodedString(unicode, NULL); |
| 1969 if (bytes == NULL) | 1596 if (bytes == NULL) |
| 1970 return NULL; | 1597 return NULL; |
| 1971 if (psize != NULL) | 1598 if (psize != NULL) |
| 1972 *psize = PyBytes_GET_SIZE(bytes); | 1599 *psize = PyBytes_GET_SIZE(bytes); |
| 1973 return PyBytes_AS_STRING(bytes); | 1600 return PyBytes_AS_STRING(bytes); |
| 1974 } | 1601 } |
| 1975 | 1602 |
| 1976 char* | 1603 char* |
| 1977 _PyUnicode_AsString(PyObject *unicode) | 1604 _PyUnicode_AsString(PyObject *unicode) |
| 1978 { | 1605 { |
| 1979 return _PyUnicode_AsStringAndSize(unicode, NULL); | 1606 return _PyUnicode_AsStringAndSize(unicode, NULL); |
| 1980 } | 1607 } |
| 1981 | 1608 |
| 1982 Py_UNICODE * | 1609 Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode) |
| 1983 PyUnicode_AsUnicode(PyObject *unicode) | |
| 1984 { | 1610 { |
| 1985 if (!PyUnicode_Check(unicode)) { | 1611 if (!PyUnicode_Check(unicode)) { |
| 1986 PyErr_BadArgument(); | 1612 PyErr_BadArgument(); |
| 1987 goto onError; | 1613 goto onError; |
| 1988 } | 1614 } |
| 1989 return PyUnicode_AS_UNICODE(unicode); | 1615 return PyUnicode_AS_UNICODE(unicode); |
| 1990 | 1616 |
| 1991 onError: | 1617 onError: |
| 1992 return NULL; | 1618 return NULL; |
| 1993 } | 1619 } |
| 1994 | 1620 |
| 1995 Py_ssize_t | 1621 Py_ssize_t PyUnicode_GetSize(PyObject *unicode) |
| 1996 PyUnicode_GetSize(PyObject *unicode) | |
| 1997 { | 1622 { |
| 1998 if (!PyUnicode_Check(unicode)) { | 1623 if (!PyUnicode_Check(unicode)) { |
| 1999 PyErr_BadArgument(); | 1624 PyErr_BadArgument(); |
| 2000 goto onError; | 1625 goto onError; |
| 2001 } | 1626 } |
| 2002 return PyUnicode_GET_SIZE(unicode); | 1627 return PyUnicode_GET_SIZE(unicode); |
| 2003 | 1628 |
| 2004 onError: | 1629 onError: |
| 2005 return -1; | 1630 return -1; |
| 2006 } | 1631 } |
| 2007 | 1632 |
| 2008 const char * | 1633 const char *PyUnicode_GetDefaultEncoding(void) |
| 2009 PyUnicode_GetDefaultEncoding(void) | 1634 { |
| 2010 { | 1635 return unicode_default_encoding; |
| 2011 return "utf-8"; | 1636 } |
| 2012 } | 1637 |
| 2013 | 1638 int PyUnicode_SetDefaultEncoding(const char *encoding) |
| 2014 /* create or adjust a UnicodeDecodeError */ | 1639 { |
| 2015 static void | 1640 if (strcmp(encoding, unicode_default_encoding) != 0) { |
| 2016 make_decode_exception(PyObject **exceptionObject, | 1641 PyErr_Format(PyExc_ValueError, |
| 2017 const char *encoding, | 1642 "Can only set default encoding to %s", |
| 2018 const char *input, Py_ssize_t length, | 1643 unicode_default_encoding); |
| 2019 Py_ssize_t startpos, Py_ssize_t endpos, | 1644 return -1; |
| 2020 const char *reason) | 1645 } |
| 2021 { | 1646 return 0; |
| 2022 if (*exceptionObject == NULL) { | |
| 2023 *exceptionObject = PyUnicodeDecodeError_Create( | |
| 2024 encoding, input, length, startpos, endpos, reason); | |
| 2025 } | |
| 2026 else { | |
| 2027 if (PyUnicodeDecodeError_SetStart(*exceptionObject, startpos)) | |
| 2028 goto onError; | |
| 2029 if (PyUnicodeDecodeError_SetEnd(*exceptionObject, endpos)) | |
| 2030 goto onError; | |
| 2031 if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason)) | |
| 2032 goto onError; | |
| 2033 } | |
| 2034 return; | |
| 2035 | |
| 2036 onError: | |
| 2037 Py_DECREF(*exceptionObject); | |
| 2038 *exceptionObject = NULL; | |
| 2039 } | 1647 } |
| 2040 | 1648 |
| 2041 /* error handling callback helper: | 1649 /* error handling callback helper: |
| 2042 build arguments, call the callback and check the arguments, | 1650 build arguments, call the callback and check the arguments, |
| 2043 if no exception occurred, copy the replacement to the output | 1651 if no exception occurred, copy the replacement to the output |
| 2044 and adjust various state variables. | 1652 and adjust various state variables. |
| 2045 return 0 on success, -1 on error | 1653 return 0 on success, -1 on error |
| 2046 */ | 1654 */ |
| 2047 | 1655 |
| 2048 static int | 1656 static |
| 2049 unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, | 1657 int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
, |
| 2050 » » » » const char *encoding, const char *reason, | 1658 const char *encoding, const char *reason, |
| 2051 » » » » const char **input, const char **inend, Py_ssiz
e_t *startinpos, | 1659 const char **input, const char **inend, Py_
ssize_t *startinpos, |
| 2052 » » » » Py_ssize_t *endinpos, PyObject **exceptionObjec
t, const char **inptr, | 1660 Py_ssize_t *endinpos, PyObject **exceptionO
bject, const char **inptr, |
| 2053 » » » » PyUnicodeObject **output, Py_ssize_t *outpos, P
y_UNICODE **outptr) | 1661 PyUnicodeObject **output, Py_ssize_t *outpo
s, Py_UNICODE **outptr) |
| 2054 { | 1662 { |
| 2055 static char *argparse = "O!n;decoding error handler must return (str, int) t
uple"; | 1663 static char *argparse = "O!n;decoding error handler must return (str, int) t
uple"; |
| 2056 | 1664 |
| 2057 PyObject *restuple = NULL; | 1665 PyObject *restuple = NULL; |
| 2058 PyObject *repunicode = NULL; | 1666 PyObject *repunicode = NULL; |
| 2059 Py_ssize_t outsize = PyUnicode_GET_SIZE(*output); | 1667 Py_ssize_t outsize = PyUnicode_GET_SIZE(*output); |
| 2060 Py_ssize_t insize; | 1668 Py_ssize_t insize; |
| 2061 Py_ssize_t requiredsize; | 1669 Py_ssize_t requiredsize; |
| 2062 Py_ssize_t newpos; | 1670 Py_ssize_t newpos; |
| 2063 Py_UNICODE *repptr; | 1671 Py_UNICODE *repptr; |
| 2064 PyObject *inputobj = NULL; | 1672 PyObject *inputobj = NULL; |
| 2065 Py_ssize_t repsize; | 1673 Py_ssize_t repsize; |
| 2066 int res = -1; | 1674 int res = -1; |
| 2067 | 1675 |
| 2068 if (*errorHandler == NULL) { | 1676 if (*errorHandler == NULL) { |
| 2069 *errorHandler = PyCodec_LookupError(errors); | 1677 *errorHandler = PyCodec_LookupError(errors); |
| 2070 if (*errorHandler == NULL) | 1678 if (*errorHandler == NULL) |
| 2071 goto onError; | 1679 goto onError; |
| 2072 } | 1680 } |
| 2073 | 1681 |
| 2074 make_decode_exception(exceptionObject, | 1682 if (*exceptionObject == NULL) { |
| 2075 encoding, | 1683 *exceptionObject = PyUnicodeDecodeError_Create( |
| 2076 *input, *inend - *input, | 1684 encoding, *input, *inend-*input, *startinpos, *endinpos, reason); |
| 2077 *startinpos, *endinpos, | 1685 if (*exceptionObject == NULL) |
| 2078 reason); | 1686 goto onError; |
| 2079 if (*exceptionObject == NULL) | 1687 } |
| 2080 goto onError; | 1688 else { |
| 1689 if (PyUnicodeDecodeError_SetStart(*exceptionObject, *startinpos)) |
| 1690 goto onError; |
| 1691 if (PyUnicodeDecodeError_SetEnd(*exceptionObject, *endinpos)) |
| 1692 goto onError; |
| 1693 if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason)) |
| 1694 goto onError; |
| 1695 } |
| 2081 | 1696 |
| 2082 restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NUL
L); | 1697 restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NUL
L); |
| 2083 if (restuple == NULL) | 1698 if (restuple == NULL) |
| 2084 goto onError; | 1699 goto onError; |
| 2085 if (!PyTuple_Check(restuple)) { | 1700 if (!PyTuple_Check(restuple)) { |
| 2086 PyErr_SetString(PyExc_TypeError, &argparse[4]); | 1701 PyErr_SetString(PyExc_TypeError, &argparse[4]); |
| 2087 goto onError; | 1702 goto onError; |
| 2088 } | 1703 } |
| 2089 if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &new
pos)) | 1704 if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &new
pos)) |
| 2090 goto onError; | 1705 goto onError; |
| (...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2213 * on whether we are encoding whitespace as itself. RFC2152 makes it | 1828 * on whether we are encoding whitespace as itself. RFC2152 makes it |
| 2214 * clear that the answers to these questions vary between | 1829 * clear that the answers to these questions vary between |
| 2215 * applications, so this code needs to be flexible. */ | 1830 * applications, so this code needs to be flexible. */ |
| 2216 | 1831 |
| 2217 #define ENCODE_DIRECT(c, directO, directWS) \ | 1832 #define ENCODE_DIRECT(c, directO, directWS) \ |
| 2218 ((c) < 128 && (c) > 0 && \ | 1833 ((c) < 128 && (c) > 0 && \ |
| 2219 ((utf7_category[(c)] == 0) || \ | 1834 ((utf7_category[(c)] == 0) || \ |
| 2220 (directWS && (utf7_category[(c)] == 2)) || \ | 1835 (directWS && (utf7_category[(c)] == 2)) || \ |
| 2221 (directO && (utf7_category[(c)] == 1)))) | 1836 (directO && (utf7_category[(c)] == 1)))) |
| 2222 | 1837 |
| 2223 PyObject * | 1838 PyObject *PyUnicode_DecodeUTF7(const char *s, |
| 2224 PyUnicode_DecodeUTF7(const char *s, | 1839 Py_ssize_t size, |
| 2225 » » Py_ssize_t size, | 1840 const char *errors) |
| 2226 » » const char *errors) | |
| 2227 { | 1841 { |
| 2228 return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL); | 1842 return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL); |
| 2229 } | 1843 } |
| 2230 | 1844 |
| 2231 /* The decoder. The only state we preserve is our read position, | 1845 /* The decoder. The only state we preserve is our read position, |
| 2232 * i.e. how many characters we have consumed. So if we end in the | 1846 * i.e. how many characters we have consumed. So if we end in the |
| 2233 * middle of a shift sequence we have to back off the read position | 1847 * middle of a shift sequence we have to back off the read position |
| 2234 * and the output to the beginning of the sequence, otherwise we lose | 1848 * and the output to the beginning of the sequence, otherwise we lose |
| 2235 * all the shift state (seen bits, number of bits seen, high | 1849 * all the shift state (seen bits, number of bits seen, high |
| 2236 * surrogate). */ | 1850 * surrogate). */ |
| 2237 | 1851 |
| 2238 PyObject * | 1852 PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, |
| 2239 PyUnicode_DecodeUTF7Stateful(const char *s, | 1853 Py_ssize_t size, |
| 2240 » » » Py_ssize_t size, | 1854 const char *errors, |
| 2241 » » » const char *errors, | 1855 Py_ssize_t *consumed) |
| 2242 » » » Py_ssize_t *consumed) | |
| 2243 { | 1856 { |
| 2244 const char *starts = s; | 1857 const char *starts = s; |
| 2245 Py_ssize_t startinpos; | 1858 Py_ssize_t startinpos; |
| 2246 Py_ssize_t endinpos; | 1859 Py_ssize_t endinpos; |
| 2247 Py_ssize_t outpos; | 1860 Py_ssize_t outpos; |
| 2248 const char *e; | 1861 const char *e; |
| 2249 PyUnicodeObject *unicode; | 1862 PyUnicodeObject *unicode; |
| 2250 Py_UNICODE *p; | 1863 Py_UNICODE *p; |
| 2251 const char *errmsg = ""; | 1864 const char *errmsg = ""; |
| 2252 int inShift = 0; | 1865 int inShift = 0; |
| (...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2419 return (PyObject *)unicode; | 2032 return (PyObject *)unicode; |
| 2420 | 2033 |
| 2421 onError: | 2034 onError: |
| 2422 Py_XDECREF(errorHandler); | 2035 Py_XDECREF(errorHandler); |
| 2423 Py_XDECREF(exc); | 2036 Py_XDECREF(exc); |
| 2424 Py_DECREF(unicode); | 2037 Py_DECREF(unicode); |
| 2425 return NULL; | 2038 return NULL; |
| 2426 } | 2039 } |
| 2427 | 2040 |
| 2428 | 2041 |
| 2429 PyObject * | 2042 PyObject *PyUnicode_EncodeUTF7(const Py_UNICODE *s, |
| 2430 PyUnicode_EncodeUTF7(const Py_UNICODE *s, | 2043 Py_ssize_t size, |
| 2431 » » Py_ssize_t size, | 2044 int base64SetO, |
| 2432 » » int base64SetO, | 2045 int base64WhiteSpace, |
| 2433 » » int base64WhiteSpace, | 2046 const char *errors) |
| 2434 » » const char *errors) | |
| 2435 { | 2047 { |
| 2436 PyObject *v; | 2048 PyObject *v; |
| 2437 /* It might be possible to tighten this worst case */ | 2049 /* It might be possible to tighten this worst case */ |
| 2438 Py_ssize_t allocated = 8 * size; | 2050 Py_ssize_t allocated = 8 * size; |
| 2439 int inShift = 0; | 2051 int inShift = 0; |
| 2440 Py_ssize_t i = 0; | 2052 Py_ssize_t i = 0; |
| 2441 unsigned int base64bits = 0; | 2053 unsigned int base64bits = 0; |
| 2442 unsigned long base64buffer = 0; | 2054 unsigned long base64buffer = 0; |
| 2443 char * out; | 2055 char * out; |
| 2444 char * start; | 2056 char * start; |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2529 #undef ENCODE_DIRECT | 2141 #undef ENCODE_DIRECT |
| 2530 | 2142 |
| 2531 /* --- UTF-8 Codec -------------------------------------------------------- */ | 2143 /* --- UTF-8 Codec -------------------------------------------------------- */ |
| 2532 | 2144 |
| 2533 static | 2145 static |
| 2534 char utf8_code_length[256] = { | 2146 char utf8_code_length[256] = { |
| 2535 /* Map UTF-8 encoded prefix byte to sequence length. Zero means | 2147 /* Map UTF-8 encoded prefix byte to sequence length. Zero means |
| 2536 illegal prefix. See RFC 3629 for details */ | 2148 illegal prefix. See RFC 3629 for details */ |
| 2537 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00-0F */ | 2149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00-0F */ |
| 2538 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 2150 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 2539 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 2151 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 2540 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 2152 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 2541 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 2153 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 2542 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 2154 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 2543 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 2155 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 2544 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 70-7F */ | 2156 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 70-7F */ |
| 2545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8F */ | 2157 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8F */ |
| 2546 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 2158 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 2547 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 2159 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 2548 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0-BF */ | 2160 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0-BF */ |
| 2549 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* C0-C1 + C2-CF */ | 2161 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* C0-C1 + C2-CF */ |
| 2550 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* D0-DF */ | 2162 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* D0-DF */ |
| 2551 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* E0-EF */ | 2163 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* E0-EF */ |
| 2552 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F0-F4 + F5-FF */ | 2164 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F0-F4 + F5-FF */ |
| 2553 }; | 2165 }; |
| 2554 | 2166 |
| 2555 PyObject * | 2167 PyObject *PyUnicode_DecodeUTF8(const char *s, |
| 2556 PyUnicode_DecodeUTF8(const char *s, | 2168 Py_ssize_t size, |
| 2557 » » Py_ssize_t size, | 2169 const char *errors) |
| 2558 » » const char *errors) | |
| 2559 { | 2170 { |
| 2560 return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL); | 2171 return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL); |
| 2561 } | 2172 } |
| 2562 | 2173 |
| 2563 /* Mask to check or force alignment of a pointer to C 'long' boundaries */ | 2174 /* Mask to check or force alignment of a pointer to C 'long' boundaries */ |
| 2564 #define LONG_PTR_MASK (size_t) (SIZEOF_LONG - 1) | 2175 #define LONG_PTR_MASK (size_t) (SIZEOF_LONG - 1) |
| 2565 | 2176 |
| 2566 /* Mask to quickly check whether a C 'long' contains a | 2177 /* Mask to quickly check whether a C 'long' contains a |
| 2567 non-ASCII, UTF8-encoded char. */ | 2178 non-ASCII, UTF8-encoded char. */ |
| 2568 #if (SIZEOF_LONG == 8) | 2179 #if (SIZEOF_LONG == 8) |
| 2569 # define ASCII_CHAR_MASK 0x8080808080808080L | 2180 # define ASCII_CHAR_MASK 0x8080808080808080L |
| 2570 #elif (SIZEOF_LONG == 4) | 2181 #elif (SIZEOF_LONG == 4) |
| 2571 # define ASCII_CHAR_MASK 0x80808080L | 2182 # define ASCII_CHAR_MASK 0x80808080L |
| 2572 #else | 2183 #else |
| 2573 # error C 'long' size should be either 4 or 8! | 2184 # error C 'long' size should be either 4 or 8! |
| 2574 #endif | 2185 #endif |
| 2575 | 2186 |
| 2576 PyObject * | 2187 PyObject *PyUnicode_DecodeUTF8Stateful(const char *s, |
| 2577 PyUnicode_DecodeUTF8Stateful(const char *s, | 2188 Py_ssize_t size, |
| 2578 » » » Py_ssize_t size, | 2189 const char *errors, |
| 2579 » » » const char *errors, | 2190 Py_ssize_t *consumed) |
| 2580 » » » Py_ssize_t *consumed) | |
| 2581 { | 2191 { |
| 2582 const char *starts = s; | 2192 const char *starts = s; |
| 2583 int n; | 2193 int n; |
| 2584 int k; | 2194 int k; |
| 2585 Py_ssize_t startinpos; | 2195 Py_ssize_t startinpos; |
| 2586 Py_ssize_t endinpos; | 2196 Py_ssize_t endinpos; |
| 2587 Py_ssize_t outpos; | 2197 Py_ssize_t outpos; |
| 2588 const char *e, *aligned_end; | 2198 const char *e, *aligned_end; |
| 2589 PyUnicodeObject *unicode; | 2199 PyUnicodeObject *unicode; |
| 2590 Py_UNICODE *p; | 2200 Py_UNICODE *p; |
| (...skipping 200 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2791 | 2401 |
| 2792 onError: | 2402 onError: |
| 2793 Py_XDECREF(errorHandler); | 2403 Py_XDECREF(errorHandler); |
| 2794 Py_XDECREF(exc); | 2404 Py_XDECREF(exc); |
| 2795 Py_DECREF(unicode); | 2405 Py_DECREF(unicode); |
| 2796 return NULL; | 2406 return NULL; |
| 2797 } | 2407 } |
| 2798 | 2408 |
| 2799 #undef ASCII_CHAR_MASK | 2409 #undef ASCII_CHAR_MASK |
| 2800 | 2410 |
| 2801 #ifdef __APPLE__ | |
| 2802 | |
| 2803 /* Simplified UTF-8 decoder using surrogateescape error handler, | |
| 2804 used to decode the command line arguments on Mac OS X. */ | |
| 2805 | |
| 2806 wchar_t* | |
| 2807 _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size) | |
| 2808 { | |
| 2809 int n; | |
| 2810 const char *e; | |
| 2811 wchar_t *unicode, *p; | |
| 2812 | |
| 2813 /* Note: size will always be longer than the resulting Unicode | |
| 2814 character count */ | |
| 2815 if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) { | |
| 2816 PyErr_NoMemory(); | |
| 2817 return NULL; | |
| 2818 } | |
| 2819 unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t)); | |
| 2820 if (!unicode) | |
| 2821 return NULL; | |
| 2822 | |
| 2823 /* Unpack UTF-8 encoded data */ | |
| 2824 p = unicode; | |
| 2825 e = s + size; | |
| 2826 while (s < e) { | |
| 2827 Py_UCS4 ch = (unsigned char)*s; | |
| 2828 | |
| 2829 if (ch < 0x80) { | |
| 2830 *p++ = (wchar_t)ch; | |
| 2831 s++; | |
| 2832 continue; | |
| 2833 } | |
| 2834 | |
| 2835 n = utf8_code_length[ch]; | |
| 2836 if (s + n > e) { | |
| 2837 goto surrogateescape; | |
| 2838 } | |
| 2839 | |
| 2840 switch (n) { | |
| 2841 case 0: | |
| 2842 case 1: | |
| 2843 goto surrogateescape; | |
| 2844 | |
| 2845 case 2: | |
| 2846 if ((s[1] & 0xc0) != 0x80) | |
| 2847 goto surrogateescape; | |
| 2848 ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f); | |
| 2849 assert ((ch > 0x007F) && (ch <= 0x07FF)); | |
| 2850 *p++ = (wchar_t)ch; | |
| 2851 break; | |
| 2852 | |
| 2853 case 3: | |
| 2854 /* Decoding UTF-8 sequences in range \xed\xa0\x80-\xed\xbf\xbf | |
| 2855 will result in surrogates in range d800-dfff. Surrogates are | |
| 2856 not valid UTF-8 so they are rejected. | |
| 2857 See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf | |
| 2858 (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */ | |
| 2859 if ((s[1] & 0xc0) != 0x80 || | |
| 2860 (s[2] & 0xc0) != 0x80 || | |
| 2861 ((unsigned char)s[0] == 0xE0 && | |
| 2862 (unsigned char)s[1] < 0xA0) || | |
| 2863 ((unsigned char)s[0] == 0xED && | |
| 2864 (unsigned char)s[1] > 0x9F)) { | |
| 2865 | |
| 2866 goto surrogateescape; | |
| 2867 } | |
| 2868 ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f); | |
| 2869 assert ((ch > 0x07FF) && (ch <= 0xFFFF)); | |
| 2870 *p++ = (Py_UNICODE)ch; | |
| 2871 break; | |
| 2872 | |
| 2873 case 4: | |
| 2874 if ((s[1] & 0xc0) != 0x80 || | |
| 2875 (s[2] & 0xc0) != 0x80 || | |
| 2876 (s[3] & 0xc0) != 0x80 || | |
| 2877 ((unsigned char)s[0] == 0xF0 && | |
| 2878 (unsigned char)s[1] < 0x90) || | |
| 2879 ((unsigned char)s[0] == 0xF4 && | |
| 2880 (unsigned char)s[1] > 0x8F)) { | |
| 2881 goto surrogateescape; | |
| 2882 } | |
| 2883 ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) + | |
| 2884 ((s[2] & 0x3f) << 6) + (s[3] & 0x3f); | |
| 2885 assert ((ch > 0xFFFF) && (ch <= 0x10ffff)); | |
| 2886 | |
| 2887 #if SIZEOF_WCHAR_T == 4 | |
| 2888 *p++ = (wchar_t)ch; | |
| 2889 #else | |
| 2890 /* compute and append the two surrogates: */ | |
| 2891 | |
| 2892 /* translate from 10000..10FFFF to 0..FFFF */ | |
| 2893 ch -= 0x10000; | |
| 2894 | |
| 2895 /* high surrogate = top 10 bits added to D800 */ | |
| 2896 *p++ = (wchar_t)(0xD800 + (ch >> 10)); | |
| 2897 | |
| 2898 /* low surrogate = bottom 10 bits added to DC00 */ | |
| 2899 *p++ = (wchar_t)(0xDC00 + (ch & 0x03FF)); | |
| 2900 #endif | |
| 2901 break; | |
| 2902 } | |
| 2903 s += n; | |
| 2904 continue; | |
| 2905 | |
| 2906 surrogateescape: | |
| 2907 *p++ = 0xDC00 + ch; | |
| 2908 s++; | |
| 2909 } | |
| 2910 *p = L'\0'; | |
| 2911 return unicode; | |
| 2912 } | |
| 2913 | |
| 2914 #endif /* __APPLE__ */ | |
| 2915 | 2411 |
| 2916 /* Allocation strategy: if the string is short, convert into a stack buffer | 2412 /* Allocation strategy: if the string is short, convert into a stack buffer |
| 2917 and allocate exactly as much space needed at the end. Else allocate the | 2413 and allocate exactly as much space needed at the end. Else allocate the |
| 2918 maximum possible needed (4 result bytes per Unicode character), and return | 2414 maximum possible needed (4 result bytes per Unicode character), and return |
| 2919 the excess memory at the end. | 2415 the excess memory at the end. |
| 2920 */ | 2416 */ |
| 2921 PyObject * | 2417 PyObject * |
| 2922 PyUnicode_EncodeUTF8(const Py_UNICODE *s, | 2418 PyUnicode_EncodeUTF8(const Py_UNICODE *s, |
| 2923 Py_ssize_t size, | 2419 Py_ssize_t size, |
| 2924 const char *errors) | 2420 const char *errors) |
| (...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3076 return result; | 2572 return result; |
| 3077 error: | 2573 error: |
| 3078 Py_XDECREF(errorHandler); | 2574 Py_XDECREF(errorHandler); |
| 3079 Py_XDECREF(exc); | 2575 Py_XDECREF(exc); |
| 3080 Py_XDECREF(result); | 2576 Py_XDECREF(result); |
| 3081 return NULL; | 2577 return NULL; |
| 3082 | 2578 |
| 3083 #undef MAX_SHORT_UNICHARS | 2579 #undef MAX_SHORT_UNICHARS |
| 3084 } | 2580 } |
| 3085 | 2581 |
| 3086 PyObject * | 2582 PyObject *PyUnicode_AsUTF8String(PyObject *unicode) |
| 3087 PyUnicode_AsUTF8String(PyObject *unicode) | 2583 { |
| 3088 { | |
| 3089 PyObject *utf8; | |
| 3090 if (!PyUnicode_Check(unicode)) { | 2584 if (!PyUnicode_Check(unicode)) { |
| 3091 PyErr_BadArgument(); | 2585 PyErr_BadArgument(); |
| 3092 return NULL; | 2586 return NULL; |
| 3093 } | 2587 } |
| 3094 utf8 = _PyUnicode_AsDefaultEncodedString(unicode); | 2588 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), |
| 3095 if (utf8 == NULL) | 2589 PyUnicode_GET_SIZE(unicode), |
| 3096 return NULL; | 2590 NULL); |
| 3097 Py_INCREF(utf8); | |
| 3098 return utf8; | |
| 3099 } | 2591 } |
| 3100 | 2592 |
| 3101 /* --- UTF-32 Codec ------------------------------------------------------- */ | 2593 /* --- UTF-32 Codec ------------------------------------------------------- */ |
| 3102 | 2594 |
| 3103 PyObject * | 2595 PyObject * |
| 3104 PyUnicode_DecodeUTF32(const char *s, | 2596 PyUnicode_DecodeUTF32(const char *s, |
| 3105 Py_ssize_t size, | 2597 Py_ssize_t size, |
| 3106 const char *errors, | 2598 const char *errors, |
| 3107 int *byteorder) | 2599 int *byteorder) |
| 3108 { | 2600 { |
| (...skipping 23 matching lines...) Expand all Loading... |
| 3132 int bo = 0; /* assume native ordering by default */ | 2624 int bo = 0; /* assume native ordering by default */ |
| 3133 const char *errmsg = ""; | 2625 const char *errmsg = ""; |
| 3134 /* Offsets from q for retrieving bytes in the right order. */ | 2626 /* Offsets from q for retrieving bytes in the right order. */ |
| 3135 #ifdef BYTEORDER_IS_LITTLE_ENDIAN | 2627 #ifdef BYTEORDER_IS_LITTLE_ENDIAN |
| 3136 int iorder[] = {0, 1, 2, 3}; | 2628 int iorder[] = {0, 1, 2, 3}; |
| 3137 #else | 2629 #else |
| 3138 int iorder[] = {3, 2, 1, 0}; | 2630 int iorder[] = {3, 2, 1, 0}; |
| 3139 #endif | 2631 #endif |
| 3140 PyObject *errorHandler = NULL; | 2632 PyObject *errorHandler = NULL; |
| 3141 PyObject *exc = NULL; | 2633 PyObject *exc = NULL; |
| 3142 | 2634 |
| 3143 q = (unsigned char *)s; | 2635 q = (unsigned char *)s; |
| 3144 e = q + size; | 2636 e = q + size; |
| 3145 | 2637 |
| 3146 if (byteorder) | 2638 if (byteorder) |
| 3147 bo = *byteorder; | 2639 bo = *byteorder; |
| 3148 | 2640 |
| 3149 /* Check for BOM marks (U+FEFF) in the input and adjust current | 2641 /* Check for BOM marks (U+FEFF) in the input and adjust current |
| 3150 byte order setting accordingly. In native mode, the leading BOM | 2642 byte order setting accordingly. In native mode, the leading BOM |
| 3151 mark is skipped, in all other modes, it is copied to the output | 2643 mark is skipped, in all other modes, it is copied to the output |
| 3152 stream as-is (giving a ZWNBSP character). */ | 2644 stream as-is (giving a ZWNBSP character). */ |
| (...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3354 } | 2846 } |
| 3355 #endif | 2847 #endif |
| 3356 STORECHAR(ch); | 2848 STORECHAR(ch); |
| 3357 } | 2849 } |
| 3358 | 2850 |
| 3359 done: | 2851 done: |
| 3360 return v; | 2852 return v; |
| 3361 #undef STORECHAR | 2853 #undef STORECHAR |
| 3362 } | 2854 } |
| 3363 | 2855 |
| 3364 PyObject * | 2856 PyObject *PyUnicode_AsUTF32String(PyObject *unicode) |
| 3365 PyUnicode_AsUTF32String(PyObject *unicode) | |
| 3366 { | 2857 { |
| 3367 if (!PyUnicode_Check(unicode)) { | 2858 if (!PyUnicode_Check(unicode)) { |
| 3368 PyErr_BadArgument(); | 2859 PyErr_BadArgument(); |
| 3369 return NULL; | 2860 return NULL; |
| 3370 } | 2861 } |
| 3371 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(unicode), | 2862 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(unicode), |
| 3372 PyUnicode_GET_SIZE(unicode), | 2863 PyUnicode_GET_SIZE(unicode), |
| 3373 NULL, | 2864 NULL, |
| 3374 0); | 2865 0); |
| 3375 } | 2866 } |
| (...skipping 369 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3745 STORECHAR(ch); | 3236 STORECHAR(ch); |
| 3746 if (ch2) | 3237 if (ch2) |
| 3747 STORECHAR(ch2); | 3238 STORECHAR(ch2); |
| 3748 } | 3239 } |
| 3749 | 3240 |
| 3750 done: | 3241 done: |
| 3751 return v; | 3242 return v; |
| 3752 #undef STORECHAR | 3243 #undef STORECHAR |
| 3753 } | 3244 } |
| 3754 | 3245 |
| 3755 PyObject * | 3246 PyObject *PyUnicode_AsUTF16String(PyObject *unicode) |
| 3756 PyUnicode_AsUTF16String(PyObject *unicode) | |
| 3757 { | 3247 { |
| 3758 if (!PyUnicode_Check(unicode)) { | 3248 if (!PyUnicode_Check(unicode)) { |
| 3759 PyErr_BadArgument(); | 3249 PyErr_BadArgument(); |
| 3760 return NULL; | 3250 return NULL; |
| 3761 } | 3251 } |
| 3762 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(unicode), | 3252 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(unicode), |
| 3763 PyUnicode_GET_SIZE(unicode), | 3253 PyUnicode_GET_SIZE(unicode), |
| 3764 NULL, | 3254 NULL, |
| 3765 0); | 3255 0); |
| 3766 } | 3256 } |
| 3767 | 3257 |
| 3768 /* --- Unicode Escape Codec ----------------------------------------------- */ | 3258 /* --- Unicode Escape Codec ----------------------------------------------- */ |
| 3769 | 3259 |
| 3770 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; | 3260 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; |
| 3771 | 3261 |
| 3772 PyObject * | 3262 PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, |
| 3773 PyUnicode_DecodeUnicodeEscape(const char *s, | 3263 Py_ssize_t size, |
| 3774 » » » Py_ssize_t size, | 3264 const char *errors) |
| 3775 » » » const char *errors) | |
| 3776 { | 3265 { |
| 3777 const char *starts = s; | 3266 const char *starts = s; |
| 3778 Py_ssize_t startinpos; | 3267 Py_ssize_t startinpos; |
| 3779 Py_ssize_t endinpos; | 3268 Py_ssize_t endinpos; |
| 3780 Py_ssize_t outpos; | 3269 Py_ssize_t outpos; |
| 3781 int i; | 3270 int i; |
| 3782 PyUnicodeObject *v; | 3271 PyUnicodeObject *v; |
| 3783 Py_UNICODE *p; | 3272 Py_UNICODE *p; |
| 3784 const char *end; | 3273 const char *end; |
| 3785 char* message; | 3274 char* message; |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3870 if (unicode_decode_call_errorhandler( | 3359 if (unicode_decode_call_errorhandler( |
| 3871 errors, &errorHandler, | 3360 errors, &errorHandler, |
| 3872 "unicodeescape", "end of string in escape sequence", | 3361 "unicodeescape", "end of string in escape sequence", |
| 3873 &starts, &end, &startinpos, &endinpos, &exc, &s, | 3362 &starts, &end, &startinpos, &endinpos, &exc, &s, |
| 3874 &v, &outpos, &p)) | 3363 &v, &outpos, &p)) |
| 3875 goto onError; | 3364 goto onError; |
| 3876 goto nextByte; | 3365 goto nextByte; |
| 3877 } | 3366 } |
| 3878 for (i = 0; i < digits; ++i) { | 3367 for (i = 0; i < digits; ++i) { |
| 3879 c = (unsigned char) s[i]; | 3368 c = (unsigned char) s[i]; |
| 3880 if (!Py_ISXDIGIT(c)) { | 3369 if (!ISXDIGIT(c)) { |
| 3881 endinpos = (s+i+1)-starts; | 3370 endinpos = (s+i+1)-starts; |
| 3882 if (unicode_decode_call_errorhandler( | 3371 if (unicode_decode_call_errorhandler( |
| 3883 errors, &errorHandler, | 3372 errors, &errorHandler, |
| 3884 "unicodeescape", message, | 3373 "unicodeescape", message, |
| 3885 &starts, &end, &startinpos, &endinpos, &exc, &s, | 3374 &starts, &end, &startinpos, &endinpos, &exc, &s, |
| 3886 &v, &outpos, &p)) | 3375 &v, &outpos, &p)) |
| 3887 goto onError; | 3376 goto onError; |
| 3888 goto nextByte; | 3377 goto nextByte; |
| 3889 } | 3378 } |
| 3890 chr = (chr<<4) & ~0xF; | 3379 chr = (chr<<4) & ~0xF; |
| (...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4021 if (*s == ch) | 3510 if (*s == ch) |
| 4022 return s; | 3511 return s; |
| 4023 s++; | 3512 s++; |
| 4024 } | 3513 } |
| 4025 | 3514 |
| 4026 return NULL; | 3515 return NULL; |
| 4027 } | 3516 } |
| 4028 | 3517 |
| 4029 static const char *hexdigits = "0123456789abcdef"; | 3518 static const char *hexdigits = "0123456789abcdef"; |
| 4030 | 3519 |
| 4031 PyObject * | 3520 PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, |
| 4032 PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, | 3521 Py_ssize_t size) |
| 4033 » » » Py_ssize_t size) | |
| 4034 { | 3522 { |
| 4035 PyObject *repr; | 3523 PyObject *repr; |
| 4036 char *p; | 3524 char *p; |
| 4037 | 3525 |
| 4038 #ifdef Py_UNICODE_WIDE | 3526 #ifdef Py_UNICODE_WIDE |
| 4039 const Py_ssize_t expandsize = 10; | 3527 const Py_ssize_t expandsize = 10; |
| 4040 #else | 3528 #else |
| 4041 const Py_ssize_t expandsize = 6; | 3529 const Py_ssize_t expandsize = 6; |
| 4042 #endif | 3530 #endif |
| 4043 | 3531 |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4163 else | 3651 else |
| 4164 *p++ = (char) ch; | 3652 *p++ = (char) ch; |
| 4165 } | 3653 } |
| 4166 | 3654 |
| 4167 assert(p - PyBytes_AS_STRING(repr) > 0); | 3655 assert(p - PyBytes_AS_STRING(repr) > 0); |
| 4168 if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0) | 3656 if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0) |
| 4169 return NULL; | 3657 return NULL; |
| 4170 return repr; | 3658 return repr; |
| 4171 } | 3659 } |
| 4172 | 3660 |
| 4173 PyObject * | 3661 PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode) |
| 4174 PyUnicode_AsUnicodeEscapeString(PyObject *unicode) | |
| 4175 { | 3662 { |
| 4176 PyObject *s; | 3663 PyObject *s; |
| 4177 if (!PyUnicode_Check(unicode)) { | 3664 if (!PyUnicode_Check(unicode)) { |
| 4178 PyErr_BadArgument(); | 3665 PyErr_BadArgument(); |
| 4179 return NULL; | 3666 return NULL; |
| 4180 } | 3667 } |
| 4181 s = PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(unicode), | 3668 s = PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(unicode), |
| 4182 PyUnicode_GET_SIZE(unicode)); | 3669 PyUnicode_GET_SIZE(unicode)); |
| 4183 return s; | 3670 return s; |
| 4184 } | 3671 } |
| 4185 | 3672 |
| 4186 /* --- Raw Unicode Escape Codec ------------------------------------------- */ | 3673 /* --- Raw Unicode Escape Codec ------------------------------------------- */ |
| 4187 | 3674 |
| 4188 PyObject * | 3675 PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, |
| 4189 PyUnicode_DecodeRawUnicodeEscape(const char *s, | 3676 Py_ssize_t size, |
| 4190 » » » » Py_ssize_t size, | 3677 const char *errors) |
| 4191 » » » » const char *errors) | |
| 4192 { | 3678 { |
| 4193 const char *starts = s; | 3679 const char *starts = s; |
| 4194 Py_ssize_t startinpos; | 3680 Py_ssize_t startinpos; |
| 4195 Py_ssize_t endinpos; | 3681 Py_ssize_t endinpos; |
| 4196 Py_ssize_t outpos; | 3682 Py_ssize_t outpos; |
| 4197 PyUnicodeObject *v; | 3683 PyUnicodeObject *v; |
| 4198 Py_UNICODE *p; | 3684 Py_UNICODE *p; |
| 4199 const char *end; | 3685 const char *end; |
| 4200 const char *bs; | 3686 const char *bs; |
| 4201 PyObject *errorHandler = NULL; | 3687 PyObject *errorHandler = NULL; |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4239 continue; | 3725 continue; |
| 4240 } | 3726 } |
| 4241 p--; | 3727 p--; |
| 4242 count = *s=='u' ? 4 : 8; | 3728 count = *s=='u' ? 4 : 8; |
| 4243 s++; | 3729 s++; |
| 4244 | 3730 |
| 4245 /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */ | 3731 /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */ |
| 4246 outpos = p-PyUnicode_AS_UNICODE(v); | 3732 outpos = p-PyUnicode_AS_UNICODE(v); |
| 4247 for (x = 0, i = 0; i < count; ++i, ++s) { | 3733 for (x = 0, i = 0; i < count; ++i, ++s) { |
| 4248 c = (unsigned char)*s; | 3734 c = (unsigned char)*s; |
| 4249 if (!Py_ISXDIGIT(c)) { | 3735 if (!ISXDIGIT(c)) { |
| 4250 endinpos = s-starts; | 3736 endinpos = s-starts; |
| 4251 if (unicode_decode_call_errorhandler( | 3737 if (unicode_decode_call_errorhandler( |
| 4252 errors, &errorHandler, | 3738 errors, &errorHandler, |
| 4253 "rawunicodeescape", "truncated \\uXXXX", | 3739 "rawunicodeescape", "truncated \\uXXXX", |
| 4254 &starts, &end, &startinpos, &endinpos, &exc, &s, | 3740 &starts, &end, &startinpos, &endinpos, &exc, &s, |
| 4255 &v, &outpos, &p)) | 3741 &v, &outpos, &p)) |
| 4256 goto onError; | 3742 goto onError; |
| 4257 goto nextByte; | 3743 goto nextByte; |
| 4258 } | 3744 } |
| 4259 x = (x<<4) & ~0xF; | 3745 x = (x<<4) & ~0xF; |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4296 Py_XDECREF(exc); | 3782 Py_XDECREF(exc); |
| 4297 return (PyObject *)v; | 3783 return (PyObject *)v; |
| 4298 | 3784 |
| 4299 onError: | 3785 onError: |
| 4300 Py_XDECREF(v); | 3786 Py_XDECREF(v); |
| 4301 Py_XDECREF(errorHandler); | 3787 Py_XDECREF(errorHandler); |
| 4302 Py_XDECREF(exc); | 3788 Py_XDECREF(exc); |
| 4303 return NULL; | 3789 return NULL; |
| 4304 } | 3790 } |
| 4305 | 3791 |
| 4306 PyObject * | 3792 PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, |
| 4307 PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, | 3793 Py_ssize_t size) |
| 4308 » » » » Py_ssize_t size) | |
| 4309 { | 3794 { |
| 4310 PyObject *repr; | 3795 PyObject *repr; |
| 4311 char *p; | 3796 char *p; |
| 4312 char *q; | 3797 char *q; |
| 4313 | 3798 |
| 4314 #ifdef Py_UNICODE_WIDE | 3799 #ifdef Py_UNICODE_WIDE |
| 4315 const Py_ssize_t expandsize = 10; | 3800 const Py_ssize_t expandsize = 10; |
| 4316 #else | 3801 #else |
| 4317 const Py_ssize_t expandsize = 6; | 3802 const Py_ssize_t expandsize = 6; |
| 4318 #endif | 3803 #endif |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4385 *p++ = (char) ch; | 3870 *p++ = (char) ch; |
| 4386 } | 3871 } |
| 4387 size = p - q; | 3872 size = p - q; |
| 4388 | 3873 |
| 4389 assert(size > 0); | 3874 assert(size > 0); |
| 4390 if (_PyBytes_Resize(&repr, size) < 0) | 3875 if (_PyBytes_Resize(&repr, size) < 0) |
| 4391 return NULL; | 3876 return NULL; |
| 4392 return repr; | 3877 return repr; |
| 4393 } | 3878 } |
| 4394 | 3879 |
| 4395 PyObject * | 3880 PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) |
| 4396 PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) | |
| 4397 { | 3881 { |
| 4398 PyObject *s; | 3882 PyObject *s; |
| 4399 if (!PyUnicode_Check(unicode)) { | 3883 if (!PyUnicode_Check(unicode)) { |
| 4400 PyErr_BadArgument(); | 3884 PyErr_BadArgument(); |
| 4401 return NULL; | 3885 return NULL; |
| 4402 } | 3886 } |
| 4403 s = PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(unicode), | 3887 s = PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(unicode), |
| 4404 PyUnicode_GET_SIZE(unicode)); | 3888 PyUnicode_GET_SIZE(unicode)); |
| 4405 | 3889 |
| 4406 return s; | 3890 return s; |
| 4407 } | 3891 } |
| 4408 | 3892 |
| 4409 /* --- Unicode Internal Codec ------------------------------------------- */ | 3893 /* --- Unicode Internal Codec ------------------------------------------- */ |
| 4410 | 3894 |
| 4411 PyObject * | 3895 PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s, |
| 4412 _PyUnicode_DecodeUnicodeInternal(const char *s, | 3896 Py_ssize_t size, |
| 4413 » » » » Py_ssize_t size, | 3897 const char *errors) |
| 4414 » » » » const char *errors) | |
| 4415 { | 3898 { |
| 4416 const char *starts = s; | 3899 const char *starts = s; |
| 4417 Py_ssize_t startinpos; | 3900 Py_ssize_t startinpos; |
| 4418 Py_ssize_t endinpos; | 3901 Py_ssize_t endinpos; |
| 4419 Py_ssize_t outpos; | 3902 Py_ssize_t outpos; |
| 4420 PyUnicodeObject *v; | 3903 PyUnicodeObject *v; |
| 4421 Py_UNICODE *p; | 3904 Py_UNICODE *p; |
| 4422 const char *end; | 3905 const char *end; |
| 4423 const char *reason; | 3906 const char *reason; |
| 4424 PyObject *errorHandler = NULL; | 3907 PyObject *errorHandler = NULL; |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4480 | 3963 |
| 4481 onError: | 3964 onError: |
| 4482 Py_XDECREF(v); | 3965 Py_XDECREF(v); |
| 4483 Py_XDECREF(errorHandler); | 3966 Py_XDECREF(errorHandler); |
| 4484 Py_XDECREF(exc); | 3967 Py_XDECREF(exc); |
| 4485 return NULL; | 3968 return NULL; |
| 4486 } | 3969 } |
| 4487 | 3970 |
| 4488 /* --- Latin-1 Codec ------------------------------------------------------ */ | 3971 /* --- Latin-1 Codec ------------------------------------------------------ */ |
| 4489 | 3972 |
| 4490 PyObject * | 3973 PyObject *PyUnicode_DecodeLatin1(const char *s, |
| 4491 PyUnicode_DecodeLatin1(const char *s, | 3974 Py_ssize_t size, |
| 4492 » » Py_ssize_t size, | 3975 const char *errors) |
| 4493 » » const char *errors) | |
| 4494 { | 3976 { |
| 4495 PyUnicodeObject *v; | 3977 PyUnicodeObject *v; |
| 4496 Py_UNICODE *p; | 3978 Py_UNICODE *p; |
| 4497 const char *e, *unrolled_end; | 3979 const char *e, *unrolled_end; |
| 4498 | 3980 |
| 4499 /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */ | 3981 /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */ |
| 4500 if (size == 1) { | 3982 if (size == 1) { |
| 4501 Py_UNICODE r = *(unsigned char*)s; | 3983 Py_UNICODE r = *(unsigned char*)s; |
| 4502 return PyUnicode_FromUnicode(&r, 1); | 3984 return PyUnicode_FromUnicode(&r, 1); |
| 4503 } | 3985 } |
| (...skipping 19 matching lines...) Expand all Loading... |
| 4523 while (s < e) | 4005 while (s < e) |
| 4524 *p++ = (unsigned char) *s++; | 4006 *p++ = (unsigned char) *s++; |
| 4525 return (PyObject *)v; | 4007 return (PyObject *)v; |
| 4526 | 4008 |
| 4527 onError: | 4009 onError: |
| 4528 Py_XDECREF(v); | 4010 Py_XDECREF(v); |
| 4529 return NULL; | 4011 return NULL; |
| 4530 } | 4012 } |
| 4531 | 4013 |
| 4532 /* create or adjust a UnicodeEncodeError */ | 4014 /* create or adjust a UnicodeEncodeError */ |
| 4533 static void | 4015 static void make_encode_exception(PyObject **exceptionObject, |
| 4534 make_encode_exception(PyObject **exceptionObject, | 4016 const char *encoding, |
| 4535 » » const char *encoding, | 4017 const Py_UNICODE *unicode, Py_ssize_t size, |
| 4536 » » const Py_UNICODE *unicode, Py_ssize_t size, | 4018 Py_ssize_t startpos, Py_ssize_t endpos, |
| 4537 » » Py_ssize_t startpos, Py_ssize_t endpos, | 4019 const char *reason) |
| 4538 » » const char *reason) | |
| 4539 { | 4020 { |
| 4540 if (*exceptionObject == NULL) { | 4021 if (*exceptionObject == NULL) { |
| 4541 *exceptionObject = PyUnicodeEncodeError_Create( | 4022 *exceptionObject = PyUnicodeEncodeError_Create( |
| 4542 encoding, unicode, size, startpos, endpos, reason); | 4023 encoding, unicode, size, startpos, endpos, reason); |
| 4543 } | 4024 } |
| 4544 else { | 4025 else { |
| 4545 if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos)) | 4026 if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos)) |
| 4546 goto onError; | 4027 goto onError; |
| 4547 if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos)) | 4028 if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos)) |
| 4548 goto onError; | 4029 goto onError; |
| 4549 if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason)) | 4030 if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason)) |
| 4550 goto onError; | 4031 goto onError; |
| 4551 return; | 4032 return; |
| 4552 onError: | 4033 onError: |
| 4553 Py_DECREF(*exceptionObject); | 4034 Py_DECREF(*exceptionObject); |
| 4554 *exceptionObject = NULL; | 4035 *exceptionObject = NULL; |
| 4555 } | 4036 } |
| 4556 } | 4037 } |
| 4557 | 4038 |
| 4558 /* raises a UnicodeEncodeError */ | 4039 /* raises a UnicodeEncodeError */ |
| 4559 static void | 4040 static void raise_encode_exception(PyObject **exceptionObject, |
| 4560 raise_encode_exception(PyObject **exceptionObject, | 4041 const char *encoding, |
| 4561 » » const char *encoding, | 4042 const Py_UNICODE *unicode, Py_ssize_t size, |
| 4562 » » const Py_UNICODE *unicode, Py_ssize_t size, | 4043 Py_ssize_t startpos, Py_ssize_t endpos, |
| 4563 » » Py_ssize_t startpos, Py_ssize_t endpos, | 4044 const char *reason) |
| 4564 » » const char *reason) | |
| 4565 { | 4045 { |
| 4566 make_encode_exception(exceptionObject, | 4046 make_encode_exception(exceptionObject, |
| 4567 encoding, unicode, size, startpos, endpos, reason); | 4047 encoding, unicode, size, startpos, endpos, reason); |
| 4568 if (*exceptionObject != NULL) | 4048 if (*exceptionObject != NULL) |
| 4569 PyCodec_StrictErrors(*exceptionObject); | 4049 PyCodec_StrictErrors(*exceptionObject); |
| 4570 } | 4050 } |
| 4571 | 4051 |
| 4572 /* error handling callback helper: | 4052 /* error handling callback helper: |
| 4573 build arguments, call the callback and check the arguments, | 4053 build arguments, call the callback and check the arguments, |
| 4574 put the result into newpos and return the replacement string, which | 4054 put the result into newpos and return the replacement string, which |
| 4575 has to be freed by the caller */ | 4055 has to be freed by the caller */ |
| 4576 static PyObject * | 4056 static PyObject *unicode_encode_call_errorhandler(const char *errors, |
| 4577 unicode_encode_call_errorhandler(const char *errors, | 4057 PyObject **errorHandler, |
| 4578 » » » » PyObject **errorHandler, | 4058 const char *encoding, const ch
ar *reason, |
| 4579 » » » » const char *encoding, const char *reason, | 4059 const Py_UNICODE *unicode, Py_
ssize_t size, PyObject **exceptionObject, |
| 4580 » » » » const Py_UNICODE *unicode, Py_ssize_t size, PyO
bject **exceptionObject, | 4060 Py_ssize_t startpos, Py_ssize_
t endpos, |
| 4581 » » » » Py_ssize_t startpos, Py_ssize_t endpos, | 4061 Py_ssize_t *newpos) |
| 4582 » » » » Py_ssize_t *newpos) | |
| 4583 { | 4062 { |
| 4584 static char *argparse = "On;encoding error handler must return (str/bytes, i
nt) tuple"; | 4063 static char *argparse = "On;encoding error handler must return (str/bytes, i
nt) tuple"; |
| 4585 | 4064 |
| 4586 PyObject *restuple; | 4065 PyObject *restuple; |
| 4587 PyObject *resunicode; | 4066 PyObject *resunicode; |
| 4588 | 4067 |
| 4589 if (*errorHandler == NULL) { | 4068 if (*errorHandler == NULL) { |
| 4590 *errorHandler = PyCodec_LookupError(errors); | 4069 *errorHandler = PyCodec_LookupError(errors); |
| 4591 if (*errorHandler == NULL) | 4070 if (*errorHandler == NULL) |
| 4592 return NULL; | 4071 return NULL; |
| (...skipping 28 matching lines...) Expand all Loading... |
| 4621 if (*newpos<0 || *newpos>size) { | 4100 if (*newpos<0 || *newpos>size) { |
| 4622 PyErr_Format(PyExc_IndexError, "position %zd from error handler out of b
ounds", *newpos); | 4101 PyErr_Format(PyExc_IndexError, "position %zd from error handler out of b
ounds", *newpos); |
| 4623 Py_DECREF(restuple); | 4102 Py_DECREF(restuple); |
| 4624 return NULL; | 4103 return NULL; |
| 4625 } | 4104 } |
| 4626 Py_INCREF(resunicode); | 4105 Py_INCREF(resunicode); |
| 4627 Py_DECREF(restuple); | 4106 Py_DECREF(restuple); |
| 4628 return resunicode; | 4107 return resunicode; |
| 4629 } | 4108 } |
| 4630 | 4109 |
| 4631 static PyObject * | 4110 static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, |
| 4632 unicode_encode_ucs1(const Py_UNICODE *p, | 4111 Py_ssize_t size, |
| 4633 » » Py_ssize_t size, | 4112 const char *errors, |
| 4634 » » const char *errors, | 4113 int limit) |
| 4635 » » int limit) | |
| 4636 { | 4114 { |
| 4637 /* output object */ | 4115 /* output object */ |
| 4638 PyObject *res; | 4116 PyObject *res; |
| 4639 /* pointers to the beginning and end+1 of input */ | 4117 /* pointers to the beginning and end+1 of input */ |
| 4640 const Py_UNICODE *startp = p; | 4118 const Py_UNICODE *startp = p; |
| 4641 const Py_UNICODE *endp = p + size; | 4119 const Py_UNICODE *endp = p + size; |
| 4642 /* pointer to the beginning of the unencodable characters */ | 4120 /* pointer to the beginning of the unencodable characters */ |
| 4643 /* const Py_UNICODE *badp = NULL; */ | 4121 /* const Py_UNICODE *badp = NULL; */ |
| 4644 /* pointer into the output */ | 4122 /* pointer into the output */ |
| 4645 char *str; | 4123 char *str; |
| (...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4818 Py_XDECREF(exc); | 4296 Py_XDECREF(exc); |
| 4819 return res; | 4297 return res; |
| 4820 | 4298 |
| 4821 onError: | 4299 onError: |
| 4822 Py_XDECREF(res); | 4300 Py_XDECREF(res); |
| 4823 Py_XDECREF(errorHandler); | 4301 Py_XDECREF(errorHandler); |
| 4824 Py_XDECREF(exc); | 4302 Py_XDECREF(exc); |
| 4825 return NULL; | 4303 return NULL; |
| 4826 } | 4304 } |
| 4827 | 4305 |
| 4828 PyObject * | 4306 PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p, |
| 4829 PyUnicode_EncodeLatin1(const Py_UNICODE *p, | 4307 Py_ssize_t size, |
| 4830 » » Py_ssize_t size, | 4308 const char *errors) |
| 4831 » » const char *errors) | |
| 4832 { | 4309 { |
| 4833 return unicode_encode_ucs1(p, size, errors, 256); | 4310 return unicode_encode_ucs1(p, size, errors, 256); |
| 4834 } | 4311 } |
| 4835 | 4312 |
| 4836 PyObject * | 4313 PyObject *PyUnicode_AsLatin1String(PyObject *unicode) |
| 4837 PyUnicode_AsLatin1String(PyObject *unicode) | |
| 4838 { | 4314 { |
| 4839 if (!PyUnicode_Check(unicode)) { | 4315 if (!PyUnicode_Check(unicode)) { |
| 4840 PyErr_BadArgument(); | 4316 PyErr_BadArgument(); |
| 4841 return NULL; | 4317 return NULL; |
| 4842 } | 4318 } |
| 4843 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode), | 4319 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode), |
| 4844 PyUnicode_GET_SIZE(unicode), | 4320 PyUnicode_GET_SIZE(unicode), |
| 4845 NULL); | 4321 NULL); |
| 4846 } | 4322 } |
| 4847 | 4323 |
| 4848 /* --- 7-bit ASCII Codec -------------------------------------------------- */ | 4324 /* --- 7-bit ASCII Codec -------------------------------------------------- */ |
| 4849 | 4325 |
| 4850 PyObject * | 4326 PyObject *PyUnicode_DecodeASCII(const char *s, |
| 4851 PyUnicode_DecodeASCII(const char *s, | 4327 Py_ssize_t size, |
| 4852 Py_ssize_t size, | 4328 const char *errors) |
| 4853 const char *errors) | |
| 4854 { | 4329 { |
| 4855 const char *starts = s; | 4330 const char *starts = s; |
| 4856 PyUnicodeObject *v; | 4331 PyUnicodeObject *v; |
| 4857 Py_UNICODE *p; | 4332 Py_UNICODE *p; |
| 4858 Py_ssize_t startinpos; | 4333 Py_ssize_t startinpos; |
| 4859 Py_ssize_t endinpos; | 4334 Py_ssize_t endinpos; |
| 4860 Py_ssize_t outpos; | 4335 Py_ssize_t outpos; |
| 4861 const char *e; | 4336 const char *e; |
| 4862 PyObject *errorHandler = NULL; | 4337 PyObject *errorHandler = NULL; |
| 4863 PyObject *exc = NULL; | 4338 PyObject *exc = NULL; |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4900 Py_XDECREF(exc); | 4375 Py_XDECREF(exc); |
| 4901 return (PyObject *)v; | 4376 return (PyObject *)v; |
| 4902 | 4377 |
| 4903 onError: | 4378 onError: |
| 4904 Py_XDECREF(v); | 4379 Py_XDECREF(v); |
| 4905 Py_XDECREF(errorHandler); | 4380 Py_XDECREF(errorHandler); |
| 4906 Py_XDECREF(exc); | 4381 Py_XDECREF(exc); |
| 4907 return NULL; | 4382 return NULL; |
| 4908 } | 4383 } |
| 4909 | 4384 |
| 4910 PyObject * | 4385 PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p, |
| 4911 PyUnicode_EncodeASCII(const Py_UNICODE *p, | 4386 Py_ssize_t size, |
| 4912 Py_ssize_t size, | 4387 const char *errors) |
| 4913 const char *errors) | |
| 4914 { | 4388 { |
| 4915 return unicode_encode_ucs1(p, size, errors, 128); | 4389 return unicode_encode_ucs1(p, size, errors, 128); |
| 4916 } | 4390 } |
| 4917 | 4391 |
| 4918 PyObject * | 4392 PyObject *PyUnicode_AsASCIIString(PyObject *unicode) |
| 4919 PyUnicode_AsASCIIString(PyObject *unicode) | |
| 4920 { | 4393 { |
| 4921 if (!PyUnicode_Check(unicode)) { | 4394 if (!PyUnicode_Check(unicode)) { |
| 4922 PyErr_BadArgument(); | 4395 PyErr_BadArgument(); |
| 4923 return NULL; | 4396 return NULL; |
| 4924 } | 4397 } |
| 4925 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode), | 4398 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode), |
| 4926 PyUnicode_GET_SIZE(unicode), | 4399 PyUnicode_GET_SIZE(unicode), |
| 4927 NULL); | 4400 NULL); |
| 4928 } | 4401 } |
| 4929 | 4402 |
| 4930 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) | 4403 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) |
| 4931 | 4404 |
| 4932 /* --- MBCS codecs for Windows -------------------------------------------- */ | 4405 /* --- MBCS codecs for Windows -------------------------------------------- */ |
| 4933 | 4406 |
| 4934 #if SIZEOF_INT < SIZEOF_SIZE_T | 4407 #if SIZEOF_INT < SIZEOF_SIZE_T |
| 4935 #define NEED_RETRY | 4408 #define NEED_RETRY |
| 4936 #endif | 4409 #endif |
| 4937 | 4410 |
| 4938 /* XXX This code is limited to "true" double-byte encodings, as | 4411 /* XXX This code is limited to "true" double-byte encodings, as |
| 4939 a) it assumes an incomplete character consists of a single byte, and | 4412 a) it assumes an incomplete character consists of a single byte, and |
| 4940 b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte | 4413 b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte |
| 4941 encodings, see IsDBCSLeadByteEx documentation. */ | 4414 encodings, see IsDBCSLeadByteEx documentation. */ |
| 4942 | 4415 |
| 4943 static int | 4416 static int is_dbcs_lead_byte(const char *s, int offset) |
| 4944 is_dbcs_lead_byte(const char *s, int offset) | |
| 4945 { | 4417 { |
| 4946 const char *curr = s + offset; | 4418 const char *curr = s + offset; |
| 4947 | 4419 |
| 4948 if (IsDBCSLeadByte(*curr)) { | 4420 if (IsDBCSLeadByte(*curr)) { |
| 4949 const char *prev = CharPrev(s, curr); | 4421 const char *prev = CharPrev(s, curr); |
| 4950 return (prev == curr) || !IsDBCSLeadByte(*prev) || (curr - prev == 2); | 4422 return (prev == curr) || !IsDBCSLeadByte(*prev) || (curr - prev == 2); |
| 4951 } | 4423 } |
| 4952 return 0; | 4424 return 0; |
| 4953 } | 4425 } |
| 4954 | 4426 |
| 4955 /* | 4427 /* |
| 4956 * Decode MBCS string into unicode object. If 'final' is set, converts | 4428 * Decode MBCS string into unicode object. If 'final' is set, converts |
| 4957 * trailing lead-byte too. Returns the consumed size on success, -1 otherwise. | 4429 * trailing lead-byte too. Returns the consumed size on success, -1 otherwise. |
| 4958 */ | 4430 */ |
| 4959 static int | 4431 static int decode_mbcs(PyUnicodeObject **v, |
| 4960 decode_mbcs(PyUnicodeObject **v, | 4432 const char *s, /* MBCS string */ |
| 4961 const char *s, /* MBCS string */ | 4433 int size, /* sizeof MBCS string */ |
| 4962 int size, /* sizeof MBCS string */ | 4434 int final) |
| 4963 int final, | |
| 4964 const char *errors) | |
| 4965 { | 4435 { |
| 4966 Py_UNICODE *p; | 4436 Py_UNICODE *p; |
| 4967 Py_ssize_t n; | 4437 Py_ssize_t n = 0; |
| 4968 DWORD usize; | 4438 int usize = 0; |
| 4969 DWORD flags; | |
| 4970 | 4439 |
| 4971 assert(size >= 0); | 4440 assert(size >= 0); |
| 4972 | |
| 4973 /* check and handle 'errors' arg */ | |
| 4974 if (errors==NULL || strcmp(errors, "strict")==0) | |
| 4975 flags = MB_ERR_INVALID_CHARS; | |
| 4976 else if (strcmp(errors, "ignore")==0) | |
| 4977 flags = 0; | |
| 4978 else { | |
| 4979 PyErr_Format(PyExc_ValueError, | |
| 4980 "mbcs encoding does not support errors='%s'", | |
| 4981 errors); | |
| 4982 return -1; | |
| 4983 } | |
| 4984 | 4441 |
| 4985 /* Skip trailing lead-byte unless 'final' is set */ | 4442 /* Skip trailing lead-byte unless 'final' is set */ |
| 4986 if (!final && size >= 1 && is_dbcs_lead_byte(s, size - 1)) | 4443 if (!final && size >= 1 && is_dbcs_lead_byte(s, size - 1)) |
| 4987 --size; | 4444 --size; |
| 4988 | 4445 |
| 4989 /* First get the size of the result */ | 4446 /* First get the size of the result */ |
| 4990 if (size > 0) { | 4447 if (size > 0) { |
| 4991 usize = MultiByteToWideChar(CP_ACP, flags, s, size, NULL, 0); | 4448 usize = MultiByteToWideChar(CP_ACP, 0, s, size, NULL, 0); |
| 4992 if (usize==0) | 4449 if (usize == 0) { |
| 4993 goto mbcs_decode_error; | 4450 PyErr_SetFromWindowsErrWithFilename(0, NULL); |
| 4994 } else | 4451 return -1; |
| 4995 usize = 0; | 4452 } |
| 4453 } |
| 4996 | 4454 |
| 4997 if (*v == NULL) { | 4455 if (*v == NULL) { |
| 4998 /* Create unicode object */ | 4456 /* Create unicode object */ |
| 4999 *v = _PyUnicode_New(usize); | 4457 *v = _PyUnicode_New(usize); |
| 5000 if (*v == NULL) | 4458 if (*v == NULL) |
| 5001 return -1; | 4459 return -1; |
| 5002 n = 0; | |
| 5003 } | 4460 } |
| 5004 else { | 4461 else { |
| 5005 /* Extend unicode object */ | 4462 /* Extend unicode object */ |
| 5006 n = PyUnicode_GET_SIZE(*v); | 4463 n = PyUnicode_GET_SIZE(*v); |
| 5007 if (_PyUnicode_Resize(v, n + usize) < 0) | 4464 if (_PyUnicode_Resize(v, n + usize) < 0) |
| 5008 return -1; | 4465 return -1; |
| 5009 } | 4466 } |
| 5010 | 4467 |
| 5011 /* Do the conversion */ | 4468 /* Do the conversion */ |
| 5012 if (usize > 0) { | 4469 if (size > 0) { |
| 5013 p = PyUnicode_AS_UNICODE(*v) + n; | 4470 p = PyUnicode_AS_UNICODE(*v) + n; |
| 5014 if (0 == MultiByteToWideChar(CP_ACP, flags, s, size, p, usize)) { | 4471 if (0 == MultiByteToWideChar(CP_ACP, 0, s, size, p, usize)) { |
| 5015 goto mbcs_decode_error; | 4472 PyErr_SetFromWindowsErrWithFilename(0, NULL); |
| 5016 } | 4473 return -1; |
| 5017 } | 4474 } |
| 4475 } |
| 4476 |
| 5018 return size; | 4477 return size; |
| 5019 | 4478 } |
| 5020 mbcs_decode_error: | 4479 |
| 5021 /* If the last error was ERROR_NO_UNICODE_TRANSLATION, then | 4480 PyObject *PyUnicode_DecodeMBCSStateful(const char *s, |
| 5022 we raise a UnicodeDecodeError - else it is a 'generic' | 4481 Py_ssize_t size, |
| 5023 windows error | 4482 const char *errors, |
| 5024 */ | 4483 Py_ssize_t *consumed) |
| 5025 if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION) { | |
| 5026 /* Ideally, we should get reason from FormatMessage - this | |
| 5027 is the Windows 2000 English version of the message | |
| 5028 */ | |
| 5029 PyObject *exc = NULL; | |
| 5030 const char *reason = "No mapping for the Unicode character exists " | |
| 5031 "in the target multi-byte code page."; | |
| 5032 make_decode_exception(&exc, "mbcs", s, size, 0, 0, reason); | |
| 5033 if (exc != NULL) { | |
| 5034 PyCodec_StrictErrors(exc); | |
| 5035 Py_DECREF(exc); | |
| 5036 } | |
| 5037 } else { | |
| 5038 PyErr_SetFromWindowsErrWithFilename(0, NULL); | |
| 5039 } | |
| 5040 return -1; | |
| 5041 } | |
| 5042 | |
| 5043 PyObject * | |
| 5044 PyUnicode_DecodeMBCSStateful(const char *s, | |
| 5045 Py_ssize_t size, | |
| 5046 const char *errors, | |
| 5047 Py_ssize_t *consumed) | |
| 5048 { | 4484 { |
| 5049 PyUnicodeObject *v = NULL; | 4485 PyUnicodeObject *v = NULL; |
| 5050 int done; | 4486 int done; |
| 5051 | 4487 |
| 5052 if (consumed) | 4488 if (consumed) |
| 5053 *consumed = 0; | 4489 *consumed = 0; |
| 5054 | 4490 |
| 5055 #ifdef NEED_RETRY | 4491 #ifdef NEED_RETRY |
| 5056 retry: | 4492 retry: |
| 5057 if (size > INT_MAX) | 4493 if (size > INT_MAX) |
| 5058 done = decode_mbcs(&v, s, INT_MAX, 0, errors); | 4494 done = decode_mbcs(&v, s, INT_MAX, 0); |
| 5059 else | 4495 else |
| 5060 #endif | 4496 #endif |
| 5061 done = decode_mbcs(&v, s, (int)size, !consumed, errors); | 4497 done = decode_mbcs(&v, s, (int)size, !consumed); |
| 5062 | 4498 |
| 5063 if (done < 0) { | 4499 if (done < 0) { |
| 5064 Py_XDECREF(v); | 4500 Py_XDECREF(v); |
| 5065 return NULL; | 4501 return NULL; |
| 5066 } | 4502 } |
| 5067 | 4503 |
| 5068 if (consumed) | 4504 if (consumed) |
| 5069 *consumed += done; | 4505 *consumed += done; |
| 5070 | 4506 |
| 5071 #ifdef NEED_RETRY | 4507 #ifdef NEED_RETRY |
| 5072 if (size > INT_MAX) { | 4508 if (size > INT_MAX) { |
| 5073 s += done; | 4509 s += done; |
| 5074 size -= done; | 4510 size -= done; |
| 5075 goto retry; | 4511 goto retry; |
| 5076 } | 4512 } |
| 5077 #endif | 4513 #endif |
| 5078 | 4514 |
| 5079 return (PyObject *)v; | 4515 return (PyObject *)v; |
| 5080 } | 4516 } |
| 5081 | 4517 |
| 5082 PyObject * | 4518 PyObject *PyUnicode_DecodeMBCS(const char *s, |
| 5083 PyUnicode_DecodeMBCS(const char *s, | 4519 Py_ssize_t size, |
| 5084 Py_ssize_t size, | 4520 const char *errors) |
| 5085 const char *errors) | |
| 5086 { | 4521 { |
| 5087 return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL); | 4522 return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL); |
| 5088 } | 4523 } |
| 5089 | 4524 |
| 5090 /* | 4525 /* |
| 5091 * Convert unicode into string object (MBCS). | 4526 * Convert unicode into string object (MBCS). |
| 5092 * Returns 0 on success, -1 otherwise. | 4527 * Returns 0 on success, -1 otherwise. |
| 5093 */ | 4528 */ |
| 5094 static int | 4529 static int encode_mbcs(PyObject **repr, |
| 5095 encode_mbcs(PyObject **repr, | 4530 const Py_UNICODE *p, /* unicode */ |
| 5096 const Py_UNICODE *p, /* unicode */ | 4531 int size) /* size of unicode */ |
| 5097 int size, /* size of unicode */ | 4532 { |
| 5098 const char* errors) | 4533 int mbcssize = 0; |
| 5099 { | 4534 Py_ssize_t n = 0; |
| 5100 BOOL usedDefaultChar = FALSE; | |
| 5101 BOOL *pusedDefaultChar; | |
| 5102 int mbcssize; | |
| 5103 Py_ssize_t n; | |
| 5104 PyObject *exc = NULL; | |
| 5105 DWORD flags; | |
| 5106 | 4535 |
| 5107 assert(size >= 0); | 4536 assert(size >= 0); |
| 5108 | |
| 5109 /* check and handle 'errors' arg */ | |
| 5110 if (errors==NULL || strcmp(errors, "strict")==0) { | |
| 5111 flags = WC_NO_BEST_FIT_CHARS; | |
| 5112 pusedDefaultChar = &usedDefaultChar; | |
| 5113 } else if (strcmp(errors, "replace")==0) { | |
| 5114 flags = 0; | |
| 5115 pusedDefaultChar = NULL; | |
| 5116 } else { | |
| 5117 PyErr_Format(PyExc_ValueError, | |
| 5118 "mbcs encoding does not support errors='%s'", | |
| 5119 errors); | |
| 5120 return -1; | |
| 5121 } | |
| 5122 | 4537 |
| 5123 /* First get the size of the result */ | 4538 /* First get the size of the result */ |
| 5124 if (size > 0) { | 4539 if (size > 0) { |
| 5125 mbcssize = WideCharToMultiByte(CP_ACP, flags, p, size, NULL, 0, | 4540 mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, NULL, NULL); |
| 5126 NULL, pusedDefaultChar); | |
| 5127 if (mbcssize == 0) { | 4541 if (mbcssize == 0) { |
| 5128 PyErr_SetFromWindowsErrWithFilename(0, NULL); | 4542 PyErr_SetFromWindowsErrWithFilename(0, NULL); |
| 5129 return -1; | 4543 return -1; |
| 5130 } | 4544 } |
| 5131 /* If we used a default char, then we failed! */ | |
| 5132 if (pusedDefaultChar && *pusedDefaultChar) | |
| 5133 goto mbcs_encode_error; | |
| 5134 } else { | |
| 5135 mbcssize = 0; | |
| 5136 } | 4545 } |
| 5137 | 4546 |
| 5138 if (*repr == NULL) { | 4547 if (*repr == NULL) { |
| 5139 /* Create string object */ | 4548 /* Create string object */ |
| 5140 *repr = PyBytes_FromStringAndSize(NULL, mbcssize); | 4549 *repr = PyBytes_FromStringAndSize(NULL, mbcssize); |
| 5141 if (*repr == NULL) | 4550 if (*repr == NULL) |
| 5142 return -1; | 4551 return -1; |
| 5143 n = 0; | |
| 5144 } | 4552 } |
| 5145 else { | 4553 else { |
| 5146 /* Extend string object */ | 4554 /* Extend string object */ |
| 5147 n = PyBytes_Size(*repr); | 4555 n = PyBytes_Size(*repr); |
| 5148 if (_PyBytes_Resize(repr, n + mbcssize) < 0) | 4556 if (_PyBytes_Resize(repr, n + mbcssize) < 0) |
| 5149 return -1; | 4557 return -1; |
| 5150 } | 4558 } |
| 5151 | 4559 |
| 5152 /* Do the conversion */ | 4560 /* Do the conversion */ |
| 5153 if (size > 0) { | 4561 if (size > 0) { |
| 5154 char *s = PyBytes_AS_STRING(*repr) + n; | 4562 char *s = PyBytes_AS_STRING(*repr) + n; |
| 5155 if (0 == WideCharToMultiByte(CP_ACP, flags, p, size, s, mbcssize, | 4563 if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL
)) { |
| 5156 NULL, pusedDefaultChar)) { | |
| 5157 PyErr_SetFromWindowsErrWithFilename(0, NULL); | 4564 PyErr_SetFromWindowsErrWithFilename(0, NULL); |
| 5158 return -1; | 4565 return -1; |
| 5159 } | 4566 } |
| 5160 if (pusedDefaultChar && *pusedDefaultChar) | 4567 } |
| 5161 goto mbcs_encode_error; | 4568 |
| 5162 } | |
| 5163 return 0; | 4569 return 0; |
| 5164 | 4570 } |
| 5165 mbcs_encode_error: | 4571 |
| 5166 raise_encode_exception(&exc, "mbcs", p, size, 0, 0, "invalid character"); | 4572 PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p, |
| 5167 Py_XDECREF(exc); | 4573 Py_ssize_t size, |
| 5168 return -1; | 4574 const char *errors) |
| 5169 } | |
| 5170 | |
| 5171 PyObject * | |
| 5172 PyUnicode_EncodeMBCS(const Py_UNICODE *p, | |
| 5173 Py_ssize_t size, | |
| 5174 const char *errors) | |
| 5175 { | 4575 { |
| 5176 PyObject *repr = NULL; | 4576 PyObject *repr = NULL; |
| 5177 int ret; | 4577 int ret; |
| 5178 | 4578 |
| 5179 #ifdef NEED_RETRY | 4579 #ifdef NEED_RETRY |
| 5180 retry: | 4580 retry: |
| 5181 if (size > INT_MAX) | 4581 if (size > INT_MAX) |
| 5182 ret = encode_mbcs(&repr, p, INT_MAX, errors); | 4582 ret = encode_mbcs(&repr, p, INT_MAX); |
| 5183 else | 4583 else |
| 5184 #endif | 4584 #endif |
| 5185 ret = encode_mbcs(&repr, p, (int)size, errors); | 4585 ret = encode_mbcs(&repr, p, (int)size); |
| 5186 | 4586 |
| 5187 if (ret < 0) { | 4587 if (ret < 0) { |
| 5188 Py_XDECREF(repr); | 4588 Py_XDECREF(repr); |
| 5189 return NULL; | 4589 return NULL; |
| 5190 } | 4590 } |
| 5191 | 4591 |
| 5192 #ifdef NEED_RETRY | 4592 #ifdef NEED_RETRY |
| 5193 if (size > INT_MAX) { | 4593 if (size > INT_MAX) { |
| 5194 p += INT_MAX; | 4594 p += INT_MAX; |
| 5195 size -= INT_MAX; | 4595 size -= INT_MAX; |
| 5196 goto retry; | 4596 goto retry; |
| 5197 } | 4597 } |
| 5198 #endif | 4598 #endif |
| 5199 | 4599 |
| 5200 return repr; | 4600 return repr; |
| 5201 } | 4601 } |
| 5202 | 4602 |
| 5203 PyObject * | 4603 PyObject *PyUnicode_AsMBCSString(PyObject *unicode) |
| 5204 PyUnicode_AsMBCSString(PyObject *unicode) | |
| 5205 { | 4604 { |
| 5206 if (!PyUnicode_Check(unicode)) { | 4605 if (!PyUnicode_Check(unicode)) { |
| 5207 PyErr_BadArgument(); | 4606 PyErr_BadArgument(); |
| 5208 return NULL; | 4607 return NULL; |
| 5209 } | 4608 } |
| 5210 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), | 4609 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), |
| 5211 PyUnicode_GET_SIZE(unicode), | 4610 PyUnicode_GET_SIZE(unicode), |
| 5212 NULL); | 4611 NULL); |
| 5213 } | 4612 } |
| 5214 | 4613 |
| 5215 #undef NEED_RETRY | 4614 #undef NEED_RETRY |
| 5216 | 4615 |
| 5217 #endif /* MS_WINDOWS */ | 4616 #endif /* MS_WINDOWS */ |
| 5218 | 4617 |
| 5219 /* --- Character Mapping Codec -------------------------------------------- */ | 4618 /* --- Character Mapping Codec -------------------------------------------- */ |
| 5220 | 4619 |
| 5221 PyObject * | 4620 PyObject *PyUnicode_DecodeCharmap(const char *s, |
| 5222 PyUnicode_DecodeCharmap(const char *s, | 4621 Py_ssize_t size, |
| 5223 Py_ssize_t size, | 4622 PyObject *mapping, |
| 5224 PyObject *mapping, | 4623 const char *errors) |
| 5225 const char *errors) | |
| 5226 { | 4624 { |
| 5227 const char *starts = s; | 4625 const char *starts = s; |
| 5228 Py_ssize_t startinpos; | 4626 Py_ssize_t startinpos; |
| 5229 Py_ssize_t endinpos; | 4627 Py_ssize_t endinpos; |
| 5230 Py_ssize_t outpos; | 4628 Py_ssize_t outpos; |
| 5231 const char *e; | 4629 const char *e; |
| 5232 PyUnicodeObject *v; | 4630 PyUnicodeObject *v; |
| 5233 Py_UNICODE *p; | 4631 Py_UNICODE *p; |
| 5234 Py_ssize_t extrachars = 0; | 4632 Py_ssize_t extrachars = 0; |
| 5235 PyObject *errorHandler = NULL; | 4633 PyObject *errorHandler = NULL; |
| (...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5375 | 4773 |
| 5376 onError: | 4774 onError: |
| 5377 Py_XDECREF(errorHandler); | 4775 Py_XDECREF(errorHandler); |
| 5378 Py_XDECREF(exc); | 4776 Py_XDECREF(exc); |
| 5379 Py_XDECREF(v); | 4777 Py_XDECREF(v); |
| 5380 return NULL; | 4778 return NULL; |
| 5381 } | 4779 } |
| 5382 | 4780 |
| 5383 /* Charmap encoding: the lookup table */ | 4781 /* Charmap encoding: the lookup table */ |
| 5384 | 4782 |
| 5385 struct encoding_map { | 4783 struct encoding_map{ |
| 5386 PyObject_HEAD | 4784 PyObject_HEAD |
| 5387 unsigned char level1[32]; | 4785 unsigned char level1[32]; |
| 5388 int count2, count3; | 4786 int count2, count3; |
| 5389 unsigned char level23[1]; | 4787 unsigned char level23[1]; |
| 5390 }; | 4788 }; |
| 5391 | 4789 |
| 5392 static PyObject* | 4790 static PyObject* |
| 5393 encoding_map_size(PyObject *obj, PyObject* args) | 4791 encoding_map_size(PyObject *obj, PyObject* args) |
| 5394 { | 4792 { |
| 5395 struct encoding_map *map = (struct encoding_map*)obj; | 4793 struct encoding_map *map = (struct encoding_map*)obj; |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5502 | 4900 |
| 5503 if (count2 >= 0xFF || count3 >= 0xFF) | 4901 if (count2 >= 0xFF || count3 >= 0xFF) |
| 5504 need_dict = 1; | 4902 need_dict = 1; |
| 5505 | 4903 |
| 5506 if (need_dict) { | 4904 if (need_dict) { |
| 5507 PyObject *result = PyDict_New(); | 4905 PyObject *result = PyDict_New(); |
| 5508 PyObject *key, *value; | 4906 PyObject *key, *value; |
| 5509 if (!result) | 4907 if (!result) |
| 5510 return NULL; | 4908 return NULL; |
| 5511 for (i = 0; i < 256; i++) { | 4909 for (i = 0; i < 256; i++) { |
| 4910 key = value = NULL; |
| 5512 key = PyLong_FromLong(decode[i]); | 4911 key = PyLong_FromLong(decode[i]); |
| 5513 value = PyLong_FromLong(i); | 4912 value = PyLong_FromLong(i); |
| 5514 if (!key || !value) | 4913 if (!key || !value) |
| 5515 goto failed1; | 4914 goto failed1; |
| 5516 if (PyDict_SetItem(result, key, value) == -1) | 4915 if (PyDict_SetItem(result, key, value) == -1) |
| 5517 goto failed1; | 4916 goto failed1; |
| 5518 Py_DECREF(key); | 4917 Py_DECREF(key); |
| 5519 Py_DECREF(value); | 4918 Py_DECREF(value); |
| 5520 } | 4919 } |
| 5521 return result; | 4920 return result; |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5589 i = map->level23[16*map->count2 + 128*i + l3]; | 4988 i = map->level23[16*map->count2 + 128*i + l3]; |
| 5590 if (i == 0) { | 4989 if (i == 0) { |
| 5591 return -1; | 4990 return -1; |
| 5592 } | 4991 } |
| 5593 return i; | 4992 return i; |
| 5594 } | 4993 } |
| 5595 | 4994 |
| 5596 /* Look up the character c in the mapping. If the character | 4995 /* Look up the character c in the mapping. If the character |
| 5597 can't be found, Py_None is returned (or NULL, if another | 4996 can't be found, Py_None is returned (or NULL, if another |
| 5598 error occurred). */ | 4997 error occurred). */ |
| 5599 static PyObject * | 4998 static PyObject *charmapencode_lookup(Py_UNICODE c, PyObject *mapping) |
| 5600 charmapencode_lookup(Py_UNICODE c, PyObject *mapping) | |
| 5601 { | 4999 { |
| 5602 PyObject *w = PyLong_FromLong((long)c); | 5000 PyObject *w = PyLong_FromLong((long)c); |
| 5603 PyObject *x; | 5001 PyObject *x; |
| 5604 | 5002 |
| 5605 if (w == NULL) | 5003 if (w == NULL) |
| 5606 return NULL; | 5004 return NULL; |
| 5607 x = PyObject_GetItem(mapping, w); | 5005 x = PyObject_GetItem(mapping, w); |
| 5608 Py_DECREF(w); | 5006 Py_DECREF(w); |
| 5609 if (x == NULL) { | 5007 if (x == NULL) { |
| 5610 if (PyErr_ExceptionMatches(PyExc_LookupError)) { | 5008 if (PyErr_ExceptionMatches(PyExc_LookupError)) { |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5647 /* exponentially overallocate to minimize reallocations */ | 5045 /* exponentially overallocate to minimize reallocations */ |
| 5648 if (requiredsize < 2*outsize) | 5046 if (requiredsize < 2*outsize) |
| 5649 requiredsize = 2*outsize; | 5047 requiredsize = 2*outsize; |
| 5650 if (_PyBytes_Resize(outobj, requiredsize)) | 5048 if (_PyBytes_Resize(outobj, requiredsize)) |
| 5651 return -1; | 5049 return -1; |
| 5652 return 0; | 5050 return 0; |
| 5653 } | 5051 } |
| 5654 | 5052 |
| 5655 typedef enum charmapencode_result { | 5053 typedef enum charmapencode_result { |
| 5656 enc_SUCCESS, enc_FAILED, enc_EXCEPTION | 5054 enc_SUCCESS, enc_FAILED, enc_EXCEPTION |
| 5657 } charmapencode_result; | 5055 }charmapencode_result; |
| 5658 /* lookup the character, put the result in the output string and adjust | 5056 /* lookup the character, put the result in the output string and adjust |
| 5659 various state variables. Resize the output bytes object if not enough | 5057 various state variables. Resize the output bytes object if not enough |
| 5660 space is available. Return a new reference to the object that | 5058 space is available. Return a new reference to the object that |
| 5661 was put in the output buffer, or Py_None, if the mapping was undefined | 5059 was put in the output buffer, or Py_None, if the mapping was undefined |
| 5662 (in which case no character was written) or NULL, if a | 5060 (in which case no character was written) or NULL, if a |
| 5663 reallocation error occurred. The caller must decref the result */ | 5061 reallocation error occurred. The caller must decref the result */ |
| 5664 static charmapencode_result | 5062 static |
| 5665 charmapencode_output(Py_UNICODE c, PyObject *mapping, | 5063 charmapencode_result charmapencode_output(Py_UNICODE c, PyObject *mapping, |
| 5666 PyObject **outobj, Py_ssize_t *outpos) | 5064 PyObject **outobj, Py_ssize_t *outpos) |
| 5667 { | 5065 { |
| 5668 PyObject *rep; | 5066 PyObject *rep; |
| 5669 char *outstart; | 5067 char *outstart; |
| 5670 Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj); | 5068 Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj); |
| 5671 | 5069 |
| 5672 if (Py_TYPE(mapping) == &EncodingMapType) { | 5070 if (Py_TYPE(mapping) == &EncodingMapType) { |
| 5673 int res = encoding_map_lookup(c, mapping); | 5071 int res = encoding_map_lookup(c, mapping); |
| 5674 Py_ssize_t requiredsize = *outpos+1; | 5072 Py_ssize_t requiredsize = *outpos+1; |
| 5675 if (res == -1) | 5073 if (res == -1) |
| 5676 return enc_FAILED; | 5074 return enc_FAILED; |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5712 memcpy(outstart + *outpos, repchars, repsize); | 5110 memcpy(outstart + *outpos, repchars, repsize); |
| 5713 *outpos += repsize; | 5111 *outpos += repsize; |
| 5714 } | 5112 } |
| 5715 } | 5113 } |
| 5716 Py_DECREF(rep); | 5114 Py_DECREF(rep); |
| 5717 return enc_SUCCESS; | 5115 return enc_SUCCESS; |
| 5718 } | 5116 } |
| 5719 | 5117 |
| 5720 /* handle an error in PyUnicode_EncodeCharmap | 5118 /* handle an error in PyUnicode_EncodeCharmap |
| 5721 Return 0 on success, -1 on error */ | 5119 Return 0 on success, -1 on error */ |
| 5722 static int | 5120 static |
| 5723 charmap_encoding_error( | 5121 int charmap_encoding_error( |
| 5724 const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping, | 5122 const Py_UNICODE *p, Py_ssize_t size, Py_ssize_t *inpos, PyObject *mapping, |
| 5725 PyObject **exceptionObject, | 5123 PyObject **exceptionObject, |
| 5726 int *known_errorHandler, PyObject **errorHandler, const char *errors, | 5124 int *known_errorHandler, PyObject **errorHandler, const char *errors, |
| 5727 PyObject **res, Py_ssize_t *respos) | 5125 PyObject **res, Py_ssize_t *respos) |
| 5728 { | 5126 { |
| 5729 PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ | 5127 PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ |
| 5730 Py_ssize_t repsize; | 5128 Py_ssize_t repsize; |
| 5731 Py_ssize_t newpos; | 5129 Py_ssize_t newpos; |
| 5732 Py_UNICODE *uni2; | 5130 Py_UNICODE *uni2; |
| 5733 /* startpos for collecting unencodable chars */ | 5131 /* startpos for collecting unencodable chars */ |
| (...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5847 raise_encode_exception(exceptionObject, encoding, p, size, colls
tartpos, collendpos, reason); | 5245 raise_encode_exception(exceptionObject, encoding, p, size, colls
tartpos, collendpos, reason); |
| 5848 return -1; | 5246 return -1; |
| 5849 } | 5247 } |
| 5850 } | 5248 } |
| 5851 *inpos = newpos; | 5249 *inpos = newpos; |
| 5852 Py_DECREF(repunicode); | 5250 Py_DECREF(repunicode); |
| 5853 } | 5251 } |
| 5854 return 0; | 5252 return 0; |
| 5855 } | 5253 } |
| 5856 | 5254 |
| 5857 PyObject * | 5255 PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p, |
| 5858 PyUnicode_EncodeCharmap(const Py_UNICODE *p, | 5256 Py_ssize_t size, |
| 5859 Py_ssize_t size, | 5257 PyObject *mapping, |
| 5860 PyObject *mapping, | 5258 const char *errors) |
| 5861 const char *errors) | |
| 5862 { | 5259 { |
| 5863 /* output object */ | 5260 /* output object */ |
| 5864 PyObject *res = NULL; | 5261 PyObject *res = NULL; |
| 5865 /* current input position */ | 5262 /* current input position */ |
| 5866 Py_ssize_t inpos = 0; | 5263 Py_ssize_t inpos = 0; |
| 5867 /* current output position */ | 5264 /* current output position */ |
| 5868 Py_ssize_t respos = 0; | 5265 Py_ssize_t respos = 0; |
| 5869 PyObject *errorHandler = NULL; | 5266 PyObject *errorHandler = NULL; |
| 5870 PyObject *exc = NULL; | 5267 PyObject *exc = NULL; |
| 5871 /* the following variable is used for caching string comparisons | 5268 /* the following variable is used for caching string comparisons |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5912 Py_XDECREF(errorHandler); | 5309 Py_XDECREF(errorHandler); |
| 5913 return res; | 5310 return res; |
| 5914 | 5311 |
| 5915 onError: | 5312 onError: |
| 5916 Py_XDECREF(res); | 5313 Py_XDECREF(res); |
| 5917 Py_XDECREF(exc); | 5314 Py_XDECREF(exc); |
| 5918 Py_XDECREF(errorHandler); | 5315 Py_XDECREF(errorHandler); |
| 5919 return NULL; | 5316 return NULL; |
| 5920 } | 5317 } |
| 5921 | 5318 |
| 5922 PyObject * | 5319 PyObject *PyUnicode_AsCharmapString(PyObject *unicode, |
| 5923 PyUnicode_AsCharmapString(PyObject *unicode, | 5320 PyObject *mapping) |
| 5924 PyObject *mapping) | |
| 5925 { | 5321 { |
| 5926 if (!PyUnicode_Check(unicode) || mapping == NULL) { | 5322 if (!PyUnicode_Check(unicode) || mapping == NULL) { |
| 5927 PyErr_BadArgument(); | 5323 PyErr_BadArgument(); |
| 5928 return NULL; | 5324 return NULL; |
| 5929 } | 5325 } |
| 5930 return PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(unicode), | 5326 return PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(unicode), |
| 5931 PyUnicode_GET_SIZE(unicode), | 5327 PyUnicode_GET_SIZE(unicode), |
| 5932 mapping, | 5328 mapping, |
| 5933 NULL); | 5329 NULL); |
| 5934 } | 5330 } |
| 5935 | 5331 |
| 5936 /* create or adjust a UnicodeTranslateError */ | 5332 /* create or adjust a UnicodeTranslateError */ |
| 5937 static void | 5333 static void make_translate_exception(PyObject **exceptionObject, |
| 5938 make_translate_exception(PyObject **exceptionObject, | 5334 const Py_UNICODE *unicode, Py_ssize_t size, |
| 5939 const Py_UNICODE *unicode, Py_ssize_t size, | 5335 Py_ssize_t startpos, Py_ssize_t endpos, |
| 5940 Py_ssize_t startpos, Py_ssize_t endpos, | 5336 const char *reason) |
| 5941 const char *reason) | |
| 5942 { | 5337 { |
| 5943 if (*exceptionObject == NULL) { | 5338 if (*exceptionObject == NULL) { |
| 5944 *exceptionObject = PyUnicodeTranslateError_Create( | 5339 *exceptionObject = PyUnicodeTranslateError_Create( |
| 5945 unicode, size, startpos, endpos, reason); | 5340 unicode, size, startpos, endpos, reason); |
| 5946 } | 5341 } |
| 5947 else { | 5342 else { |
| 5948 if (PyUnicodeTranslateError_SetStart(*exceptionObject, startpos)) | 5343 if (PyUnicodeTranslateError_SetStart(*exceptionObject, startpos)) |
| 5949 goto onError; | 5344 goto onError; |
| 5950 if (PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos)) | 5345 if (PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos)) |
| 5951 goto onError; | 5346 goto onError; |
| 5952 if (PyUnicodeTranslateError_SetReason(*exceptionObject, reason)) | 5347 if (PyUnicodeTranslateError_SetReason(*exceptionObject, reason)) |
| 5953 goto onError; | 5348 goto onError; |
| 5954 return; | 5349 return; |
| 5955 onError: | 5350 onError: |
| 5956 Py_DECREF(*exceptionObject); | 5351 Py_DECREF(*exceptionObject); |
| 5957 *exceptionObject = NULL; | 5352 *exceptionObject = NULL; |
| 5958 } | 5353 } |
| 5959 } | 5354 } |
| 5960 | 5355 |
| 5961 /* raises a UnicodeTranslateError */ | 5356 /* raises a UnicodeTranslateError */ |
| 5962 static void | 5357 static void raise_translate_exception(PyObject **exceptionObject, |
| 5963 raise_translate_exception(PyObject **exceptionObject, | 5358 const Py_UNICODE *unicode, Py_ssize_t size
, |
| 5964 const Py_UNICODE *unicode, Py_ssize_t size, | 5359 Py_ssize_t startpos, Py_ssize_t endpos, |
| 5965 Py_ssize_t startpos, Py_ssize_t endpos, | 5360 const char *reason) |
| 5966 const char *reason) | |
| 5967 { | 5361 { |
| 5968 make_translate_exception(exceptionObject, | 5362 make_translate_exception(exceptionObject, |
| 5969 unicode, size, startpos, endpos, reason); | 5363 unicode, size, startpos, endpos, reason); |
| 5970 if (*exceptionObject != NULL) | 5364 if (*exceptionObject != NULL) |
| 5971 PyCodec_StrictErrors(*exceptionObject); | 5365 PyCodec_StrictErrors(*exceptionObject); |
| 5972 } | 5366 } |
| 5973 | 5367 |
| 5974 /* error handling callback helper: | 5368 /* error handling callback helper: |
| 5975 build arguments, call the callback and check the arguments, | 5369 build arguments, call the callback and check the arguments, |
| 5976 put the result into newpos and return the replacement string, which | 5370 put the result into newpos and return the replacement string, which |
| 5977 has to be freed by the caller */ | 5371 has to be freed by the caller */ |
| 5978 static PyObject * | 5372 static PyObject *unicode_translate_call_errorhandler(const char *errors, |
| 5979 unicode_translate_call_errorhandler(const char *errors, | 5373 PyObject **errorHandler, |
| 5980 PyObject **errorHandler, | 5374 const char *reason, |
| 5981 const char *reason, | 5375 const Py_UNICODE *unicode,
Py_ssize_t size, PyObject **exceptionObject, |
| 5982 const Py_UNICODE *unicode, Py_ssize_t size,
PyObject **exceptionObject, | 5376 Py_ssize_t startpos, Py_ssi
ze_t endpos, |
| 5983 Py_ssize_t startpos, Py_ssize_t endpos, | 5377 Py_ssize_t *newpos) |
| 5984 Py_ssize_t *newpos) | |
| 5985 { | 5378 { |
| 5986 static char *argparse = "O!n;translating error handler must return (str, int
) tuple"; | 5379 static char *argparse = "O!n;translating error handler must return (str, int
) tuple"; |
| 5987 | 5380 |
| 5988 Py_ssize_t i_newpos; | 5381 Py_ssize_t i_newpos; |
| 5989 PyObject *restuple; | 5382 PyObject *restuple; |
| 5990 PyObject *resunicode; | 5383 PyObject *resunicode; |
| 5991 | 5384 |
| 5992 if (*errorHandler == NULL) { | 5385 if (*errorHandler == NULL) { |
| 5993 *errorHandler = PyCodec_LookupError(errors); | 5386 *errorHandler = PyCodec_LookupError(errors); |
| 5994 if (*errorHandler == NULL) | 5387 if (*errorHandler == NULL) |
| (...skipping 29 matching lines...) Expand all Loading... |
| 6024 return NULL; | 5417 return NULL; |
| 6025 } | 5418 } |
| 6026 Py_INCREF(resunicode); | 5419 Py_INCREF(resunicode); |
| 6027 Py_DECREF(restuple); | 5420 Py_DECREF(restuple); |
| 6028 return resunicode; | 5421 return resunicode; |
| 6029 } | 5422 } |
| 6030 | 5423 |
| 6031 /* Lookup the character ch in the mapping and put the result in result, | 5424 /* Lookup the character ch in the mapping and put the result in result, |
| 6032 which must be decrefed by the caller. | 5425 which must be decrefed by the caller. |
| 6033 Return 0 on success, -1 on error */ | 5426 Return 0 on success, -1 on error */ |
| 6034 static int | 5427 static |
| 6035 charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result) | 5428 int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result) |
| 6036 { | 5429 { |
| 6037 PyObject *w = PyLong_FromLong((long)c); | 5430 PyObject *w = PyLong_FromLong((long)c); |
| 6038 PyObject *x; | 5431 PyObject *x; |
| 6039 | 5432 |
| 6040 if (w == NULL) | 5433 if (w == NULL) |
| 6041 return -1; | 5434 return -1; |
| 6042 x = PyObject_GetItem(mapping, w); | 5435 x = PyObject_GetItem(mapping, w); |
| 6043 Py_DECREF(w); | 5436 Py_DECREF(w); |
| 6044 if (x == NULL) { | 5437 if (x == NULL) { |
| 6045 if (PyErr_ExceptionMatches(PyExc_LookupError)) { | 5438 if (PyErr_ExceptionMatches(PyExc_LookupError)) { |
| (...skipping 28 matching lines...) Expand all Loading... |
| 6074 /* wrong return value */ | 5467 /* wrong return value */ |
| 6075 PyErr_SetString(PyExc_TypeError, | 5468 PyErr_SetString(PyExc_TypeError, |
| 6076 "character mapping must return integer, None or str"); | 5469 "character mapping must return integer, None or str"); |
| 6077 Py_DECREF(x); | 5470 Py_DECREF(x); |
| 6078 return -1; | 5471 return -1; |
| 6079 } | 5472 } |
| 6080 } | 5473 } |
| 6081 /* ensure that *outobj is at least requiredsize characters long, | 5474 /* ensure that *outobj is at least requiredsize characters long, |
| 6082 if not reallocate and adjust various state variables. | 5475 if not reallocate and adjust various state variables. |
| 6083 Return 0 on success, -1 on error */ | 5476 Return 0 on success, -1 on error */ |
| 6084 static int | 5477 static |
| 6085 charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, | 5478 int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, |
| 6086 Py_ssize_t requiredsize) | 5479 Py_ssize_t requiredsize) |
| 6087 { | 5480 { |
| 6088 Py_ssize_t oldsize = PyUnicode_GET_SIZE(*outobj); | 5481 Py_ssize_t oldsize = PyUnicode_GET_SIZE(*outobj); |
| 6089 if (requiredsize > oldsize) { | 5482 if (requiredsize > oldsize) { |
| 6090 /* remember old output position */ | 5483 /* remember old output position */ |
| 6091 Py_ssize_t outpos = *outp-PyUnicode_AS_UNICODE(*outobj); | 5484 Py_ssize_t outpos = *outp-PyUnicode_AS_UNICODE(*outobj); |
| 6092 /* exponentially overallocate to minimize reallocations */ | 5485 /* exponentially overallocate to minimize reallocations */ |
| 6093 if (requiredsize < 2 * oldsize) | 5486 if (requiredsize < 2 * oldsize) |
| 6094 requiredsize = 2 * oldsize; | 5487 requiredsize = 2 * oldsize; |
| 6095 if (PyUnicode_Resize(outobj, requiredsize) < 0) | 5488 if (PyUnicode_Resize(outobj, requiredsize) < 0) |
| 6096 return -1; | 5489 return -1; |
| 6097 *outp = PyUnicode_AS_UNICODE(*outobj) + outpos; | 5490 *outp = PyUnicode_AS_UNICODE(*outobj) + outpos; |
| 6098 } | 5491 } |
| 6099 return 0; | 5492 return 0; |
| 6100 } | 5493 } |
| 6101 /* lookup the character, put the result in the output string and adjust | 5494 /* lookup the character, put the result in the output string and adjust |
| 6102 various state variables. Return a new reference to the object that | 5495 various state variables. Return a new reference to the object that |
| 6103 was put in the output buffer in *result, or Py_None, if the mapping was | 5496 was put in the output buffer in *result, or Py_None, if the mapping was |
| 6104 undefined (in which case no character was written). | 5497 undefined (in which case no character was written). |
| 6105 The called must decref result. | 5498 The called must decref result. |
| 6106 Return 0 on success, -1 on error. */ | 5499 Return 0 on success, -1 on error. */ |
| 6107 static int | 5500 static |
| 6108 charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp, | 5501 int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp
, |
| 6109 Py_ssize_t insize, PyObject *mapping, PyObject **outobj,
Py_UNICODE **outp, | 5502 Py_ssize_t insize, PyObject *mapping, PyObject **out
obj, Py_UNICODE **outp, |
| 6110 PyObject **res) | 5503 PyObject **res) |
| 6111 { | 5504 { |
| 6112 if (charmaptranslate_lookup(*curinp, mapping, res)) | 5505 if (charmaptranslate_lookup(*curinp, mapping, res)) |
| 6113 return -1; | 5506 return -1; |
| 6114 if (*res==NULL) { | 5507 if (*res==NULL) { |
| 6115 /* not found => default to 1:1 mapping */ | 5508 /* not found => default to 1:1 mapping */ |
| 6116 *(*outp)++ = *curinp; | 5509 *(*outp)++ = *curinp; |
| 6117 } | 5510 } |
| 6118 else if (*res==Py_None) | 5511 else if (*res==Py_None) |
| 6119 ; | 5512 ; |
| 6120 else if (PyLong_Check(*res)) { | 5513 else if (PyLong_Check(*res)) { |
| (...skipping 15 matching lines...) Expand all Loading... |
| 6136 return -1; | 5529 return -1; |
| 6137 memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize
); | 5530 memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize
); |
| 6138 *outp += repsize; | 5531 *outp += repsize; |
| 6139 } | 5532 } |
| 6140 } | 5533 } |
| 6141 else | 5534 else |
| 6142 return -1; | 5535 return -1; |
| 6143 return 0; | 5536 return 0; |
| 6144 } | 5537 } |
| 6145 | 5538 |
| 6146 PyObject * | 5539 PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p, |
| 6147 PyUnicode_TranslateCharmap(const Py_UNICODE *p, | 5540 Py_ssize_t size, |
| 6148 Py_ssize_t size, | 5541 PyObject *mapping, |
| 6149 PyObject *mapping, | 5542 const char *errors) |
| 6150 const char *errors) | |
| 6151 { | 5543 { |
| 6152 /* output object */ | 5544 /* output object */ |
| 6153 PyObject *res = NULL; | 5545 PyObject *res = NULL; |
| 6154 /* pointers to the beginning and end+1 of input */ | 5546 /* pointers to the beginning and end+1 of input */ |
| 6155 const Py_UNICODE *startp = p; | 5547 const Py_UNICODE *startp = p; |
| 6156 const Py_UNICODE *endp = p + size; | 5548 const Py_UNICODE *endp = p + size; |
| 6157 /* pointer into the output */ | 5549 /* pointer into the output */ |
| 6158 Py_UNICODE *str; | 5550 Py_UNICODE *str; |
| 6159 /* current output position */ | 5551 /* current output position */ |
| 6160 Py_ssize_t respos = 0; | 5552 Py_ssize_t respos = 0; |
| (...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6279 Py_XDECREF(errorHandler); | 5671 Py_XDECREF(errorHandler); |
| 6280 return res; | 5672 return res; |
| 6281 | 5673 |
| 6282 onError: | 5674 onError: |
| 6283 Py_XDECREF(res); | 5675 Py_XDECREF(res); |
| 6284 Py_XDECREF(exc); | 5676 Py_XDECREF(exc); |
| 6285 Py_XDECREF(errorHandler); | 5677 Py_XDECREF(errorHandler); |
| 6286 return NULL; | 5678 return NULL; |
| 6287 } | 5679 } |
| 6288 | 5680 |
| 6289 PyObject * | 5681 PyObject *PyUnicode_Translate(PyObject *str, |
| 6290 PyUnicode_Translate(PyObject *str, | 5682 PyObject *mapping, |
| 6291 PyObject *mapping, | 5683 const char *errors) |
| 6292 const char *errors) | |
| 6293 { | 5684 { |
| 6294 PyObject *result; | 5685 PyObject *result; |
| 6295 | 5686 |
| 6296 str = PyUnicode_FromObject(str); | 5687 str = PyUnicode_FromObject(str); |
| 6297 if (str == NULL) | 5688 if (str == NULL) |
| 6298 goto onError; | 5689 goto onError; |
| 6299 result = PyUnicode_TranslateCharmap(PyUnicode_AS_UNICODE(str), | 5690 result = PyUnicode_TranslateCharmap(PyUnicode_AS_UNICODE(str), |
| 6300 PyUnicode_GET_SIZE(str), | 5691 PyUnicode_GET_SIZE(str), |
| 6301 mapping, | 5692 mapping, |
| 6302 errors); | 5693 errors); |
| 6303 Py_DECREF(str); | 5694 Py_DECREF(str); |
| 6304 return result; | 5695 return result; |
| 6305 | 5696 |
| 6306 onError: | 5697 onError: |
| 6307 Py_XDECREF(str); | 5698 Py_XDECREF(str); |
| 6308 return NULL; | 5699 return NULL; |
| 6309 } | 5700 } |
| 6310 | 5701 |
| 6311 PyObject * | |
| 6312 PyUnicode_TransformDecimalToASCII(Py_UNICODE *s, | |
| 6313 Py_ssize_t length) | |
| 6314 { | |
| 6315 PyObject *result; | |
| 6316 Py_UNICODE *p; /* write pointer into result */ | |
| 6317 Py_ssize_t i; | |
| 6318 /* Copy to a new string */ | |
| 6319 result = (PyObject *)_PyUnicode_New(length); | |
| 6320 Py_UNICODE_COPY(PyUnicode_AS_UNICODE(result), s, length); | |
| 6321 if (result == NULL) | |
| 6322 return result; | |
| 6323 p = PyUnicode_AS_UNICODE(result); | |
| 6324 /* Iterate over code points */ | |
| 6325 for (i = 0; i < length; i++) { | |
| 6326 Py_UNICODE ch =s[i]; | |
| 6327 if (ch > 127) { | |
| 6328 int decimal = Py_UNICODE_TODECIMAL(ch); | |
| 6329 if (decimal >= 0) | |
| 6330 p[i] = '0' + decimal; | |
| 6331 } | |
| 6332 } | |
| 6333 return result; | |
| 6334 } | |
| 6335 /* --- Decimal Encoder ---------------------------------------------------- */ | 5702 /* --- Decimal Encoder ---------------------------------------------------- */ |
| 6336 | 5703 |
| 6337 int | 5704 int PyUnicode_EncodeDecimal(Py_UNICODE *s, |
| 6338 PyUnicode_EncodeDecimal(Py_UNICODE *s, | 5705 Py_ssize_t length, |
| 6339 Py_ssize_t length, | 5706 char *output, |
| 6340 char *output, | 5707 const char *errors) |
| 6341 const char *errors) | |
| 6342 { | 5708 { |
| 6343 Py_UNICODE *p, *end; | 5709 Py_UNICODE *p, *end; |
| 6344 PyObject *errorHandler = NULL; | 5710 PyObject *errorHandler = NULL; |
| 6345 PyObject *exc = NULL; | 5711 PyObject *exc = NULL; |
| 6346 const char *encoding = "decimal"; | 5712 const char *encoding = "decimal"; |
| 6347 const char *reason = "invalid decimal Unicode string"; | 5713 const char *reason = "invalid decimal Unicode string"; |
| 6348 /* the following variable is used for caching string comparisons | 5714 /* the following variable is used for caching string comparisons |
| 6349 * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharre
freplace */ | 5715 * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharre
freplace */ |
| 6350 int known_errorHandler = -1; | 5716 int known_errorHandler = -1; |
| 6351 | 5717 |
| (...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6467 onError: | 5833 onError: |
| 6468 Py_XDECREF(exc); | 5834 Py_XDECREF(exc); |
| 6469 Py_XDECREF(errorHandler); | 5835 Py_XDECREF(errorHandler); |
| 6470 return -1; | 5836 return -1; |
| 6471 } | 5837 } |
| 6472 | 5838 |
| 6473 /* --- Helpers ------------------------------------------------------------ */ | 5839 /* --- Helpers ------------------------------------------------------------ */ |
| 6474 | 5840 |
| 6475 #include "stringlib/unicodedefs.h" | 5841 #include "stringlib/unicodedefs.h" |
| 6476 #include "stringlib/fastsearch.h" | 5842 #include "stringlib/fastsearch.h" |
| 6477 | |
| 6478 #include "stringlib/count.h" | 5843 #include "stringlib/count.h" |
| 5844 /* Include _ParseTupleFinds from find.h */ |
| 5845 #define FROM_UNICODE |
| 6479 #include "stringlib/find.h" | 5846 #include "stringlib/find.h" |
| 6480 #include "stringlib/partition.h" | 5847 #include "stringlib/partition.h" |
| 6481 #include "stringlib/split.h" | |
| 6482 | 5848 |
| 6483 #define _Py_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping | 5849 #define _Py_InsertThousandsGrouping _PyUnicode_InsertThousandsGrouping |
| 6484 #define _Py_InsertThousandsGroupingLocale _PyUnicode_InsertThousandsGroupingLoca
le | 5850 #define _Py_InsertThousandsGroupingLocale _PyUnicode_InsertThousandsGroupingLoca
le |
| 6485 #include "stringlib/localeutil.h" | 5851 #include "stringlib/localeutil.h" |
| 6486 | 5852 |
| 6487 /* helper macro to fixup start/end slice values */ | 5853 /* helper macro to fixup start/end slice values */ |
| 6488 #define ADJUST_INDICES(start, end, len) \ | 5854 #define FIX_START_END(obj) \ |
| 6489 if (end > len) \ | 5855 if (start < 0) \ |
| 6490 end = len; \ | 5856 start += (obj)->length; \ |
| 6491 else if (end < 0) { \ | 5857 if (start < 0) \ |
| 6492 end += len; \ | 5858 start = 0; \ |
| 6493 if (end < 0) \ | 5859 if (end > (obj)->length) \ |
| 6494 end = 0; \ | 5860 end = (obj)->length; \ |
| 6495 } \ | 5861 if (end < 0) \ |
| 6496 if (start < 0) { \ | 5862 end += (obj)->length; \ |
| 6497 start += len; \ | 5863 if (end < 0) \ |
| 6498 if (start < 0) \ | 5864 end = 0; |
| 6499 start = 0; \ | 5865 |
| 6500 } | 5866 Py_ssize_t PyUnicode_Count(PyObject *str, |
| 6501 | 5867 PyObject *substr, |
| 6502 Py_ssize_t | 5868 Py_ssize_t start, |
| 6503 PyUnicode_Count(PyObject *str, | 5869 Py_ssize_t end) |
| 6504 PyObject *substr, | |
| 6505 Py_ssize_t start, | |
| 6506 Py_ssize_t end) | |
| 6507 { | 5870 { |
| 6508 Py_ssize_t result; | 5871 Py_ssize_t result; |
| 6509 PyUnicodeObject* str_obj; | 5872 PyUnicodeObject* str_obj; |
| 6510 PyUnicodeObject* sub_obj; | 5873 PyUnicodeObject* sub_obj; |
| 6511 | 5874 |
| 6512 str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str); | 5875 str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str); |
| 6513 if (!str_obj) | 5876 if (!str_obj) |
| 6514 return -1; | 5877 return -1; |
| 6515 sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr); | 5878 sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr); |
| 6516 if (!sub_obj) { | 5879 if (!sub_obj) { |
| 6517 Py_DECREF(str_obj); | 5880 Py_DECREF(str_obj); |
| 6518 return -1; | 5881 return -1; |
| 6519 } | 5882 } |
| 6520 | 5883 |
| 6521 ADJUST_INDICES(start, end, str_obj->length); | 5884 FIX_START_END(str_obj); |
| 5885 |
| 6522 result = stringlib_count( | 5886 result = stringlib_count( |
| 6523 str_obj->str + start, end - start, sub_obj->str, sub_obj->length, | 5887 str_obj->str + start, end - start, sub_obj->str, sub_obj->length |
| 6524 PY_SSIZE_T_MAX | |
| 6525 ); | 5888 ); |
| 6526 | 5889 |
| 6527 Py_DECREF(sub_obj); | 5890 Py_DECREF(sub_obj); |
| 6528 Py_DECREF(str_obj); | 5891 Py_DECREF(str_obj); |
| 6529 | 5892 |
| 6530 return result; | 5893 return result; |
| 6531 } | 5894 } |
| 6532 | 5895 |
| 6533 Py_ssize_t | 5896 Py_ssize_t PyUnicode_Find(PyObject *str, |
| 6534 PyUnicode_Find(PyObject *str, | 5897 PyObject *sub, |
| 6535 PyObject *sub, | 5898 Py_ssize_t start, |
| 6536 Py_ssize_t start, | 5899 Py_ssize_t end, |
| 6537 Py_ssize_t end, | 5900 int direction) |
| 6538 int direction) | |
| 6539 { | 5901 { |
| 6540 Py_ssize_t result; | 5902 Py_ssize_t result; |
| 6541 | 5903 |
| 6542 str = PyUnicode_FromObject(str); | 5904 str = PyUnicode_FromObject(str); |
| 6543 if (!str) | 5905 if (!str) |
| 6544 return -2; | 5906 return -2; |
| 6545 sub = PyUnicode_FromObject(sub); | 5907 sub = PyUnicode_FromObject(sub); |
| 6546 if (!sub) { | 5908 if (!sub) { |
| 6547 Py_DECREF(str); | 5909 Py_DECREF(str); |
| 6548 return -2; | 5910 return -2; |
| (...skipping 11 matching lines...) Expand all Loading... |
| 6560 PyUnicode_AS_UNICODE(sub), PyUnicode_GET_SIZE(sub), | 5922 PyUnicode_AS_UNICODE(sub), PyUnicode_GET_SIZE(sub), |
| 6561 start, end | 5923 start, end |
| 6562 ); | 5924 ); |
| 6563 | 5925 |
| 6564 Py_DECREF(str); | 5926 Py_DECREF(str); |
| 6565 Py_DECREF(sub); | 5927 Py_DECREF(sub); |
| 6566 | 5928 |
| 6567 return result; | 5929 return result; |
| 6568 } | 5930 } |
| 6569 | 5931 |
| 6570 static int | 5932 static |
| 6571 tailmatch(PyUnicodeObject *self, | 5933 int tailmatch(PyUnicodeObject *self, |
| 6572 PyUnicodeObject *substring, | 5934 PyUnicodeObject *substring, |
| 6573 Py_ssize_t start, | 5935 Py_ssize_t start, |
| 6574 Py_ssize_t end, | 5936 Py_ssize_t end, |
| 6575 int direction) | 5937 int direction) |
| 6576 { | 5938 { |
| 6577 if (substring->length == 0) | 5939 if (substring->length == 0) |
| 6578 return 1; | 5940 return 1; |
| 6579 | 5941 |
| 6580 ADJUST_INDICES(start, end, self->length); | 5942 FIX_START_END(self); |
| 5943 |
| 6581 end -= substring->length; | 5944 end -= substring->length; |
| 6582 if (end < start) | 5945 if (end < start) |
| 6583 return 0; | 5946 return 0; |
| 6584 | 5947 |
| 6585 if (direction > 0) { | 5948 if (direction > 0) { |
| 6586 if (Py_UNICODE_MATCH(self, end, substring)) | 5949 if (Py_UNICODE_MATCH(self, end, substring)) |
| 6587 return 1; | 5950 return 1; |
| 6588 } else { | 5951 } else { |
| 6589 if (Py_UNICODE_MATCH(self, start, substring)) | 5952 if (Py_UNICODE_MATCH(self, start, substring)) |
| 6590 return 1; | 5953 return 1; |
| 6591 } | 5954 } |
| 6592 | 5955 |
| 6593 return 0; | 5956 return 0; |
| 6594 } | 5957 } |
| 6595 | 5958 |
| 6596 Py_ssize_t | 5959 Py_ssize_t PyUnicode_Tailmatch(PyObject *str, |
| 6597 PyUnicode_Tailmatch(PyObject *str, | 5960 PyObject *substr, |
| 6598 PyObject *substr, | 5961 Py_ssize_t start, |
| 6599 Py_ssize_t start, | 5962 Py_ssize_t end, |
| 6600 Py_ssize_t end, | 5963 int direction) |
| 6601 int direction) | |
| 6602 { | 5964 { |
| 6603 Py_ssize_t result; | 5965 Py_ssize_t result; |
| 6604 | 5966 |
| 6605 str = PyUnicode_FromObject(str); | 5967 str = PyUnicode_FromObject(str); |
| 6606 if (str == NULL) | 5968 if (str == NULL) |
| 6607 return -1; | 5969 return -1; |
| 6608 substr = PyUnicode_FromObject(substr); | 5970 substr = PyUnicode_FromObject(substr); |
| 6609 if (substr == NULL) { | 5971 if (substr == NULL) { |
| 6610 Py_DECREF(str); | 5972 Py_DECREF(str); |
| 6611 return -1; | 5973 return -1; |
| 6612 } | 5974 } |
| 6613 | 5975 |
| 6614 result = tailmatch((PyUnicodeObject *)str, | 5976 result = tailmatch((PyUnicodeObject *)str, |
| 6615 (PyUnicodeObject *)substr, | 5977 (PyUnicodeObject *)substr, |
| 6616 start, end, direction); | 5978 start, end, direction); |
| 6617 Py_DECREF(str); | 5979 Py_DECREF(str); |
| 6618 Py_DECREF(substr); | 5980 Py_DECREF(substr); |
| 6619 return result; | 5981 return result; |
| 6620 } | 5982 } |
| 6621 | 5983 |
| 6622 /* Apply fixfct filter to the Unicode object self and return a | 5984 /* Apply fixfct filter to the Unicode object self and return a |
| 6623 reference to the modified object */ | 5985 reference to the modified object */ |
| 6624 | 5986 |
| 6625 static PyObject * | 5987 static |
| 6626 fixup(PyUnicodeObject *self, | 5988 PyObject *fixup(PyUnicodeObject *self, |
| 6627 int (*fixfct)(PyUnicodeObject *s)) | 5989 int (*fixfct)(PyUnicodeObject *s)) |
| 6628 { | 5990 { |
| 6629 | 5991 |
| 6630 PyUnicodeObject *u; | 5992 PyUnicodeObject *u; |
| 6631 | 5993 |
| 6632 u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); | 5994 u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); |
| 6633 if (u == NULL) | 5995 if (u == NULL) |
| 6634 return NULL; | 5996 return NULL; |
| 6635 | 5997 |
| 6636 Py_UNICODE_COPY(u->str, self->str, self->length); | 5998 Py_UNICODE_COPY(u->str, self->str, self->length); |
| 6637 | 5999 |
| 6638 if (!fixfct(u) && PyUnicode_CheckExact(self)) { | 6000 if (!fixfct(u) && PyUnicode_CheckExact(self)) { |
| 6639 /* fixfct should return TRUE if it modified the buffer. If | 6001 /* fixfct should return TRUE if it modified the buffer. If |
| 6640 FALSE, return a reference to the original buffer instead | 6002 FALSE, return a reference to the original buffer instead |
| 6641 (to save space, not time) */ | 6003 (to save space, not time) */ |
| 6642 Py_INCREF(self); | 6004 Py_INCREF(self); |
| 6643 Py_DECREF(u); | 6005 Py_DECREF(u); |
| 6644 return (PyObject*) self; | 6006 return (PyObject*) self; |
| 6645 } | 6007 } |
| 6646 return (PyObject*) u; | 6008 return (PyObject*) u; |
| 6647 } | 6009 } |
| 6648 | 6010 |
| 6649 static int | 6011 static |
| 6650 fixupper(PyUnicodeObject *self) | 6012 int fixupper(PyUnicodeObject *self) |
| 6651 { | 6013 { |
| 6652 Py_ssize_t len = self->length; | 6014 Py_ssize_t len = self->length; |
| 6653 Py_UNICODE *s = self->str; | 6015 Py_UNICODE *s = self->str; |
| 6654 int status = 0; | 6016 int status = 0; |
| 6655 | 6017 |
| 6656 while (len-- > 0) { | 6018 while (len-- > 0) { |
| 6657 register Py_UNICODE ch; | 6019 register Py_UNICODE ch; |
| 6658 | 6020 |
| 6659 ch = Py_UNICODE_TOUPPER(*s); | 6021 ch = Py_UNICODE_TOUPPER(*s); |
| 6660 if (ch != *s) { | 6022 if (ch != *s) { |
| 6661 status = 1; | 6023 status = 1; |
| 6662 *s = ch; | 6024 *s = ch; |
| 6663 } | 6025 } |
| 6664 s++; | 6026 s++; |
| 6665 } | 6027 } |
| 6666 | 6028 |
| 6667 return status; | 6029 return status; |
| 6668 } | 6030 } |
| 6669 | 6031 |
| 6670 static int | 6032 static |
| 6671 fixlower(PyUnicodeObject *self) | 6033 int fixlower(PyUnicodeObject *self) |
| 6672 { | 6034 { |
| 6673 Py_ssize_t len = self->length; | 6035 Py_ssize_t len = self->length; |
| 6674 Py_UNICODE *s = self->str; | 6036 Py_UNICODE *s = self->str; |
| 6675 int status = 0; | 6037 int status = 0; |
| 6676 | 6038 |
| 6677 while (len-- > 0) { | 6039 while (len-- > 0) { |
| 6678 register Py_UNICODE ch; | 6040 register Py_UNICODE ch; |
| 6679 | 6041 |
| 6680 ch = Py_UNICODE_TOLOWER(*s); | 6042 ch = Py_UNICODE_TOLOWER(*s); |
| 6681 if (ch != *s) { | 6043 if (ch != *s) { |
| 6682 status = 1; | 6044 status = 1; |
| 6683 *s = ch; | 6045 *s = ch; |
| 6684 } | 6046 } |
| 6685 s++; | 6047 s++; |
| 6686 } | 6048 } |
| 6687 | 6049 |
| 6688 return status; | 6050 return status; |
| 6689 } | 6051 } |
| 6690 | 6052 |
| 6691 static int | 6053 static |
| 6692 fixswapcase(PyUnicodeObject *self) | 6054 int fixswapcase(PyUnicodeObject *self) |
| 6693 { | 6055 { |
| 6694 Py_ssize_t len = self->length; | 6056 Py_ssize_t len = self->length; |
| 6695 Py_UNICODE *s = self->str; | 6057 Py_UNICODE *s = self->str; |
| 6696 int status = 0; | 6058 int status = 0; |
| 6697 | 6059 |
| 6698 while (len-- > 0) { | 6060 while (len-- > 0) { |
| 6699 if (Py_UNICODE_ISUPPER(*s)) { | 6061 if (Py_UNICODE_ISUPPER(*s)) { |
| 6700 *s = Py_UNICODE_TOLOWER(*s); | 6062 *s = Py_UNICODE_TOLOWER(*s); |
| 6701 status = 1; | 6063 status = 1; |
| 6702 } else if (Py_UNICODE_ISLOWER(*s)) { | 6064 } else if (Py_UNICODE_ISLOWER(*s)) { |
| 6703 *s = Py_UNICODE_TOUPPER(*s); | 6065 *s = Py_UNICODE_TOUPPER(*s); |
| 6704 status = 1; | 6066 status = 1; |
| 6705 } | 6067 } |
| 6706 s++; | 6068 s++; |
| 6707 } | 6069 } |
| 6708 | 6070 |
| 6709 return status; | 6071 return status; |
| 6710 } | 6072 } |
| 6711 | 6073 |
| 6712 static int | 6074 static |
| 6713 fixcapitalize(PyUnicodeObject *self) | 6075 int fixcapitalize(PyUnicodeObject *self) |
| 6714 { | 6076 { |
| 6715 Py_ssize_t len = self->length; | 6077 Py_ssize_t len = self->length; |
| 6716 Py_UNICODE *s = self->str; | 6078 Py_UNICODE *s = self->str; |
| 6717 int status = 0; | 6079 int status = 0; |
| 6718 | 6080 |
| 6719 if (len == 0) | 6081 if (len == 0) |
| 6720 return 0; | 6082 return 0; |
| 6721 if (Py_UNICODE_ISLOWER(*s)) { | 6083 if (Py_UNICODE_ISLOWER(*s)) { |
| 6722 *s = Py_UNICODE_TOUPPER(*s); | 6084 *s = Py_UNICODE_TOUPPER(*s); |
| 6723 status = 1; | 6085 status = 1; |
| 6724 } | 6086 } |
| 6725 s++; | 6087 s++; |
| 6726 while (--len > 0) { | 6088 while (--len > 0) { |
| 6727 if (Py_UNICODE_ISUPPER(*s)) { | 6089 if (Py_UNICODE_ISUPPER(*s)) { |
| 6728 *s = Py_UNICODE_TOLOWER(*s); | 6090 *s = Py_UNICODE_TOLOWER(*s); |
| 6729 status = 1; | 6091 status = 1; |
| 6730 } | 6092 } |
| 6731 s++; | 6093 s++; |
| 6732 } | 6094 } |
| 6733 return status; | 6095 return status; |
| 6734 } | 6096 } |
| 6735 | 6097 |
| 6736 static int | 6098 static |
| 6737 fixtitle(PyUnicodeObject *self) | 6099 int fixtitle(PyUnicodeObject *self) |
| 6738 { | 6100 { |
| 6739 register Py_UNICODE *p = PyUnicode_AS_UNICODE(self); | 6101 register Py_UNICODE *p = PyUnicode_AS_UNICODE(self); |
| 6740 register Py_UNICODE *e; | 6102 register Py_UNICODE *e; |
| 6741 int previous_is_cased; | 6103 int previous_is_cased; |
| 6742 | 6104 |
| 6743 /* Shortcut for single character strings */ | 6105 /* Shortcut for single character strings */ |
| 6744 if (PyUnicode_GET_SIZE(self) == 1) { | 6106 if (PyUnicode_GET_SIZE(self) == 1) { |
| 6745 Py_UNICODE ch = Py_UNICODE_TOTITLE(*p); | 6107 Py_UNICODE ch = Py_UNICODE_TOTITLE(*p); |
| 6746 if (*p != ch) { | 6108 if (*p != ch) { |
| 6747 *p = ch; | 6109 *p = ch; |
| (...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6877 Done: | 6239 Done: |
| 6878 Py_DECREF(fseq); | 6240 Py_DECREF(fseq); |
| 6879 return (PyObject *)res; | 6241 return (PyObject *)res; |
| 6880 | 6242 |
| 6881 onError: | 6243 onError: |
| 6882 Py_DECREF(fseq); | 6244 Py_DECREF(fseq); |
| 6883 Py_XDECREF(res); | 6245 Py_XDECREF(res); |
| 6884 return NULL; | 6246 return NULL; |
| 6885 } | 6247 } |
| 6886 | 6248 |
| 6887 static PyUnicodeObject * | 6249 static |
| 6888 pad(PyUnicodeObject *self, | 6250 PyUnicodeObject *pad(PyUnicodeObject *self, |
| 6889 Py_ssize_t left, | 6251 Py_ssize_t left, |
| 6890 Py_ssize_t right, | 6252 Py_ssize_t right, |
| 6891 Py_UNICODE fill) | 6253 Py_UNICODE fill) |
| 6892 { | 6254 { |
| 6893 PyUnicodeObject *u; | 6255 PyUnicodeObject *u; |
| 6894 | 6256 |
| 6895 if (left < 0) | 6257 if (left < 0) |
| 6896 left = 0; | 6258 left = 0; |
| 6897 if (right < 0) | 6259 if (right < 0) |
| 6898 right = 0; | 6260 right = 0; |
| 6899 | 6261 |
| 6900 if (left == 0 && right == 0 && PyUnicode_CheckExact(self)) { | 6262 if (left == 0 && right == 0 && PyUnicode_CheckExact(self)) { |
| 6901 Py_INCREF(self); | 6263 Py_INCREF(self); |
| (...skipping 10 matching lines...) |