Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(168206)

Delta Between Two Patch Sets: Objects/unicodeobject.c

Issue 28128: Improve the warning message for invalid escape sequences
Left Patch Set: Created 3 years, 2 months ago
Right Patch Set: Created 3 years, 1 month ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « Objects/bytesobject.c ('k') | Python/ast.c » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 /* 1 /*
2 2
3 Unicode implementation based on original code by Fredrik Lundh, 3 Unicode implementation based on original code by Fredrik Lundh,
4 modified by Marc-Andre Lemburg <mal@lemburg.com>. 4 modified by Marc-Andre Lemburg <mal@lemburg.com>.
5 5
6 Major speed upgrades to the method implementations at the Reykjavik 6 Major speed upgrades to the method implementations at the Reykjavik
7 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. 7 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8 8
9 Copyright (c) Corporation for National Research Initiatives. 9 Copyright (c) Corporation for National Research Initiatives.
10 10
(...skipping 1531 matching lines...) Expand 10 before | Expand all | Expand 10 after
1542 if (!PyUnicode_Check(from) || !PyUnicode_Check(to)) { 1542 if (!PyUnicode_Check(from) || !PyUnicode_Check(to)) {
1543 PyErr_BadInternalCall(); 1543 PyErr_BadInternalCall();
1544 return -1; 1544 return -1;
1545 } 1545 }
1546 1546
1547 if (PyUnicode_READY(from) == -1) 1547 if (PyUnicode_READY(from) == -1)
1548 return -1; 1548 return -1;
1549 if (PyUnicode_READY(to) == -1) 1549 if (PyUnicode_READY(to) == -1)
1550 return -1; 1550 return -1;
1551 1551
1552 if (from_start < 0) { 1552 if ((size_t)from_start > (size_t)PyUnicode_GET_LENGTH(from)) {
1553 PyErr_SetString(PyExc_IndexError, "string index out of range"); 1553 PyErr_SetString(PyExc_IndexError, "string index out of range");
1554 return -1; 1554 return -1;
1555 } 1555 }
1556 if (to_start < 0) { 1556 if ((size_t)to_start > (size_t)PyUnicode_GET_LENGTH(to)) {
1557 PyErr_SetString(PyExc_IndexError, "string index out of range"); 1557 PyErr_SetString(PyExc_IndexError, "string index out of range");
1558 return -1; 1558 return -1;
1559 } 1559 }
1560 how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many); 1560 if (how_many < 0) {
1561 PyErr_SetString(PyExc_SystemError, "how_many cannot be negative");
1562 return -1;
1563 }
1564 how_many = Py_MIN(PyUnicode_GET_LENGTH(from)-from_start, how_many);
1561 if (to_start + how_many > PyUnicode_GET_LENGTH(to)) { 1565 if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
1562 PyErr_Format(PyExc_SystemError, 1566 PyErr_Format(PyExc_SystemError,
1563 "Cannot write %zi characters at %zi " 1567 "Cannot write %zi characters at %zi "
1564 "in a string of %zi characters", 1568 "in a string of %zi characters",
1565 how_many, to_start, PyUnicode_GET_LENGTH(to)); 1569 how_many, to_start, PyUnicode_GET_LENGTH(to));
1566 return -1; 1570 return -1;
1567 } 1571 }
1568 1572
1569 if (how_many == 0) 1573 if (how_many == 0)
1570 return 0; 1574 return 0;
(...skipping 1296 matching lines...) Expand 10 before | Expand all | Expand 10 after
2867 PyUnicode_FromFormatV(const char *format, va_list vargs) 2871 PyUnicode_FromFormatV(const char *format, va_list vargs)
2868 { 2872 {
2869 va_list vargs2; 2873 va_list vargs2;
2870 const char *f; 2874 const char *f;
2871 _PyUnicodeWriter writer; 2875 _PyUnicodeWriter writer;
2872 2876
2873 _PyUnicodeWriter_Init(&writer); 2877 _PyUnicodeWriter_Init(&writer);
2874 writer.min_length = strlen(format) + 100; 2878 writer.min_length = strlen(format) + 100;
2875 writer.overallocate = 1; 2879 writer.overallocate = 1;
2876 2880
2877 /* va_list may be an array (of 1 item) on some platforms (ex: AMD64). 2881 // Copy varargs to be able to pass a reference to a subfunction.
2878 Copy it to be able to pass a reference to a subfunction. */ 2882 va_copy(vargs2, vargs);
2879 Py_VA_COPY(vargs2, vargs);
2880 2883
2881 for (f = format; *f; ) { 2884 for (f = format; *f; ) {
2882 if (*f == '%') { 2885 if (*f == '%') {
2883 f = unicode_fromformat_arg(&writer, f, &vargs2); 2886 f = unicode_fromformat_arg(&writer, f, &vargs2);
2884 if (f == NULL) 2887 if (f == NULL)
2885 goto fail; 2888 goto fail;
2886 } 2889 }
2887 else { 2890 else {
2888 const char *p; 2891 const char *p;
2889 Py_ssize_t len; 2892 Py_ssize_t len;
2890 2893
2891 p = f; 2894 p = f;
2892 do 2895 do
2893 { 2896 {
2894 if ((unsigned char)*p > 127) { 2897 if ((unsigned char)*p > 127) {
2895 PyErr_Format(PyExc_ValueError, 2898 PyErr_Format(PyExc_ValueError,
2896 "PyUnicode_FromFormatV() expects an ASCII-encoded format " 2899 "PyUnicode_FromFormatV() expects an ASCII-encoded format "
2897 "string, got a non-ASCII byte: 0x%02x", 2900 "string, got a non-ASCII byte: 0x%02x",
2898 (unsigned char)*p); 2901 (unsigned char)*p);
2899 return NULL; 2902 goto fail;
2900 } 2903 }
2901 p++; 2904 p++;
2902 } 2905 }
2903 while (*p != '\0' && *p != '%'); 2906 while (*p != '\0' && *p != '%');
2904 len = p - f; 2907 len = p - f;
2905 2908
2906 if (*p == '\0') 2909 if (*p == '\0')
2907 writer.overallocate = 0; 2910 writer.overallocate = 0;
2908 2911
2909 if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0) 2912 if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0)
2910 goto fail; 2913 goto fail;
2911 2914
2912 f = p; 2915 f = p;
2913 } 2916 }
2914 } 2917 }
2918 va_end(vargs2);
2915 return _PyUnicodeWriter_Finish(&writer); 2919 return _PyUnicodeWriter_Finish(&writer);
2916 2920
2917 fail: 2921 fail:
2922 va_end(vargs2);
2918 _PyUnicodeWriter_Dealloc(&writer); 2923 _PyUnicodeWriter_Dealloc(&writer);
2919 return NULL; 2924 return NULL;
2920 } 2925 }
2921 2926
2922 PyObject * 2927 PyObject *
2923 PyUnicode_FromFormat(const char *format, ...) 2928 PyUnicode_FromFormat(const char *format, ...)
2924 { 2929 {
2925 PyObject* ret; 2930 PyObject* ret;
2926 va_list vargs; 2931 va_list vargs;
2927 2932
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after
3088 if (buffer.len == 0) { 3093 if (buffer.len == 0) {
3089 PyBuffer_Release(&buffer); 3094 PyBuffer_Release(&buffer);
3090 _Py_RETURN_UNICODE_EMPTY(); 3095 _Py_RETURN_UNICODE_EMPTY();
3091 } 3096 }
3092 3097
3093 v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors); 3098 v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
3094 PyBuffer_Release(&buffer); 3099 PyBuffer_Release(&buffer);
3095 return v; 3100 return v;
3096 } 3101 }
3097 3102
3098 /* Normalize an encoding name: C implementation of 3103 /* Normalize an encoding name: similar to encodings.normalize_encoding(), but
3099 encodings.normalize_encoding(). Return 1 on success, or 0 on error (encoding 3104 also convert to lowercase. Return 1 on success, or 0 on error (encoding is
3100 is longer than lower_len-1). */ 3105 longer than lower_len-1). */
3101 int 3106 int
3102 _Py_normalize_encoding(const char *encoding, 3107 _Py_normalize_encoding(const char *encoding,
3103 char *lower, 3108 char *lower,
3104 size_t lower_len) 3109 size_t lower_len)
3105 { 3110 {
3106 const char *e; 3111 const char *e;
3107 char *l; 3112 char *l;
3108 char *l_end; 3113 char *l_end;
3109 int punct; 3114 int punct;
3110 3115
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
3207 if (buffer == NULL) 3212 if (buffer == NULL)
3208 goto onError; 3213 goto onError;
3209 unicode = _PyCodec_DecodeText(buffer, encoding, errors); 3214 unicode = _PyCodec_DecodeText(buffer, encoding, errors);
3210 if (unicode == NULL) 3215 if (unicode == NULL)
3211 goto onError; 3216 goto onError;
3212 if (!PyUnicode_Check(unicode)) { 3217 if (!PyUnicode_Check(unicode)) {
3213 PyErr_Format(PyExc_TypeError, 3218 PyErr_Format(PyExc_TypeError,
3214 "'%.400s' decoder returned '%.400s' instead of 'str'; " 3219 "'%.400s' decoder returned '%.400s' instead of 'str'; "
3215 "use codecs.decode() to decode to arbitrary types", 3220 "use codecs.decode() to decode to arbitrary types",
3216 encoding, 3221 encoding,
3217 Py_TYPE(unicode)->tp_name, Py_TYPE(unicode)->tp_name); 3222 Py_TYPE(unicode)->tp_name);
3218 Py_DECREF(unicode); 3223 Py_DECREF(unicode);
3219 goto onError; 3224 goto onError;
3220 } 3225 }
3221 Py_DECREF(buffer); 3226 Py_DECREF(buffer);
3222 return unicode_result(unicode); 3227 return unicode_result(unicode);
3223 3228
3224 onError: 3229 onError:
3225 Py_XDECREF(buffer); 3230 Py_XDECREF(buffer);
3226 return NULL; 3231 return NULL;
3227 } 3232 }
3228 3233
3229 PyObject * 3234 PyObject *
3230 PyUnicode_AsDecodedObject(PyObject *unicode, 3235 PyUnicode_AsDecodedObject(PyObject *unicode,
3231 const char *encoding, 3236 const char *encoding,
3232 const char *errors) 3237 const char *errors)
3233 { 3238 {
3234 PyObject *v;
3235
3236 if (!PyUnicode_Check(unicode)) { 3239 if (!PyUnicode_Check(unicode)) {
3237 PyErr_BadArgument(); 3240 PyErr_BadArgument();
3238 goto onError; 3241 return NULL;
3239 } 3242 }
3243
3244 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3245 "PyUnicode_AsDecodedObject() is deprecated; "
3246 "use PyCodec_Decode() to decode from str", 1) < 0)
3247 return NULL;
3240 3248
3241 if (encoding == NULL) 3249 if (encoding == NULL)
3242 encoding = PyUnicode_GetDefaultEncoding(); 3250 encoding = PyUnicode_GetDefaultEncoding();
3243 3251
3244 /* Decode via the codec registry */ 3252 /* Decode via the codec registry */
3245 v = PyCodec_Decode(unicode, encoding, errors); 3253 return PyCodec_Decode(unicode, encoding, errors);
3246 if (v == NULL)
3247 goto onError;
3248 return unicode_result(v);
3249
3250 onError:
3251 return NULL;
3252 } 3254 }
3253 3255
3254 PyObject * 3256 PyObject *
3255 PyUnicode_AsDecodedUnicode(PyObject *unicode, 3257 PyUnicode_AsDecodedUnicode(PyObject *unicode,
3256 const char *encoding, 3258 const char *encoding,
3257 const char *errors) 3259 const char *errors)
3258 { 3260 {
3259 PyObject *v; 3261 PyObject *v;
3260 3262
3261 if (!PyUnicode_Check(unicode)) { 3263 if (!PyUnicode_Check(unicode)) {
3262 PyErr_BadArgument(); 3264 PyErr_BadArgument();
3263 goto onError; 3265 goto onError;
3264 } 3266 }
3267
3268 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3269 "PyUnicode_AsDecodedUnicode() is deprecated; "
3270 "use PyCodec_Decode() to decode from str to str", 1) < 0)
3271 return NULL;
3265 3272
3266 if (encoding == NULL) 3273 if (encoding == NULL)
3267 encoding = PyUnicode_GetDefaultEncoding(); 3274 encoding = PyUnicode_GetDefaultEncoding();
3268 3275
3269 /* Decode via the codec registry */ 3276 /* Decode via the codec registry */
3270 v = PyCodec_Decode(unicode, encoding, errors); 3277 v = PyCodec_Decode(unicode, encoding, errors);
3271 if (v == NULL) 3278 if (v == NULL)
3272 goto onError; 3279 goto onError;
3273 if (!PyUnicode_Check(v)) { 3280 if (!PyUnicode_Check(v)) {
3274 PyErr_Format(PyExc_TypeError, 3281 PyErr_Format(PyExc_TypeError,
3275 "'%.400s' decoder returned '%.400s' instead of 'str'; " 3282 "'%.400s' decoder returned '%.400s' instead of 'str'; "
3276 "use codecs.decode() to decode to arbitrary types", 3283 "use codecs.decode() to decode to arbitrary types",
3277 encoding, 3284 encoding,
3278 Py_TYPE(unicode)->tp_name, Py_TYPE(unicode)->tp_name); 3285 Py_TYPE(unicode)->tp_name);
3279 Py_DECREF(v); 3286 Py_DECREF(v);
3280 goto onError; 3287 goto onError;
3281 } 3288 }
3282 return unicode_result(v); 3289 return unicode_result(v);
3283 3290
3284 onError: 3291 onError:
3285 return NULL; 3292 return NULL;
3286 } 3293 }
3287 3294
3288 PyObject * 3295 PyObject *
(...skipping 16 matching lines...) Expand all
3305 PyUnicode_AsEncodedObject(PyObject *unicode, 3312 PyUnicode_AsEncodedObject(PyObject *unicode,
3306 const char *encoding, 3313 const char *encoding,
3307 const char *errors) 3314 const char *errors)
3308 { 3315 {
3309 PyObject *v; 3316 PyObject *v;
3310 3317
3311 if (!PyUnicode_Check(unicode)) { 3318 if (!PyUnicode_Check(unicode)) {
3312 PyErr_BadArgument(); 3319 PyErr_BadArgument();
3313 goto onError; 3320 goto onError;
3314 } 3321 }
3322
3323 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3324 "PyUnicode_AsEncodedObject() is deprecated; "
3325 "use PyUnicode_AsEncodedString() to encode from str to bytes "
3326 "or PyCodec_Encode() for generic encoding", 1) < 0)
3327 return NULL;
3315 3328
3316 if (encoding == NULL) 3329 if (encoding == NULL)
3317 encoding = PyUnicode_GetDefaultEncoding(); 3330 encoding = PyUnicode_GetDefaultEncoding();
3318 3331
3319 /* Encode via the codec registry */ 3332 /* Encode via the codec registry */
3320 v = PyCodec_Encode(unicode, encoding, errors); 3333 v = PyCodec_Encode(unicode, encoding, errors);
3321 if (v == NULL) 3334 if (v == NULL)
3322 goto onError; 3335 goto onError;
3323 return v; 3336 return v;
3324 3337
(...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after
3614 3627
3615 b = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v)); 3628 b = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
3616 Py_DECREF(v); 3629 Py_DECREF(v);
3617 return b; 3630 return b;
3618 } 3631 }
3619 3632
3620 PyErr_Format(PyExc_TypeError, 3633 PyErr_Format(PyExc_TypeError,
3621 "'%.400s' encoder returned '%.400s' instead of 'bytes'; " 3634 "'%.400s' encoder returned '%.400s' instead of 'bytes'; "
3622 "use codecs.encode() to encode to arbitrary types", 3635 "use codecs.encode() to encode to arbitrary types",
3623 encoding, 3636 encoding,
3624 Py_TYPE(v)->tp_name, Py_TYPE(v)->tp_name); 3637 Py_TYPE(v)->tp_name);
3625 Py_DECREF(v); 3638 Py_DECREF(v);
3626 return NULL; 3639 return NULL;
3627 } 3640 }
3628 3641
3629 PyObject * 3642 PyObject *
3630 PyUnicode_AsEncodedUnicode(PyObject *unicode, 3643 PyUnicode_AsEncodedUnicode(PyObject *unicode,
3631 const char *encoding, 3644 const char *encoding,
3632 const char *errors) 3645 const char *errors)
3633 { 3646 {
3634 PyObject *v; 3647 PyObject *v;
3635 3648
3636 if (!PyUnicode_Check(unicode)) { 3649 if (!PyUnicode_Check(unicode)) {
3637 PyErr_BadArgument(); 3650 PyErr_BadArgument();
3638 goto onError; 3651 goto onError;
3639 } 3652 }
3653
3654 if (PyErr_WarnEx(PyExc_DeprecationWarning,
3655 "PyUnicode_AsEncodedUnicode() is deprecated; "
3656 "use PyCodec_Encode() to encode from str to str", 1) < 0)
3657 return NULL;
3640 3658
3641 if (encoding == NULL) 3659 if (encoding == NULL)
3642 encoding = PyUnicode_GetDefaultEncoding(); 3660 encoding = PyUnicode_GetDefaultEncoding();
3643 3661
3644 /* Encode via the codec registry */ 3662 /* Encode via the codec registry */
3645 v = PyCodec_Encode(unicode, encoding, errors); 3663 v = PyCodec_Encode(unicode, encoding, errors);
3646 if (v == NULL) 3664 if (v == NULL)
3647 goto onError; 3665 goto onError;
3648 if (!PyUnicode_Check(v)) { 3666 if (!PyUnicode_Check(v)) {
3649 PyErr_Format(PyExc_TypeError, 3667 PyErr_Format(PyExc_TypeError,
3650 "'%.400s' encoder returned '%.400s' instead of 'str'; " 3668 "'%.400s' encoder returned '%.400s' instead of 'str'; "
3651 "use codecs.encode() to encode to arbitrary types", 3669 "use codecs.encode() to encode to arbitrary types",
3652 encoding, 3670 encoding,
3653 Py_TYPE(v)->tp_name, Py_TYPE(v)->tp_name); 3671 Py_TYPE(v)->tp_name);
3654 Py_DECREF(v); 3672 Py_DECREF(v);
3655 goto onError; 3673 goto onError;
3656 } 3674 }
3657 return v; 3675 return v;
3658 3676
3659 onError: 3677 onError:
3660 return NULL; 3678 return NULL;
3661 } 3679 }
3662 3680
3663 static size_t 3681 static size_t
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after
3823 3841
3824 Py_FileSystemDefaultEncoding is shared between all interpreters, we 3842 Py_FileSystemDefaultEncoding is shared between all interpreters, we
3825 cannot only rely on it: check also interp->fscodec_initialized for 3843 cannot only rely on it: check also interp->fscodec_initialized for
3826 subinterpreters. */ 3844 subinterpreters. */
3827 if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) { 3845 if (Py_FileSystemDefaultEncoding && interp->fscodec_initialized) {
3828 PyObject *res = PyUnicode_Decode(s, size, 3846 PyObject *res = PyUnicode_Decode(s, size,
3829 Py_FileSystemDefaultEncoding, 3847 Py_FileSystemDefaultEncoding,
3830 Py_FileSystemDefaultEncodeErrors); 3848 Py_FileSystemDefaultEncodeErrors);
3831 #ifdef MS_WINDOWS 3849 #ifdef MS_WINDOWS
3832 if (!res && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { 3850 if (!res && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
3833 PyObject *exc, *val, *tb; 3851 _PyErr_FormatFromCause(PyExc_RuntimeError,
3834 PyErr_Fetch(&exc, &val, &tb); 3852 "filesystem path bytes were not correctly encoded with '%s'. "
3835 PyErr_Format(PyExc_RuntimeError,
3836 "filesystem path bytes were not correctly encoded with '%s'. " \
3837 "Please report this at http://bugs.python.org/issue27781", 3853 "Please report this at http://bugs.python.org/issue27781",
3838 Py_FileSystemDefaultEncoding); 3854 Py_FileSystemDefaultEncoding);
3839 _PyErr_ChainExceptions(exc, val, tb);
3840 } 3855 }
3841 #endif 3856 #endif
3842 return res; 3857 return res;
3843 } 3858 }
3844 else { 3859 else {
3845 return PyUnicode_DecodeLocaleAndSize(s, size, Py_FileSystemDefaultEncodeErrors); 3860 return PyUnicode_DecodeLocaleAndSize(s, size, Py_FileSystemDefaultEncodeErrors);
3846 } 3861 }
3847 #endif 3862 #endif
3848 } 3863 }
3849 3864
(...skipping 2027 matching lines...) Expand 10 before | Expand all | Expand 10 after
5877 } 5892 }
5878 5893
5879 /* --- Unicode Escape Codec ----------------------------------------------- */ 5894 /* --- Unicode Escape Codec ----------------------------------------------- */
5880 5895
5881 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; 5896 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
5882 5897
5883 PyObject * 5898 PyObject *
5884 _PyUnicode_DecodeUnicodeEscape(const char *s, 5899 _PyUnicode_DecodeUnicodeEscape(const char *s,
5885 Py_ssize_t size, 5900 Py_ssize_t size,
5886 const char *errors, 5901 const char *errors,
5887 char *first_invalid_escape_char, 5902 const char **first_invalid_escape)
5888 Py_ssize_t *first_invalid_escape_idx)
5889 { 5903 {
5890 const char *starts = s; 5904 const char *starts = s;
5891 _PyUnicodeWriter writer; 5905 _PyUnicodeWriter writer;
5892 const char *end; 5906 const char *end;
5893 PyObject *errorHandler = NULL; 5907 PyObject *errorHandler = NULL;
5894 PyObject *exc = NULL; 5908 PyObject *exc = NULL;
5895 5909
5896 // so we can remember if we've seen an invalid escape char or not 5910 // so we can remember if we've seen an invalid escape char or not
5897 *first_invalid_escape_idx = -1; 5911 *first_invalid_escape = NULL;
5898 5912
5899 if (size == 0) { 5913 if (size == 0) {
5900 _Py_RETURN_UNICODE_EMPTY(); 5914 _Py_RETURN_UNICODE_EMPTY();
5901 } 5915 }
5902 /* Escaped strings will always be longer than the resulting 5916 /* Escaped strings will always be longer than the resulting
5903 Unicode string, so we start with size here and then reduce the 5917 Unicode string, so we start with size here and then reduce the
5904 length after conversion to the true value. 5918 length after conversion to the true value.
5905 (but if the error callback returns a long replacement string 5919 (but if the error callback returns a long replacement string
5906 we'll have to allocate more space) */ 5920 we'll have to allocate more space) */
5907 _PyUnicodeWriter_Init(&writer); 5921 _PyUnicodeWriter_Init(&writer);
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
6063 assert(ch <= MAX_UNICODE); 6077 assert(ch <= MAX_UNICODE);
6064 WRITE_CHAR(ch); 6078 WRITE_CHAR(ch);
6065 continue; 6079 continue;
6066 } 6080 }
6067 message = "unknown Unicode character name"; 6081 message = "unknown Unicode character name";
6068 } 6082 }
6069 } 6083 }
6070 goto error; 6084 goto error;
6071 6085
6072 default: 6086 default:
6073 if (*first_invalid_escape_idx == -1) { 6087 if (*first_invalid_escape == NULL) {
6074 *first_invalid_escape_idx = s - starts; 6088 *first_invalid_escape = s-1; /* Back up one char, since we've
6075 *first_invalid_escape_char = c; 6089 already incremented s. */
6076 } 6090 }
6077 WRITE_ASCII_CHAR('\\'); 6091 WRITE_ASCII_CHAR('\\');
6078 WRITE_CHAR(c); 6092 WRITE_CHAR(c);
6079 continue; 6093 continue;
6080 } 6094 }
6081 6095
6082 error: 6096 error:
6083 endinpos = s-starts; 6097 endinpos = s-starts;
6084 writer.min_length = end - s + writer.pos; 6098 writer.min_length = end - s + writer.pos;
6085 if (unicode_decode_call_errorhandler_writer( 6099 if (unicode_decode_call_errorhandler_writer(
(...skipping 20 matching lines...) Expand all
6106 Py_XDECREF(errorHandler); 6120 Py_XDECREF(errorHandler);
6107 Py_XDECREF(exc); 6121 Py_XDECREF(exc);
6108 return NULL; 6122 return NULL;
6109 } 6123 }
6110 6124
6111 PyObject * 6125 PyObject *
6112 PyUnicode_DecodeUnicodeEscape(const char *s, 6126 PyUnicode_DecodeUnicodeEscape(const char *s,
6113 Py_ssize_t size, 6127 Py_ssize_t size,
6114 const char *errors) 6128 const char *errors)
6115 { 6129 {
6116 char first_invalid_escape_char; 6130 const char *first_invalid_escape;
6117 Py_ssize_t first_invalid_escape_idx; 6131 PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors,
6118 PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors, &first_invalid_escape_char, &first_invalid_escape_idx); 6132 &first_invalid_escape);
6119 if (result == NULL) 6133 if (result == NULL)
6120 return NULL; 6134 return NULL;
6121 if (first_invalid_escape_idx != -1) { 6135 if (first_invalid_escape != NULL) {
6122 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, 6136 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
6123 "invalid escape sequence '\\%c'", first_invalid_escape_char) < 0) { 6137 "invalid escape sequence '\\%c'",
6138 *first_invalid_escape) < 0) {
6124 Py_DECREF(result); 6139 Py_DECREF(result);
6125 return NULL; 6140 return NULL;
6126 } 6141 }
6127 } 6142 }
6128 return result; 6143 return result;
6129 } 6144 }
6130 6145
6131 /* Return a Unicode-Escape string version of the Unicode object. 6146 /* Return a Unicode-Escape string version of the Unicode object.
6132 6147
6133 If quotes is true, the string is enclosed in u"" or u'' quotes as 6148 If quotes is true, the string is enclosed in u"" or u'' quotes as
(...skipping 7461 matching lines...) Expand 10 before | Expand all | Expand 10 after
13595 return -1; 13610 return -1;
13596 unicode_write_cstr(writer->buffer, writer->pos, str, len); 13611 unicode_write_cstr(writer->buffer, writer->pos, str, len);
13597 writer->pos += len; 13612 writer->pos += len;
13598 return 0; 13613 return 0;
13599 } 13614 }
13600 13615
13601 PyObject * 13616 PyObject *
13602 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) 13617 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
13603 { 13618 {
13604 PyObject *str; 13619 PyObject *str;
13620
13605 if (writer->pos == 0) { 13621 if (writer->pos == 0) {
13606 Py_CLEAR(writer->buffer); 13622 Py_CLEAR(writer->buffer);
13607 _Py_RETURN_UNICODE_EMPTY(); 13623 _Py_RETURN_UNICODE_EMPTY();
13608 } 13624 }
13625
13626 str = writer->buffer;
13627 writer->buffer = NULL;
13628
13609 if (writer->readonly) { 13629 if (writer->readonly) {
13610 str = writer->buffer;
13611 writer->buffer = NULL;
13612 assert(PyUnicode_GET_LENGTH(str) == writer->pos); 13630 assert(PyUnicode_GET_LENGTH(str) == writer->pos);
13613 return str; 13631 return str;
13614 } 13632 }
13615 if (writer->pos == 0) { 13633
13616 Py_CLEAR(writer->buffer); 13634 if (PyUnicode_GET_LENGTH(str) != writer->pos) {
13617 13635 PyObject *str2;
13618 /* Get the empty Unicode string singleton ('') */ 13636 str2 = resize_compact(str, writer->pos);
13619 _Py_INCREF_UNICODE_EMPTY(); 13637 if (str2 == NULL) {
13620 str = unicode_empty; 13638 Py_DECREF(str);
13621 } 13639 return NULL;
13622 else { 13640 }
13623 str = writer->buffer; 13641 str = str2;
13624 writer->buffer = NULL;
13625
13626 if (PyUnicode_GET_LENGTH(str) != writer->pos) {
13627 PyObject *str2;
13628 str2 = resize_compact(str, writer->pos);
13629 if (str2 == NULL)
13630 return NULL;
13631 str = str2;
13632 }
13633 } 13642 }
13634 13643
13635 assert(_PyUnicode_CheckConsistency(str, 1)); 13644 assert(_PyUnicode_CheckConsistency(str, 1));
13636 return unicode_result_ready(str); 13645 return unicode_result_ready(str);
13637 } 13646 }
13638 13647
13639 void 13648 void
13640 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer) 13649 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
13641 { 13650 {
13642 Py_CLEAR(writer->buffer); 13651 Py_CLEAR(writer->buffer);
(...skipping 1870 matching lines...) Expand 10 before | Expand all | Expand 10 after
15513 PyMODINIT_FUNC 15522 PyMODINIT_FUNC
15514 PyInit__string(void) 15523 PyInit__string(void)
15515 { 15524 {
15516 return PyModule_Create(&_string_module); 15525 return PyModule_Create(&_string_module);
15517 } 15526 }
15518 15527
15519 15528
15520 #ifdef __cplusplus 15529 #ifdef __cplusplus
15521 } 15530 }
15522 #endif 15531 #endif
LEFTRIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+