Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(169089)

Side by Side Diff: Objects/unicodeobject.c

Issue 28128: Improve the warning message for invalid escape sequences
Patch Set: Created 3 years, 1 month ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Objects/bytesobject.c ('k') | Python/ast.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 2
3 Unicode implementation based on original code by Fredrik Lundh, 3 Unicode implementation based on original code by Fredrik Lundh,
4 modified by Marc-Andre Lemburg <mal@lemburg.com>. 4 modified by Marc-Andre Lemburg <mal@lemburg.com>.
5 5
6 Major speed upgrades to the method implementations at the Reykjavik 6 Major speed upgrades to the method implementations at the Reykjavik
7 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. 7 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8 8
9 Copyright (c) Corporation for National Research Initiatives. 9 Copyright (c) Corporation for National Research Initiatives.
10 10
(...skipping 5878 matching lines...) Expand 10 before | Expand all | Expand 10 after
5889 PyUnicode_AsUTF16String(PyObject *unicode) 5889 PyUnicode_AsUTF16String(PyObject *unicode)
5890 { 5890 {
5891 return _PyUnicode_EncodeUTF16(unicode, NULL, 0); 5891 return _PyUnicode_EncodeUTF16(unicode, NULL, 0);
5892 } 5892 }
5893 5893
5894 /* --- Unicode Escape Codec ----------------------------------------------- */ 5894 /* --- Unicode Escape Codec ----------------------------------------------- */
5895 5895
5896 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; 5896 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
5897 5897
5898 PyObject * 5898 PyObject *
5899 PyUnicode_DecodeUnicodeEscape(const char *s, 5899 _PyUnicode_DecodeUnicodeEscape(const char *s,
5900 Py_ssize_t size, 5900 Py_ssize_t size,
5901 const char *errors) 5901 const char *errors,
5902 const char **first_invalid_escape)
5902 { 5903 {
5903 const char *starts = s; 5904 const char *starts = s;
5904 _PyUnicodeWriter writer; 5905 _PyUnicodeWriter writer;
5905 const char *end; 5906 const char *end;
5906 PyObject *errorHandler = NULL; 5907 PyObject *errorHandler = NULL;
5907 PyObject *exc = NULL; 5908 PyObject *exc = NULL;
5909
5910 // so we can remember if we've seen an invalid escape char or not
5911 *first_invalid_escape = NULL;
5908 5912
5909 if (size == 0) { 5913 if (size == 0) {
5910 _Py_RETURN_UNICODE_EMPTY(); 5914 _Py_RETURN_UNICODE_EMPTY();
5911 } 5915 }
5912 /* Escaped strings will always be longer than the resulting 5916 /* Escaped strings will always be longer than the resulting
5913 Unicode string, so we start with size here and then reduce the 5917 Unicode string, so we start with size here and then reduce the
5914 length after conversion to the true value. 5918 length after conversion to the true value.
5915 (but if the error callback returns a long replacement string 5919 (but if the error callback returns a long replacement string
5916 we'll have to allocate more space) */ 5920 we'll have to allocate more space) */
5917 _PyUnicodeWriter_Init(&writer); 5921 _PyUnicodeWriter_Init(&writer);
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
6073 assert(ch <= MAX_UNICODE); 6077 assert(ch <= MAX_UNICODE);
6074 WRITE_CHAR(ch); 6078 WRITE_CHAR(ch);
6075 continue; 6079 continue;
6076 } 6080 }
6077 message = "unknown Unicode character name"; 6081 message = "unknown Unicode character name";
6078 } 6082 }
6079 } 6083 }
6080 goto error; 6084 goto error;
6081 6085
6082 default: 6086 default:
6083 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, 6087 if (*first_invalid_escape == NULL) {
6084 "invalid escape sequence '\\%c'", c) < 0) 6088 *first_invalid_escape = s-1; /* Back up one char, since we've
6085 goto onError; 6089 already incremented s. */
6090 }
6086 WRITE_ASCII_CHAR('\\'); 6091 WRITE_ASCII_CHAR('\\');
6087 WRITE_CHAR(c); 6092 WRITE_CHAR(c);
6088 continue; 6093 continue;
6089 } 6094 }
6090 6095
6091 error: 6096 error:
6092 endinpos = s-starts; 6097 endinpos = s-starts;
6093 writer.min_length = end - s + writer.pos; 6098 writer.min_length = end - s + writer.pos;
6094 if (unicode_decode_call_errorhandler_writer( 6099 if (unicode_decode_call_errorhandler_writer(
6095 errors, &errorHandler, 6100 errors, &errorHandler,
(...skipping 12 matching lines...) Expand all
6108 6113
6109 Py_XDECREF(errorHandler); 6114 Py_XDECREF(errorHandler);
6110 Py_XDECREF(exc); 6115 Py_XDECREF(exc);
6111 return _PyUnicodeWriter_Finish(&writer); 6116 return _PyUnicodeWriter_Finish(&writer);
6112 6117
6113 onError: 6118 onError:
6114 _PyUnicodeWriter_Dealloc(&writer); 6119 _PyUnicodeWriter_Dealloc(&writer);
6115 Py_XDECREF(errorHandler); 6120 Py_XDECREF(errorHandler);
6116 Py_XDECREF(exc); 6121 Py_XDECREF(exc);
6117 return NULL; 6122 return NULL;
6123 }
6124
6125 PyObject *
6126 PyUnicode_DecodeUnicodeEscape(const char *s,
6127 Py_ssize_t size,
6128 const char *errors)
6129 {
6130 const char *first_invalid_escape;
6131 PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors,
6132 &first_invalid_escape);
6133 if (result == NULL)
6134 return NULL;
6135 if (first_invalid_escape != NULL) {
6136 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
6137 "invalid escape sequence '\\%c'",
6138 *first_invalid_escape) < 0) {
6139 Py_DECREF(result);
6140 return NULL;
6141 }
6142 }
6143 return result;
6118 } 6144 }
6119 6145
6120 /* Return a Unicode-Escape string version of the Unicode object. 6146 /* Return a Unicode-Escape string version of the Unicode object.
6121 6147
6122 If quotes is true, the string is enclosed in u"" or u'' quotes as 6148 If quotes is true, the string is enclosed in u"" or u'' quotes as
6123 appropriate. 6149 appropriate.
6124 6150
6125 */ 6151 */
6126 6152
6127 PyObject * 6153 PyObject *
(...skipping 9368 matching lines...) Expand 10 before | Expand all | Expand 10 after
15496 PyMODINIT_FUNC 15522 PyMODINIT_FUNC
15497 PyInit__string(void) 15523 PyInit__string(void)
15498 { 15524 {
15499 return PyModule_Create(&_string_module); 15525 return PyModule_Create(&_string_module);
15500 } 15526 }
15501 15527
15502 15528
15503 #ifdef __cplusplus 15529 #ifdef __cplusplus
15504 } 15530 }
15505 #endif 15531 #endif
OLDNEW
« no previous file with comments | « Objects/bytesobject.c ('k') | Python/ast.c » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+