Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(154682)

Side by Side Diff: Python/ast.c

Issue 28128: Improve the warning message for invalid escape sequences
Patch Set: Created 3 years, 1 month ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Objects/unicodeobject.c ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to 2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode(). 3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 * 4 *
5 */ 5 */
6 #include "Python.h" 6 #include "Python.h"
7 #include "Python-ast.h" 7 #include "Python-ast.h"
8 #include "node.h" 8 #include "node.h"
9 #include "ast.h" 9 #include "ast.h"
10 #include "token.h" 10 #include "token.h"
(...skipping 4095 matching lines...) Expand 10 before | Expand all | Expand 10 after
4106 decode_utf8(struct compiling *c, const char **sPtr, const char *end) 4106 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4107 { 4107 {
4108 const char *s, *t; 4108 const char *s, *t;
4109 t = s = *sPtr; 4109 t = s = *sPtr;
4110 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */ 4110 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4111 while (s < end && (*s & 0x80)) s++; 4111 while (s < end && (*s & 0x80)) s++;
4112 *sPtr = s; 4112 *sPtr = s;
4113 return PyUnicode_DecodeUTF8(t, s - t, NULL); 4113 return PyUnicode_DecodeUTF8(t, s - t, NULL);
4114 } 4114 }
4115 4115
4116 static int
4117 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4118 char first_invalid_escape_char)
4119 {
4120 PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4121 first_invalid_escape_char);
4122 if (msg == NULL) {
4123 return -1;
4124 }
4125 if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4126 c->c_filename, LINENO(n),
4127 NULL, NULL) < 0 &&
4128 PyErr_ExceptionMatches(PyExc_DeprecationWarning))
4129 {
4130 const char *s = PyUnicode_AsUTF8(msg);
4131 if (s != NULL) {
4132 ast_error(c, n, s);
4133 }
4134 Py_DECREF(msg);
4135 return -1;
4136 }
4137 Py_DECREF(msg);
4138 return 0;
4139 }
4140
4116 static PyObject * 4141 static PyObject *
4117 decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len) 4142 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4143 size_t len)
4118 { 4144 {
4119 PyObject *v, *u; 4145 PyObject *v, *u;
4120 char *buf; 4146 char *buf;
4121 char *p; 4147 char *p;
4122 const char *end; 4148 const char *end;
4123 4149
4124 /* check for integer overflow */ 4150 /* check for integer overflow */
4125 if (len > SIZE_MAX / 6) 4151 if (len > SIZE_MAX / 6)
4126 return NULL; 4152 return NULL;
4127 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5 4153 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
4160 /* Should be impossible to overflow */ 4186 /* Should be impossible to overflow */
4161 assert(p - buf <= Py_SIZE(u)); 4187 assert(p - buf <= Py_SIZE(u));
4162 Py_DECREF(w); 4188 Py_DECREF(w);
4163 } else { 4189 } else {
4164 *p++ = *s++; 4190 *p++ = *s++;
4165 } 4191 }
4166 } 4192 }
4167 len = p - buf; 4193 len = p - buf;
4168 s = buf; 4194 s = buf;
4169 4195
4170 v = PyUnicode_DecodeUnicodeEscape(s, len, NULL); 4196 const char *first_invalid_escape;
4197 v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4198
4199 if (v != NULL && first_invalid_escape != NULL) {
4200 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4201 /* u has not been decref'ed yet because first_invalid_escape points
4202 inside u. */
4203 Py_XDECREF(u);
4204 Py_DECREF(v);
4205 return NULL;
4206 }
4207 }
4171 Py_XDECREF(u); 4208 Py_XDECREF(u);
4172 return v; 4209 return v;
4210 }
4211
4212 static PyObject *
4213 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4214 size_t len)
4215 {
4216 const char *first_invalid_escape;
4217 PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4218 &first_invalid_escape);
4219 if (result == NULL)
4220 return NULL;
4221
4222 if (first_invalid_escape != NULL) {
4223 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4224 Py_DECREF(result);
4225 return NULL;
4226 }
4227 }
4228 return result;
4173 } 4229 }
4174 4230
4175 /* Compile this expression into an expr_ty. Add parens around the 4231 /* Compile this expression into an expr_ty. Add parens around the
4176 expression, in order to allow leading spaces in the expression. */ 4232 expression, in order to allow leading spaces in the expression. */
4177 static expr_ty 4233 static expr_ty
4178 fstring_compile_expr(const char *expr_start, const char *expr_end, 4234 fstring_compile_expr(const char *expr_start, const char *expr_end,
4179 struct compiling *c, const node *n) 4235 struct compiling *c, const node *n)
4180 4236
4181 { 4237 {
4182 int all_whitespace = 1; 4238 int all_whitespace = 1;
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
4303 literal_end = *str; 4359 literal_end = *str;
4304 assert(*str <= end); 4360 assert(*str <= end);
4305 assert(*str == end || **str == '{' || **str == '}'); 4361 assert(*str == end || **str == '{' || **str == '}');
4306 done: 4362 done:
4307 if (literal_start != literal_end) { 4363 if (literal_start != literal_end) {
4308 if (raw) 4364 if (raw)
4309 *literal = PyUnicode_DecodeUTF8Stateful(literal_start, 4365 *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
4310 literal_end-literal_start, 4366 literal_end-literal_start,
4311 NULL, NULL); 4367 NULL, NULL);
4312 else 4368 else
4313 *literal = decode_unicode_with_escapes(c, literal_start, 4369 *literal = decode_unicode_with_escapes(c, n, literal_start,
4314 literal_end-literal_start); 4370 literal_end-literal_start);
4315 if (!*literal) 4371 if (!*literal)
4316 return -1; 4372 return -1;
4317 } 4373 }
4318 return result; 4374 return result;
4319 } 4375 }
4320 4376
4321 /* Forward declaration because parsing is recursive. */ 4377 /* Forward declaration because parsing is recursive. */
4322 static expr_ty 4378 static expr_ty
4323 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl, 4379 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
(...skipping 717 matching lines...) Expand 10 before | Expand all | Expand 10 after
5041 for (ch = s; *ch; ch++) { 5097 for (ch = s; *ch; ch++) {
5042 if (Py_CHARMASK(*ch) >= 0x80) { 5098 if (Py_CHARMASK(*ch) >= 0x80) {
5043 ast_error(c, n, "bytes can only contain ASCII " 5099 ast_error(c, n, "bytes can only contain ASCII "
5044 "literal characters."); 5100 "literal characters.");
5045 return -1; 5101 return -1;
5046 } 5102 }
5047 } 5103 }
5048 if (*rawmode) 5104 if (*rawmode)
5049 *result = PyBytes_FromStringAndSize(s, len); 5105 *result = PyBytes_FromStringAndSize(s, len);
5050 else 5106 else
5051 *result = PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL); 5107 *result = decode_bytes_with_escapes(c, n, s, len);
5052 } else { 5108 } else {
5053 if (*rawmode) 5109 if (*rawmode)
5054 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); 5110 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5055 else 5111 else
5056 *result = decode_unicode_with_escapes(c, s, len); 5112 *result = decode_unicode_with_escapes(c, n, s, len);
5057 } 5113 }
5058 return *result == NULL ? -1 : 0; 5114 return *result == NULL ? -1 : 0;
5059 } 5115 }
5060 5116
5061 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through 5117 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5062 each STRING atom, and process it as needed. For bytes, just 5118 each STRING atom, and process it as needed. For bytes, just
5063 concatenate them together, and the result will be a Bytes node. For 5119 concatenate them together, and the result will be a Bytes node. For
5064 normal strings and f-strings, concatenate them together. The result 5120 normal strings and f-strings, concatenate them together. The result
5065 will be a Str node if there were no f-strings; a FormattedValue 5121 will be a Str node if there were no f-strings; a FormattedValue
5066 node if there's just an f-string (with no leading or trailing 5122 node if there's just an f-string (with no leading or trailing
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
5138 /* We're not a bytes string, bytes_str should never have been set. */ 5194 /* We're not a bytes string, bytes_str should never have been set. */
5139 assert(bytes_str == NULL); 5195 assert(bytes_str == NULL);
5140 5196
5141 return FstringParser_Finish(&state, c, n); 5197 return FstringParser_Finish(&state, c, n);
5142 5198
5143 error: 5199 error:
5144 Py_XDECREF(bytes_str); 5200 Py_XDECREF(bytes_str);
5145 FstringParser_Dealloc(&state); 5201 FstringParser_Dealloc(&state);
5146 return NULL; 5202 return NULL;
5147 } 5203 }
OLDNEW
« no previous file with comments | « Objects/unicodeobject.c ('k') | no next file » | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+