Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(1)

Side by Side Diff: Python/ast.c

Issue 28128: Improve the warning message for invalid escape sequences
Patch Set: Created 3 years, 3 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Objects/unicodeobject.c ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to 2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode(). 3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 * 4 *
5 */ 5 */
6 #include "Python.h" 6 #include "Python.h"
7 #include "Python-ast.h" 7 #include "Python-ast.h"
8 #include "node.h" 8 #include "node.h"
9 #include "ast.h" 9 #include "ast.h"
10 #include "token.h" 10 #include "token.h"
(...skipping 4095 matching lines...) Expand 10 before | Expand all | Expand 10 after
4106 decode_utf8(struct compiling *c, const char **sPtr, const char *end) 4106 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4107 { 4107 {
4108 const char *s, *t; 4108 const char *s, *t;
4109 t = s = *sPtr; 4109 t = s = *sPtr;
4110 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */ 4110 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4111 while (s < end && (*s & 0x80)) s++; 4111 while (s < end && (*s & 0x80)) s++;
4112 *sPtr = s; 4112 *sPtr = s;
4113 return PyUnicode_DecodeUTF8(t, s - t, NULL); 4113 return PyUnicode_DecodeUTF8(t, s - t, NULL);
4114 } 4114 }
4115 4115
4116 static int
4117 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4118 char first_invalid_escape_char)
4119 {
4120 int res;
4121 PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4122 first_invalid_escape_char);
4123 if (msg == NULL) {
4124 return -1;
4125 }
4126 res = PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4127 c->c_filename, LINENO(n),
4128 NULL, NULL);
4129 Py_DECREF(msg);
4130 return res;
4131 }
4132
4116 static PyObject * 4133 static PyObject *
4117 decode_unicode_with_escapes(struct compiling *c, const char *s, size_t len) 4134 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4135 size_t len)
4118 { 4136 {
4119 PyObject *v, *u; 4137 PyObject *v, *u;
4120 char *buf; 4138 char *buf;
4121 char *p; 4139 char *p;
4122 const char *end; 4140 const char *end;
4123 4141
4124 /* check for integer overflow */ 4142 /* check for integer overflow */
4125 if (len > SIZE_MAX / 6) 4143 if (len > SIZE_MAX / 6)
4126 return NULL; 4144 return NULL;
4127 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5 4145 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
4160 /* Should be impossible to overflow */ 4178 /* Should be impossible to overflow */
4161 assert(p - buf <= Py_SIZE(u)); 4179 assert(p - buf <= Py_SIZE(u));
4162 Py_DECREF(w); 4180 Py_DECREF(w);
4163 } else { 4181 } else {
4164 *p++ = *s++; 4182 *p++ = *s++;
4165 } 4183 }
4166 } 4184 }
4167 len = p - buf; 4185 len = p - buf;
4168 s = buf; 4186 s = buf;
4169 4187
4170 v = PyUnicode_DecodeUnicodeEscape(s, len, NULL); 4188 const char *first_invalid_escape;
4189 v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4190
4191 if (v != NULL && first_invalid_escape != NULL) {
4192 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4193 /* u has not been decref'ed yet because first_invalid_escape points
4194 inside u. */
4195 Py_XDECREF(u);
4196 Py_DECREF(v);
4197 return NULL;
4198 }
4199 }
4171 Py_XDECREF(u); 4200 Py_XDECREF(u);
4172 return v; 4201 return v;
4202 }
4203
4204 static PyObject *
4205 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4206 size_t len)
4207 {
4208 const char *first_invalid_escape;
4209 PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4210 &first_invalid_escape);
4211 if (result == NULL)
4212 return NULL;
4213
4214 if (first_invalid_escape != NULL) {
4215 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4216 Py_DECREF(result);
4217 return NULL;
4218 }
4219 }
4220 return result;
4173 } 4221 }
4174 4222
4175 /* Compile this expression into an expr_ty. Add parens around the 4223 /* Compile this expression into an expr_ty. Add parens around the
4176 expression, in order to allow leading spaces in the expression. */ 4224 expression, in order to allow leading spaces in the expression. */
4177 static expr_ty 4225 static expr_ty
4178 fstring_compile_expr(const char *expr_start, const char *expr_end, 4226 fstring_compile_expr(const char *expr_start, const char *expr_end,
4179 struct compiling *c, const node *n) 4227 struct compiling *c, const node *n)
4180 4228
4181 { 4229 {
4182 int all_whitespace = 1; 4230 int all_whitespace = 1;
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
4303 literal_end = *str; 4351 literal_end = *str;
4304 assert(*str <= end); 4352 assert(*str <= end);
4305 assert(*str == end || **str == '{' || **str == '}'); 4353 assert(*str == end || **str == '{' || **str == '}');
4306 done: 4354 done:
4307 if (literal_start != literal_end) { 4355 if (literal_start != literal_end) {
4308 if (raw) 4356 if (raw)
4309 *literal = PyUnicode_DecodeUTF8Stateful(literal_start, 4357 *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
4310 literal_end-literal_start, 4358 literal_end-literal_start,
4311 NULL, NULL); 4359 NULL, NULL);
4312 else 4360 else
4313 *literal = decode_unicode_with_escapes(c, literal_start, 4361 *literal = decode_unicode_with_escapes(c, n, literal_start,
4314 literal_end-literal_start); 4362 literal_end-literal_start);
4315 if (!*literal) 4363 if (!*literal)
4316 return -1; 4364 return -1;
4317 } 4365 }
4318 return result; 4366 return result;
4319 } 4367 }
4320 4368
4321 /* Forward declaration because parsing is recursive. */ 4369 /* Forward declaration because parsing is recursive. */
4322 static expr_ty 4370 static expr_ty
4323 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl, 4371 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
(...skipping 717 matching lines...) Expand 10 before | Expand all | Expand 10 after
5041 for (ch = s; *ch; ch++) { 5089 for (ch = s; *ch; ch++) {
5042 if (Py_CHARMASK(*ch) >= 0x80) { 5090 if (Py_CHARMASK(*ch) >= 0x80) {
5043 ast_error(c, n, "bytes can only contain ASCII " 5091 ast_error(c, n, "bytes can only contain ASCII "
5044 "literal characters."); 5092 "literal characters.");
5045 return -1; 5093 return -1;
5046 } 5094 }
5047 } 5095 }
5048 if (*rawmode) 5096 if (*rawmode)
5049 *result = PyBytes_FromStringAndSize(s, len); 5097 *result = PyBytes_FromStringAndSize(s, len);
5050 else 5098 else
5051 *result = PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL); 5099 *result = decode_bytes_with_escapes(c, n, s, len);
5052 } else { 5100 } else {
5053 if (*rawmode) 5101 if (*rawmode)
5054 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); 5102 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5055 else 5103 else
5056 *result = decode_unicode_with_escapes(c, s, len); 5104 *result = decode_unicode_with_escapes(c, n, s, len);
5057 } 5105 }
5058 return *result == NULL ? -1 : 0; 5106 return *result == NULL ? -1 : 0;
5059 } 5107 }
5060 5108
5061 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through 5109 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5062 each STRING atom, and process it as needed. For bytes, just 5110 each STRING atom, and process it as needed. For bytes, just
5063 concatenate them together, and the result will be a Bytes node. For 5111 concatenate them together, and the result will be a Bytes node. For
5064 normal strings and f-strings, concatenate them together. The result 5112 normal strings and f-strings, concatenate them together. The result
5065 will be a Str node if there were no f-strings; a FormattedValue 5113 will be a Str node if there were no f-strings; a FormattedValue
5066 node if there's just an f-string (with no leading or trailing 5114 node if there's just an f-string (with no leading or trailing
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
5138 /* We're not a bytes string, bytes_str should never have been set. */ 5186 /* We're not a bytes string, bytes_str should never have been set. */
5139 assert(bytes_str == NULL); 5187 assert(bytes_str == NULL);
5140 5188
5141 return FstringParser_Finish(&state, c, n); 5189 return FstringParser_Finish(&state, c, n);
5142 5190
5143 error: 5191 error:
5144 Py_XDECREF(bytes_str); 5192 Py_XDECREF(bytes_str);
5145 FstringParser_Dealloc(&state); 5193 FstringParser_Dealloc(&state);
5146 return NULL; 5194 return NULL;
5147 } 5195 }
OLD | NEW
« no previous file with comments | « Objects/unicodeobject.c ('k') | no next file » | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+