Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(198110)

Delta Between Two Patch Sets: Python/ast.c

Issue 28128: Improve the warning message for invalid escape sequences
Left Patch Set: Created 3 years, 2 months ago
Right Patch Set: Created 3 years, 1 month ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « Objects/unicodeobject.c ('k') | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 /* 1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to 2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode(). 3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 * 4 *
5 */ 5 */
6 #include "Python.h" 6 #include "Python.h"
7 #include "Python-ast.h" 7 #include "Python-ast.h"
8 #include "node.h" 8 #include "node.h"
9 #include "ast.h" 9 #include "ast.h"
10 #include "token.h" 10 #include "token.h"
(...skipping 3131 matching lines...) Expand 10 before | Expand all | Expand 10 after
3142 if (!cause) 3142 if (!cause)
3143 return NULL; 3143 return NULL;
3144 } 3144 }
3145 return Raise(expression, cause, LINENO(n), n->n_col_offset, c->c_arena); 3145 return Raise(expression, cause, LINENO(n), n->n_col_offset, c->c_arena);
3146 } 3146 }
3147 default: 3147 default:
3148 PyErr_Format(PyExc_SystemError, 3148 PyErr_Format(PyExc_SystemError,
3149 "unexpected flow_stmt: %d", TYPE(ch)); 3149 "unexpected flow_stmt: %d", TYPE(ch));
3150 return NULL; 3150 return NULL;
3151 } 3151 }
3152
3153 PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
3154 return NULL;
3155 } 3152 }
3156 3153
3157 static alias_ty 3154 static alias_ty
3158 alias_for_import_name(struct compiling *c, const node *n, int store) 3155 alias_for_import_name(struct compiling *c, const node *n, int store)
3159 { 3156 {
3160 /* 3157 /*
3161 import_as_name: NAME ['as' NAME] 3158 import_as_name: NAME ['as' NAME]
3162 dotted_as_name: dotted_name ['as' NAME] 3159 dotted_as_name: dotted_name ['as' NAME]
3163 dotted_name: NAME ('.' NAME)* 3160 dotted_name: NAME ('.' NAME)*
3164 */ 3161 */
(...skipping 944 matching lines...) Expand 10 before | Expand all | Expand 10 after
4109 decode_utf8(struct compiling *c, const char **sPtr, const char *end) 4106 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4110 { 4107 {
4111 const char *s, *t; 4108 const char *s, *t;
4112 t = s = *sPtr; 4109 t = s = *sPtr;
4113 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */ 4110 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4114 while (s < end && (*s & 0x80)) s++; 4111 while (s < end && (*s & 0x80)) s++;
4115 *sPtr = s; 4112 *sPtr = s;
4116 return PyUnicode_DecodeUTF8(t, s - t, NULL); 4113 return PyUnicode_DecodeUTF8(t, s - t, NULL);
4117 } 4114 }
4118 4115
4119 PyObject * 4116 static int
4120 _PyUnicode_DecodeUnicodeEscape(const char *s, 4117 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4121 Py_ssize_t size, 4118 char first_invalid_escape_char)
4122 const char *errors, 4119 {
4123 char *first_invalid_escape_char, 4120 PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4124 Py_ssize_t *first_invalid_escape_idx); 4121 first_invalid_escape_char);
4122 if (msg == NULL) {
4123 return -1;
4124 }
4125 if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4126 c->c_filename, LINENO(n),
4127 NULL, NULL) < 0 &&
4128 PyErr_ExceptionMatches(PyExc_DeprecationWarning))
4129 {
4130 const char *s = PyUnicode_AsUTF8(msg);
4131 if (s != NULL) {
4132 ast_error(c, n, s);
4133 }
4134 Py_DECREF(msg);
4135 return -1;
4136 }
4137 Py_DECREF(msg);
4138 return 0;
4139 }
4125 4140
4126 static PyObject * 4141 static PyObject *
4127 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s, size_t len) 4142 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4143 size_t len)
4128 { 4144 {
4129 PyObject *v, *u; 4145 PyObject *v, *u;
4130 char *buf; 4146 char *buf;
4131 char *p; 4147 char *p;
4132 const char *end; 4148 const char *end;
4133 4149
4134 /* check for integer overflow */ 4150 /* check for integer overflow */
4135 if (len > SIZE_MAX / 6) 4151 if (len > SIZE_MAX / 6)
4136 return NULL; 4152 return NULL;
4137 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5 4153 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
4170 /* Should be impossible to overflow */ 4186 /* Should be impossible to overflow */
4171 assert(p - buf <= Py_SIZE(u)); 4187 assert(p - buf <= Py_SIZE(u));
4172 Py_DECREF(w); 4188 Py_DECREF(w);
4173 } else { 4189 } else {
4174 *p++ = *s++; 4190 *p++ = *s++;
4175 } 4191 }
4176 } 4192 }
4177 len = p - buf; 4193 len = p - buf;
4178 s = buf; 4194 s = buf;
4179 4195
4180 char first_invalid_escape_char; 4196 const char *first_invalid_escape;
4181 Py_ssize_t first_invalid_escape_idx; 4197 v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4182 v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape_char, &first_invalid_escape_idx); 4198
4199 if (v != NULL && first_invalid_escape != NULL) {
4200 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4201 /* We have not decref u before because first_invalid_escape points
4202 inside u. */
4203 Py_XDECREF(u);
4204 Py_DECREF(v);
4205 return NULL;
4206 }
4207 }
4183 Py_XDECREF(u); 4208 Py_XDECREF(u);
4184
4185 if (v != NULL && first_invalid_escape_idx != -1) {
4186 Py_DECREF(v);
4187 char buf[300];
4188 PyOS_snprintf(buf, sizeof(buf),
4189 "invalid escape sequence \\%c",
4190 first_invalid_escape_char);
4191 ast_error(c, n, buf);
4192 return NULL;
4193 }
4194 return v; 4209 return v;
4210 }
4211
4212 static PyObject *
4213 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4214 size_t len)
4215 {
4216 const char *first_invalid_escape;
4217 PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4218 &first_invalid_escape);
4219 if (result == NULL)
4220 return NULL;
4221
4222 if (first_invalid_escape != NULL) {
4223 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4224 Py_DECREF(result);
4225 return NULL;
4226 }
4227 }
4228 return result;
4195 } 4229 }
4196 4230
4197 /* Compile this expression in to an expr_ty. Add parens around the 4231 /* Compile this expression in to an expr_ty. Add parens around the
4198 expression, in order to allow leading spaces in the expression. */ 4232 expression, in order to allow leading spaces in the expression. */
4199 static expr_ty 4233 static expr_ty
4200 fstring_compile_expr(const char *expr_start, const char *expr_end, 4234 fstring_compile_expr(const char *expr_start, const char *expr_end,
4201 struct compiling *c, const node *n) 4235 struct compiling *c, const node *n)
4202 4236
4203 { 4237 {
4204 int all_whitespace = 1; 4238 int all_whitespace = 1;
(...skipping 858 matching lines...) Expand 10 before | Expand all | Expand 10 after
5063 for (ch = s; *ch; ch++) { 5097 for (ch = s; *ch; ch++) {
5064 if (Py_CHARMASK(*ch) >= 0x80) { 5098 if (Py_CHARMASK(*ch) >= 0x80) {
5065 ast_error(c, n, "bytes can only contain ASCII " 5099 ast_error(c, n, "bytes can only contain ASCII "
5066 "literal characters."); 5100 "literal characters.");
5067 return -1; 5101 return -1;
5068 } 5102 }
5069 } 5103 }
5070 if (*rawmode) 5104 if (*rawmode)
5071 *result = PyBytes_FromStringAndSize(s, len); 5105 *result = PyBytes_FromStringAndSize(s, len);
5072 else 5106 else
5073 *result = PyBytes_DecodeEscape(s, len, NULL, /* ignored */ 0, NULL); 5107 *result = decode_bytes_with_escapes(c, n, s, len);
5074 } else { 5108 } else {
5075 if (*rawmode) 5109 if (*rawmode)
5076 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL); 5110 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5077 else 5111 else
5078 *result = decode_unicode_with_escapes(c, n, s, len); 5112 *result = decode_unicode_with_escapes(c, n, s, len);
5079 } 5113 }
5080 return *result == NULL ? -1 : 0; 5114 return *result == NULL ? -1 : 0;
5081 } 5115 }
5082 5116
5083 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through 5117 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
5160 /* We're not a bytes string, bytes_str should never have been set. */ 5194 /* We're not a bytes string, bytes_str should never have been set. */
5161 assert(bytes_str == NULL); 5195 assert(bytes_str == NULL);
5162 5196
5163 return FstringParser_Finish(&state, c, n); 5197 return FstringParser_Finish(&state, c, n);
5164 5198
5165 error: 5199 error:
5166 Py_XDECREF(bytes_str); 5200 Py_XDECREF(bytes_str);
5167 FstringParser_Dealloc(&state); 5201 FstringParser_Dealloc(&state);
5168 return NULL; 5202 return NULL;
5169 } 5203 }
LEFT | RIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+