Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(169289)

Side by Side Diff: Objects/bytesobject.c

Issue 28128: Improve the warning message for invalid escape sequences
Patch Set: Created 3 years, 1 month ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Lib/test/test_unicode.py ('k') | Objects/unicodeobject.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* bytes object implementation */ 1 /* bytes object implementation */
2 2
3 #define PY_SSIZE_T_CLEAN 3 #define PY_SSIZE_T_CLEAN
4 4
5 #include "Python.h" 5 #include "Python.h"
6 6
7 #include "bytes_methods.h" 7 #include "bytes_methods.h"
8 #include "pystrhex.h" 8 #include "pystrhex.h"
9 #include <stddef.h> 9 #include <stddef.h>
10 10
(...skipping 1087 matching lines...) Expand 10 before | Expand all | Expand 10 after
1098 PyBytes_AS_STRING(w), 1098 PyBytes_AS_STRING(w),
1099 PyBytes_GET_SIZE(w)); 1099 PyBytes_GET_SIZE(w));
1100 Py_DECREF(w); 1100 Py_DECREF(w);
1101 if (p == NULL) 1101 if (p == NULL)
1102 return NULL; 1102 return NULL;
1103 1103
1104 *s = t; 1104 *s = t;
1105 return p; 1105 return p;
1106 } 1106 }
1107 1107
1108 PyObject *PyBytes_DecodeEscape(const char *s, 1108 PyObject *_PyBytes_DecodeEscape(const char *s,
1109 Py_ssize_t len, 1109 Py_ssize_t len,
1110 const char *errors, 1110 const char *errors,
1111 Py_ssize_t unicode, 1111 Py_ssize_t unicode,
1112 const char *recode_encoding) 1112 const char *recode_encoding,
1113 const char **first_invalid_escape)
1113 { 1114 {
1114 int c; 1115 int c;
1115 char *p; 1116 char *p;
1116 const char *end; 1117 const char *end;
1117 _PyBytesWriter writer; 1118 _PyBytesWriter writer;
1118 1119
1119 _PyBytesWriter_Init(&writer); 1120 _PyBytesWriter_Init(&writer);
1120 1121
1121 p = _PyBytesWriter_Alloc(&writer, len); 1122 p = _PyBytesWriter_Alloc(&writer, len);
1122 if (p == NULL) 1123 if (p == NULL)
1123 return NULL; 1124 return NULL;
1124 writer.overallocate = 1; 1125 writer.overallocate = 1;
1126
1127 *first_invalid_escape = NULL;
1125 1128
1126 end = s + len; 1129 end = s + len;
1127 while (s < end) { 1130 while (s < end) {
1128 if (*s != '\\') { 1131 if (*s != '\\') {
1129 non_esc: 1132 non_esc:
1130 if (!(recode_encoding && (*s & 0x80))) { 1133 if (!(recode_encoding && (*s & 0x80))) {
1131 *p++ = *s++; 1134 *p++ = *s++;
1132 } 1135 }
1133 else { 1136 else {
1134 /* non-ASCII character and need to recode */ 1137 /* non-ASCII character and need to recode */
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
1200 "error handling code: %.400s", 1203 "error handling code: %.400s",
1201 errors); 1204 errors);
1202 goto failed; 1205 goto failed;
1203 } 1206 }
1204 /* skip \x */ 1207 /* skip \x */
1205 if (s < end && Py_ISXDIGIT(s[0])) 1208 if (s < end && Py_ISXDIGIT(s[0]))
1206 s++; /* and a hexdigit */ 1209 s++; /* and a hexdigit */
1207 break; 1210 break;
1208 1211
1209 default: 1212 default:
1210 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "invalid escape sequence '\\%c'", *(--s)) < 0) 1213 if (*first_invalid_escape == NULL) {
1211 goto failed; 1214 *first_invalid_escape = s-1; /* Back up one char, since we've
1215 already incremented s. */
1216 }
1212 *p++ = '\\'; 1217 *p++ = '\\';
1218 s--;
1213 goto non_esc; /* an arbitrary number of unescaped 1219 goto non_esc; /* an arbitrary number of unescaped
1214 UTF-8 bytes may follow. */ 1220 UTF-8 bytes may follow. */
1215 } 1221 }
1216 } 1222 }
1217 1223
1218 return _PyBytesWriter_Finish(&writer, p); 1224 return _PyBytesWriter_Finish(&writer, p);
1219 1225
1220 failed: 1226 failed:
1221 _PyBytesWriter_Dealloc(&writer); 1227 _PyBytesWriter_Dealloc(&writer);
1222 return NULL; 1228 return NULL;
1223 } 1229 }
1224 1230
1231 PyObject *PyBytes_DecodeEscape(const char *s,
1232 Py_ssize_t len,
1233 const char *errors,
1234 Py_ssize_t unicode,
1235 const char *recode_encoding)
1236 {
1237 const char* first_invalid_escape;
1238 PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1239 recode_encoding,
1240 &first_invalid_escape);
1241 if (result == NULL)
1242 return NULL;
1243 if (first_invalid_escape != NULL) {
1244 if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1245 "invalid escape sequence '\\%c'",
1246 *first_invalid_escape) < 0) {
1247 Py_DECREF(result);
1248 return NULL;
1249 }
1250 }
1251 return result;
1252
1253 }
1225 /* -------------------------------------------------------------------- */ 1254 /* -------------------------------------------------------------------- */
1226 /* object api */ 1255 /* object api */
1227 1256
1228 Py_ssize_t 1257 Py_ssize_t
1229 PyBytes_Size(PyObject *op) 1258 PyBytes_Size(PyObject *op)
1230 { 1259 {
1231 if (!PyBytes_Check(op)) { 1260 if (!PyBytes_Check(op)) {
1232 PyErr_Format(PyExc_TypeError, 1261 PyErr_Format(PyExc_TypeError,
1233 "expected bytes, %.200s found", Py_TYPE(op)->tp_name); 1262 "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1234 return -1; 1263 return -1;
(...skipping 2144 matching lines...) Expand 10 before | Expand all | Expand 10 after
3379 3408
3380 str = _PyBytesWriter_Prepare(writer, str, size); 3409 str = _PyBytesWriter_Prepare(writer, str, size);
3381 if (str == NULL) 3410 if (str == NULL)
3382 return NULL; 3411 return NULL;
3383 3412
3384 memcpy(str, bytes, size); 3413 memcpy(str, bytes, size);
3385 str += size; 3414 str += size;
3386 3415
3387 return str; 3416 return str;
3388 } 3417 }
OLD | NEW
« no previous file with comments | « Lib/test/test_unicode.py ('k') | Objects/unicodeobject.c » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+