File: | Parser/tokenizer.c |
Location: | line 1621, column 17 |
Description: | Value stored to 'c' is never read |
1 | |
2 | /* Tokenizer implementation */ |
3 | |
4 | #include "Python.h" |
5 | #include "pgenheaders.h" |
6 | |
7 | #include <ctype.h> |
8 | #include <assert.h> |
9 | |
10 | #include "tokenizer.h" |
11 | #include "errcode.h" |
12 | |
13 | #ifndef PGEN |
14 | #include "unicodeobject.h" |
15 | #include "bytesobject.h" |
16 | #include "fileobject.h" |
17 | #include "codecs.h" |
18 | #include "abstract.h" |
19 | #endif /* PGEN */ |
20 | |
/* True if C can begin an identifier: ASCII letter, underscore, or any
   byte >= 128 (non-ASCII bytes are accepted here and validated later). */
#define is_potential_identifier_start(c) (\
              (c >= 'a' && c <= 'z')\
               || (c >= 'A' && c <= 'Z')\
               || c == '_'\
               || (c >= 128))
26 | |
/* True if C can continue an identifier: same as the start set plus
   ASCII digits. */
#define is_potential_identifier_char(c) (\
              (c >= 'a' && c <= 'z')\
               || (c >= 'A' && c <= 'Z')\
               || (c >= '0' && c <= '9')\
               || c == '_'\
               || (c >= 128))
33 | |
34 | extern char *PyOS_Readline(FILE *, FILE *, char *); |
35 | /* Return malloc'ed string including trailing \n; |
36 | empty malloc'ed string for EOF; |
37 | NULL if interrupted */ |
38 | |
39 | /* Don't ever change this -- it would break the portability of Python code */ |
40 | #define TABSIZE8 8 |
41 | |
42 | /* Forward */ |
43 | static struct tok_state *tok_new(void); |
44 | static int tok_nextc(struct tok_state *tok); |
45 | static void tok_backup(struct tok_state *tok, int c); |
46 | |
47 | |
48 | /* Token names */ |
49 | |
/* Printable names for token codes, indexed by token number.
   The order is part of the interface: it must stay in lockstep with
   the #defines in token.h (see the reminder comment before "OP"). */
char *_PyParser_TokenNames[] = {
    "ENDMARKER",
    "NAME",
    "NUMBER",
    "STRING",
    "NEWLINE",
    "INDENT",
    "DEDENT",
    "LPAR",
    "RPAR",
    "LSQB",
    "RSQB",
    "COLON",
    "COMMA",
    "SEMI",
    "PLUS",
    "MINUS",
    "STAR",
    "SLASH",
    "VBAR",
    "AMPER",
    "LESS",
    "GREATER",
    "EQUAL",
    "DOT",
    "PERCENT",
    "LBRACE",
    "RBRACE",
    "EQEQUAL",
    "NOTEQUAL",
    "LESSEQUAL",
    "GREATEREQUAL",
    "TILDE",
    "CIRCUMFLEX",
    "LEFTSHIFT",
    "RIGHTSHIFT",
    "DOUBLESTAR",
    "PLUSEQUAL",
    "MINEQUAL",
    "STAREQUAL",
    "SLASHEQUAL",
    "PERCENTEQUAL",
    "AMPEREQUAL",
    "VBAREQUAL",
    "CIRCUMFLEXEQUAL",
    "LEFTSHIFTEQUAL",
    "RIGHTSHIFTEQUAL",
    "DOUBLESTAREQUAL",
    "DOUBLESLASH",
    "DOUBLESLASHEQUAL",
    "AT",
    "RARROW",
    "ELLIPSIS",
    /* This table must match the #defines in token.h! */
    "OP",
    "<ERRORTOKEN>",
    "<N_TOKENS>"
};
108 | |
109 | |
110 | /* Create and initialize a new tok_state structure */ |
111 | |
static struct tok_state *
tok_new(void)
{
    /* Allocate and zero-initialize a tokenizer state.  The caller owns
       the result and must release it with PyTokenizer_Free.
       Returns NULL on allocation failure. */
    struct tok_state *tok = (struct tok_state *)PyMem_MALLOC_PyMem_DebugMalloc(
                                            sizeof(struct tok_state));
    if (tok == NULL((void *)0))
        return NULL((void *)0);
    /* Empty buffer: all buffer pointers coincide until the first read. */
    tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL((void *)0);
    tok->done = E_OK10;
    tok->fp = NULL((void *)0);
    tok->input = NULL((void *)0);
    tok->tabsize = TABSIZE8;
    tok->indent = 0;
    tok->indstack[0] = 0;
    tok->atbol = 1;
    tok->pendin = 0;
    tok->prompt = tok->nextprompt = NULL((void *)0);
    tok->lineno = 0;
    tok->level = 0;
    tok->filename = NULL((void *)0);
    tok->altwarning = 1;
    tok->alterror = 1;
    tok->alttabsize = 1;
    tok->altindstack[0] = 0;
    /* Encoding is not yet known; check_bom/check_coding_spec will
       advance decoding_state as the input is examined. */
    tok->decoding_state = STATE_INIT;
    tok->decoding_erred = 0;
    tok->read_coding_spec = 0;
    tok->enc = NULL((void *)0);
    tok->encoding = NULL((void *)0);
    tok->cont_line = 0;
#ifndef PGEN
    tok->decoding_readline = NULL((void *)0);
    tok->decoding_buffer = NULL((void *)0);
#endif
    return tok;
}
148 | |
/* Heap-allocate a NUL-terminated copy of the first LEN bytes of S.
   Returns NULL on allocation failure; the caller frees the result. */
static char *
new_string(const char *s, Py_ssize_t len)
{
    char* result = (char *)PyMem_MALLOC_PyMem_DebugMalloc(len + 1);
    if (result != NULL((void *)0)) {
        memcpy(result, s, len)((__builtin_object_size (result, 0) != (size_t) -1) ? __builtin___memcpy_chk (result, s, len, __builtin_object_size (result, 0)) : __inline_memcpy_chk (result, s, len));
        result[len] = '\0';
    }
    return result;
}
159 | |
160 | #ifdef PGEN |
161 | |
162 | static char * |
163 | decoding_fgets(char *s, int size, struct tok_state *tok) |
164 | { |
165 | return fgets(s, size, tok->fp); |
166 | } |
167 | |
168 | static int |
169 | decoding_feof(struct tok_state *tok) |
170 | { |
171 | return feof(tok->fp); |
172 | } |
173 | |
/* PGEN build: no decoding is performed — just duplicate STR.
   EXEC_INPUT and TOK are accepted for signature parity with the
   non-PGEN version and are unused here. */
static char *
decode_str(const char *str, int exec_input, struct tok_state *tok)
{
    size_t length = strlen(str);
    return new_string(str, length);
}
179 | |
180 | #else /* PGEN */ |
181 | |
182 | static char * |
183 | error_ret(struct tok_state *tok) /* XXX */ |
184 | { |
185 | tok->decoding_erred = 1; |
186 | if (tok->fp != NULL((void *)0) && tok->buf != NULL((void *)0)) /* see PyTokenizer_Free */ |
187 | PyMem_FREE_PyMem_DebugFree(tok->buf); |
188 | tok->buf = NULL((void *)0); |
189 | return NULL((void *)0); /* as if it were EOF */ |
190 | } |
191 | |
192 | |
/* Map an encoding name S to the normal form the tokenizer
   special-cases.  The comparison is made on a copy of the first 12
   characters, lowercased and with '_' replaced by '-'.
   Returns the static string "utf-8" or "iso-8859-1" for members of
   those families, otherwise S itself (unmodified). */
static char *
get_normal_name(char *s)        /* for utf-8 and latin-1 */
{
    char buf[13];
    int i;
    for (i = 0; i < 12; i++) {
        int c = s[i];
        if (c == '\0')
            break;
        else if (c == '_')
            buf[i] = '-';
        else
            buf[i] = tolower(c);
    }
    buf[i] = '\0';
    if (strcmp(buf, "utf-8") == 0 ||
        strncmp(buf, "utf-8-", 6) == 0)
        return "utf-8";
    else if (strcmp(buf, "latin-1") == 0 ||
             strcmp(buf, "iso-8859-1") == 0 ||
             strcmp(buf, "iso-latin-1") == 0 ||
             strncmp(buf, "latin-1-", 8) == 0 ||
             strncmp(buf, "iso-8859-1-", 11) == 0 ||
             strncmp(buf, "iso-latin-1-", 12) == 0)
        return "iso-8859-1";
    else
        return s;
}
221 | |
222 | /* Return the coding spec in S, or NULL if none is found. */ |
223 | |
/* Scan line S (SIZE bytes) for a "coding: NAME" / "coding=NAME"
   declaration inside a comment.  Returns a freshly allocated,
   normalized encoding name (caller frees), or NULL if the line holds
   no spec or has code before the comment. */
static char *
get_coding_spec(const char *s, Py_ssize_t size)
{
    Py_ssize_t i;
    /* Coding spec must be in a comment, and that comment must be
     * the only statement on the source code line. */
    for (i = 0; i < size - 6; i++) {
        if (s[i] == '#')
            break;
        if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
            return NULL((void *)0);
    }
    for (; i < size - 6; i++) { /* XXX inefficient search */
        const char* t = s + i;
        if (strncmp(t, "coding", 6) == 0) {
            const char* begin = NULL((void *)0);
            t += 6;
            if (t[0] != ':' && t[0] != '=')
                continue;
            /* Skip spaces/tabs between the separator and the name. */
            do {
                t++;
            } while (t[0] == '\x20' || t[0] == '\t');

            /* Encoding names are alphanumerics plus '-', '_', '.'. */
            begin = t;
            while (Py_ISALNUM(t[0])(_Py_ctype_table[((unsigned char)((t[0]) & 0xff))] & ( (0x01|0x02)|0x04)) ||
                   t[0] == '-' || t[0] == '_' || t[0] == '.')
                t++;

            if (begin < t) {
                char* r = new_string(begin, t - begin);
                char* q = get_normal_name(r);
                if (r != q) {
                    /* Name was normalized: return a copy of the
                       canonical spelling instead. */
                    PyMem_FREE_PyMem_DebugFree(r);
                    r = new_string(q, strlen(q));
                }
                return r;
            }
        }
    }
    return NULL((void *)0);
}
265 | |
266 | /* Check whether the line contains a coding spec. If it does, |
267 | invoke the set_readline function for the new encoding. |
268 | This function receives the tok_state and the new encoding. |
269 | Return 1 on success, 0 on failure. */ |
270 | |
/* If LINE (SIZE bytes) carries a coding spec, adopt it: either record
   it directly (utf-8) or switch TOK to a codec reader via SET_READLINE.
   When a BOM already fixed the encoding, the spec must agree with it.
   Returns 1 on success, 0 on failure (with a SyntaxError set). */
static int
check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
                  int set_readline(struct tok_state *, const char *))
{
    char * cs;
    int r = 1;

    if (tok->cont_line)
        /* It's a continuation line, so it can't be a coding spec. */
        return 1;
    cs = get_coding_spec(line, size);
    if (cs != NULL((void *)0)) {
        tok->read_coding_spec = 1;
        if (tok->encoding == NULL((void *)0)) {
            assert(tok->decoding_state == STATE_RAW)(__builtin_expect(!(tok->decoding_state == STATE_RAW), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c", 285, "tok->decoding_state == STATE_RAW" ) : (void)0);
            if (strcmp(cs, "utf-8") == 0) {
                /* utf-8 needs no codec: keep reading raw bytes. */
                tok->encoding = cs;
            } else {
                r = set_readline(tok, cs);
                if (r) {
                    tok->encoding = cs;
                    tok->decoding_state = STATE_NORMAL;
                }
                else
                    PyMem_FREE_PyMem_DebugFree(cs);
            }
        } else {                /* then, compare cs with BOM */
            r = (strcmp(tok->encoding, cs) == 0);
            PyMem_FREE_PyMem_DebugFree(cs);
        }
    }
    if (!r) {
        cs = tok->encoding;
        if (!cs)
            cs = "with BOM";
        PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
    }
    return r;
}
310 | |
311 | /* See whether the file starts with a BOM. If it does, |
312 | invoke the set_readline function with the new encoding. |
313 | Return 1 on success, 0 on failure. */ |
314 | |
static int
check_bom(int get_char(struct tok_state *),
          void unget_char(int, struct tok_state *),
          int set_readline(struct tok_state *, const char *),
          struct tok_state *tok)
{
    /* Consume a leading UTF-8 BOM (EF BB BF) if present, recording
       "utf-8" as the encoding.  Any non-BOM prefix is pushed back so
       the tokenizer sees it again.  Returns 1 on success, 0 only on
       set_readline failure (UTF-16 path, currently compiled out). */
    int ch1, ch2, ch3;
    ch1 = get_char(tok);
    tok->decoding_state = STATE_RAW;
    if (ch1 == EOF(-1)) {
        return 1;
    } else if (ch1 == 0xEF) {
        /* Possible UTF-8 BOM: EF BB BF. */
        ch2 = get_char(tok);
        if (ch2 != 0xBB) {
            unget_char(ch2, tok);
            unget_char(ch1, tok);
            return 1;
        }
        ch3 = get_char(tok);
        if (ch3 != 0xBF) {
            unget_char(ch3, tok);
            unget_char(ch2, tok);
            unget_char(ch1, tok);
            return 1;
        }
#if 0
        /* Disable support for UTF-16 BOMs until a decision
           is made whether this needs to be supported. */
    } else if (ch1 == 0xFE) {
        ch2 = get_char(tok);
        if (ch2 != 0xFF) {
            unget_char(ch2, tok);
            unget_char(ch1, tok);
            return 1;
        }
        if (!set_readline(tok, "utf-16-be"))
            return 0;
        tok->decoding_state = STATE_NORMAL;
    } else if (ch1 == 0xFF) {
        ch2 = get_char(tok);
        if (ch2 != 0xFE) {
            unget_char(ch2, tok);
            unget_char(ch1, tok);
            return 1;
        }
        if (!set_readline(tok, "utf-16-le"))
            return 0;
        tok->decoding_state = STATE_NORMAL;
#endif
    } else {
        unget_char(ch1, tok);
        return 1;
    }
    /* A UTF-8 BOM was consumed: record the encoding (replacing any
       previous value). */
    if (tok->encoding != NULL((void *)0))
        PyMem_FREE_PyMem_DebugFree(tok->encoding);
    tok->encoding = new_string("utf-8", 5);     /* resulting is in utf-8 */
    /* No need to set_readline: input is already utf-8 */
    return 1;
}
374 | |
375 | /* Read a line of text from TOK into S, using the stream in TOK. |
376 | Return NULL on failure, else S. |
377 | |
378 | On entry, tok->decoding_buffer will be one of: |
379 | 1) NULL: need to call tok->decoding_readline to get a new line |
380 | 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and |
381 | stored the result in tok->decoding_buffer |
382 | 3) PyByteArrayObject *: previous call to fp_readl did not have enough room |
383 | (in the s buffer) to copy entire contents of the line read |
384 | by tok->decoding_readline. tok->decoding_buffer has the overflow. |
385 | In this case, fp_readl is called in a loop (with an expanded buffer) |
386 | until the buffer ends with a '\n' (or until the end of the file is |
387 | reached): see tok_nextc and its calls to decoding_fgets. |
388 | */ |
389 | |
static char *
fp_readl(char *s, int size, struct tok_state *tok)
{
    /* Read one decoded line into S (at most SIZE-1 bytes plus NUL).
       Lines come either from a leftover in tok->decoding_buffer or
       from a fresh tok->decoding_readline call; overflow beyond SIZE-1
       bytes is stashed back into tok->decoding_buffer for the next
       call.  Returns S, NULL at EOF, or error_ret(tok) on failure. */
    PyObject* bufobj;
    const char *buf;
    Py_ssize_t buflen;

    /* Ask for one less byte so we can terminate it */
    assert(size > 0)(__builtin_expect(!(size > 0), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c", 398, "size > 0") : (void)0);
    size--;

    if (tok->decoding_buffer) {
        /* Leftover from a previous call: consume it first. */
        bufobj = tok->decoding_buffer;
        Py_INCREF(bufobj)( _Py_RefTotal++ , ((PyObject*)(bufobj))->ob_refcnt++);
    }
    else
    {
        bufobj = PyObject_CallObject(tok->decoding_readline, NULL((void *)0));
        if (bufobj == NULL((void *)0))
            goto error;
    }
    if (PyUnicode_CheckExact(bufobj)((((PyObject*)(bufobj))->ob_type) == &PyUnicode_Type))
    {
        /* Fresh line from the readline callable (str). */
        buf = _PyUnicode_AsStringAndSize(bufobj, &buflen);
        if (buf == NULL((void *)0)) {
            goto error;
        }
    }
    else
    {
        /* Overflow bytes saved earlier as a bytearray. */
        buf = PyByteArray_AsString(bufobj);
        if (buf == NULL((void *)0)) {
            goto error;
        }
        buflen = PyByteArray_GET_SIZE(bufobj)((__builtin_expect(!(((((PyObject*)(bufobj))->ob_type) == ( &PyByteArray_Type) || PyType_IsSubtype((((PyObject*)(bufobj ))->ob_type), (&PyByteArray_Type)))), 0) ? __assert_rtn (__func__, "Parser/tokenizer.c", 424, "PyByteArray_Check(bufobj)" ) : (void)0),(((PyVarObject*)(bufobj))->ob_size));
    }

    Py_XDECREF(tok->decoding_buffer)do { if ((tok->decoding_buffer) == ((void *)0)) ; else do { if (_Py_RefTotal-- , --((PyObject*)(tok->decoding_buffer) )->ob_refcnt != 0) { if (((PyObject*)tok->decoding_buffer )->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c" , 427, (PyObject *)(tok->decoding_buffer)); } else _Py_Dealloc ((PyObject *)(tok->decoding_buffer)); } while (0); } while (0);
    if (buflen > size) {
        /* Too many chars, the rest goes into tok->decoding_buffer */
        tok->decoding_buffer = PyByteArray_FromStringAndSize(buf+size,
                                                        buflen-size);
        if (tok->decoding_buffer == NULL((void *)0))
            goto error;
        buflen = size;
    }
    else
        tok->decoding_buffer = NULL((void *)0);

    memcpy(s, buf, buflen)((__builtin_object_size (s, 0) != (size_t) -1) ? __builtin___memcpy_chk (s, buf, buflen, __builtin_object_size (s, 0)) : __inline_memcpy_chk (s, buf, buflen));
    s[buflen] = '\0';
    if (buflen == 0) /* EOF */
        s = NULL((void *)0);
    Py_DECREF(bufobj)do { if (_Py_RefTotal-- , --((PyObject*)(bufobj))->ob_refcnt != 0) { if (((PyObject*)bufobj)->ob_refcnt < 0) _Py_NegativeRefcount ("Parser/tokenizer.c", 443, (PyObject *)(bufobj)); } else _Py_Dealloc ((PyObject *)(bufobj)); } while (0);
    return s;

  error:
    Py_XDECREF(bufobj)do { if ((bufobj) == ((void *)0)) ; else do { if (_Py_RefTotal -- , --((PyObject*)(bufobj))->ob_refcnt != 0) { if (((PyObject *)bufobj)->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c" , 447, (PyObject *)(bufobj)); } else _Py_Dealloc((PyObject *) (bufobj)); } while (0); } while (0);
    return error_ret(tok);
}
450 | |
451 | /* Set the readline function for TOK to a StreamReader's |
452 | readline function. The StreamReader is named ENC. |
453 | |
454 | This function is called from check_bom and check_coding_spec. |
455 | |
456 | ENC is usually identical to the future value of tok->encoding, |
457 | except for the (currently unsupported) case of UTF-16. |
458 | |
459 | Return 1 on success, 0 on failure. */ |
460 | |
static int
fp_setreadl(struct tok_state *tok, const char* enc)
{
    /* Rewind TOK's file descriptor and wrap it via io.open() with
       encoding ENC; install the stream's readline as
       tok->decoding_readline.  Returns 1 on success, 0 on failure. */
    PyObject *readline = NULL((void *)0), *stream = NULL((void *)0), *io = NULL((void *)0);
    int fd;

    io = PyImport_ImportModuleNoBlock("io");
    if (io == NULL((void *)0))
        goto cleanup;

    /* Reading restarts from the top of the file. */
    fd = fileno(tok->fp);
    if (lseek(fd, 0, SEEK_SET0) == (off_t)-1) {
        PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL((void *)0));
        goto cleanup;
    }

    stream = PyObject_CallMethod(io, "open", "isisOOO",
                    fd, "r", -1, enc, Py_None(&_Py_NoneStruct), Py_None(&_Py_NoneStruct), Py_False((PyObject *) &_Py_FalseStruct));
    if (stream == NULL((void *)0))
        goto cleanup;

    Py_XDECREF(tok->decoding_readline)do { if ((tok->decoding_readline) == ((void *)0)) ; else do { if (_Py_RefTotal-- , --((PyObject*)(tok->decoding_readline ))->ob_refcnt != 0) { if (((PyObject*)tok->decoding_readline )->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c" , 482, (PyObject *)(tok->decoding_readline)); } else _Py_Dealloc ((PyObject *)(tok->decoding_readline)); } while (0); } while (0);
    readline = PyObject_GetAttrString(stream, "readline");
    tok->decoding_readline = readline;

    /* The file has been reopened; parsing will restart from
     * the beginning of the file, we have to reset the line number.
     * But this function has been called from inside tok_nextc() which
     * will increment lineno before it returns. So we set it -1 so that
     * the next call to tok_nextc() will start with tok->lineno == 0.
     */
    tok->lineno = -1;

  cleanup:
    Py_XDECREF(stream)do { if ((stream) == ((void *)0)) ; else do { if (_Py_RefTotal -- , --((PyObject*)(stream))->ob_refcnt != 0) { if (((PyObject *)stream)->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c" , 495, (PyObject *)(stream)); } else _Py_Dealloc((PyObject *) (stream)); } while (0); } while (0);
    Py_XDECREF(io)do { if ((io) == ((void *)0)) ; else do { if (_Py_RefTotal-- , --((PyObject*)(io))->ob_refcnt != 0) { if (((PyObject*)io )->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c" , 496, (PyObject *)(io)); } else _Py_Dealloc((PyObject *)(io) ); } while (0); } while (0);
    return readline != NULL((void *)0);
}
499 | |
500 | /* Fetch the next byte from TOK. */ |
501 | |
502 | static int fp_getc(struct tok_state *tok) { |
503 | return getc(tok->fp); |
504 | } |
505 | |
506 | /* Unfetch the last byte back into TOK. */ |
507 | |
508 | static void fp_ungetc(int c, struct tok_state *tok) { |
509 | ungetc(c, tok->fp); |
510 | } |
511 | |
512 | /* Check whether the characters at s start a valid |
513 | UTF-8 sequence. Return the number of characters forming |
514 | the sequence if yes, 0 if not. */ |
/* Check whether the bytes at S begin a structurally valid UTF-8
   sequence (lead byte plus the right number of continuation bytes).
   Return the sequence length (1-4) if so, 0 otherwise. */
static int valid_utf8(const unsigned char* s)
{
    int nfollow;                /* continuation bytes required */
    int i;
    unsigned char lead = *s;

    if (lead < 0x80)
        return 1;               /* plain ASCII */
    if (lead < 0xC0)
        return 0;               /* stray continuation byte */
    if (lead < 0xE0)
        nfollow = 1;            /* 2-byte sequence */
    else if (lead < 0xF0)
        nfollow = 2;            /* 3-byte sequence */
    else if (lead < 0xF8)
        nfollow = 3;            /* 4-byte sequence */
    else
        return 0;               /* 0xF8..0xFF: not a legal lead byte */
    for (i = 1; i <= nfollow; i++) {
        unsigned char b = s[i];
        if (b < 0x80 || b >= 0xC0)
            return 0;           /* not a continuation byte */
    }
    return nfollow + 1;
}
539 | |
540 | /* Read a line of input from TOK. Determine encoding |
541 | if necessary. */ |
542 | |
static char *
decoding_fgets(char *s, int size, struct tok_state *tok)
{
    /* Read one line into S, resolving the encoding first if it is not
       yet known (BOM check), then via either the codec reader
       (fp_readl) or a raw universal-newline read.  Lines 1-2 are
       scanned for a coding spec; raw UTF-8 input is validated.
       Returns S, or NULL / error_ret(tok) on EOF/failure. */
    char *line = NULL((void *)0);
    int badchar = 0;
    PyObject *filename;
    for (;;) {
        if (tok->decoding_state == STATE_NORMAL) {
            /* We already have a codec associated with
               this input. */
            line = fp_readl(s, size, tok);
            break;
        } else if (tok->decoding_state == STATE_RAW) {
            /* We want a 'raw' read. */
            line = Py_UniversalNewlineFgets(s, size,
                                            tok->fp, NULL((void *)0));
            break;
        } else {
            /* We have not yet determined the encoding.
               If an encoding is found, use the file-pointer
               reader functions from now on. */
            if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
                return error_ret(tok);
            assert(tok->decoding_state != STATE_INIT)(__builtin_expect(!(tok->decoding_state != STATE_INIT), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c", 566, "tok->decoding_state != STATE_INIT" ) : (void)0);
        }
    }
    /* A coding spec may only occur on the first two lines. */
    if (line != NULL((void *)0) && tok->lineno < 2 && !tok->read_coding_spec) {
        if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
            return error_ret(tok);
        }
    }
#ifndef PGEN
    /* The default encoding is UTF-8, so make sure we don't have any
       non-UTF-8 sequences in it. */
    if (line && !tok->encoding) {
        unsigned char *c;
        int length;
        for (c = (unsigned char *)line; *c; c += length)
            if (!(length = valid_utf8(c))) {
                badchar = *c;
                break;
            }
    }
    if (badchar) {
        /* Need to add 1 to the line number, since this line
           has not been counted, yet. */
        filename = PyUnicode_DecodeFSDefaultPyUnicodeUCS2_DecodeFSDefault(tok->filename);
        if (filename != NULL((void *)0)) {
            PyErr_Format(PyExc_SyntaxError,
                         "Non-UTF-8 code starting with '\\x%.2x' "
                         "in file %U on line %i, "
                         "but no encoding declared; "
                         "see http://python.org/dev/peps/pep-0263/ for details",
                         badchar, filename, tok->lineno + 1);
            Py_DECREF(filename)do { if (_Py_RefTotal-- , --((PyObject*)(filename))->ob_refcnt != 0) { if (((PyObject*)filename)->ob_refcnt < 0) _Py_NegativeRefcount ("Parser/tokenizer.c", 597, (PyObject *)(filename)); } else _Py_Dealloc ((PyObject *)(filename)); } while (0);
        }
        return error_ret(tok);
    }
#endif
    return line;
}
604 | |
static int
decoding_feof(struct tok_state *tok)
{
    /* End-of-input test.  With a codec active, EOF can only be detected
       by reading ahead: the peeked line is parked in
       tok->decoding_buffer for fp_readl to consume later. */
    if (tok->decoding_state != STATE_NORMAL) {
        return feof(tok->fp);
    } else {
        PyObject* buf = tok->decoding_buffer;
        if (buf == NULL((void *)0)) {
            buf = PyObject_CallObject(tok->decoding_readline, NULL((void *)0));
            if (buf == NULL((void *)0)) {
                /* Read failure is reported as EOF after flagging the
                   error on tok. */
                error_ret(tok);
                return 1;
            } else {
                tok->decoding_buffer = buf;
            }
        }
        return PyObject_LengthPyObject_Size(buf) == 0;
    }
}
624 | |
625 | /* Fetch a byte from TOK, using the string buffer. */ |
626 | |
/* Next byte from the in-memory string buffer, as a non-negative int. */
static int
buf_getc(struct tok_state *tok) {
    return Py_CHARMASK(*tok->str++)((unsigned char)((*tok->str++) & 0xff));
}
631 | |
632 | /* Unfetch a byte from TOK, using the string buffer. */ |
633 | |
/* Step the string buffer back one byte; C must equal the byte just
   read (verified by the assert). */
static void
buf_ungetc(int c, struct tok_state *tok) {
    tok->str--;
    assert(Py_CHARMASK(*tok->str) == c)(__builtin_expect(!(((unsigned char)((*tok->str) & 0xff )) == c), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c", 637 , "Py_CHARMASK(*tok->str) == c") : (void)0); /* tok->cur may point to read-only segment */
}
639 | |
640 | /* Set the readline function for TOK to ENC. For the string-based |
641 | tokenizer, this means to just record the encoding. */ |
642 | |
/* String-based tokenizer: just record the declared encoding on TOK.
   Always succeeds (returns 1). */
static int
buf_setreadl(struct tok_state *tok, const char* enc) {
    tok->enc = enc;
    return 1;
}
648 | |
649 | /* Return a UTF-8 encoding Python string object from the |
650 | C byte string STR, which is encoded with ENC. */ |
651 | |
static PyObject *
translate_into_utf8(const char* str, const char* enc) {
    /* Decode STR using codec ENC, then re-encode the result as UTF-8
       bytes.  Returns a new reference, or NULL on decode failure. */
    PyObject *utf8;
    PyObject* buf = PyUnicode_DecodePyUnicodeUCS2_Decode(str, strlen(str), enc, NULL((void *)0));
    if (buf == NULL((void *)0))
        return NULL((void *)0);
    utf8 = PyUnicode_AsUTF8StringPyUnicodeUCS2_AsUTF8String(buf);
    Py_DECREF(buf)do { if (_Py_RefTotal-- , --((PyObject*)(buf))->ob_refcnt != 0) { if (((PyObject*)buf)->ob_refcnt < 0) _Py_NegativeRefcount ("Parser/tokenizer.c", 659, (PyObject *)(buf)); } else _Py_Dealloc ((PyObject *)(buf)); } while (0);
    return utf8;
}
662 | |
663 | |
static char *
translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
    /* Copy S, normalizing "\r\n" and lone "\r" into "\n".  If
       EXEC_INPUT is true the copy is guaranteed to end in a newline.
       Returns a PyMem-allocated buffer (caller frees), or NULL with
       tok->done set on allocation failure. */
    int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length;
    char *buf, *current;
    char c = '\0';
    buf = PyMem_MALLOC_PyMem_DebugMalloc(needed_length);
    if (buf == NULL((void *)0)) {
        tok->done = E_NOMEM15;
        return NULL((void *)0);
    }
    for (current = buf; *s; s++, current++) {
        c = *s;
        if (skip_next_lf) {
            /* Previous char was '\r': swallow a following '\n'. */
            skip_next_lf = 0;
            if (c == '\n') {
                c = *++s;
                if (!c)
                    break;
            }
        }
        if (c == '\r') {
            skip_next_lf = 1;
            c = '\n';
        }
        *current = c;
    }
    /* If this is exec input, add a newline to the end of the string if
       there isn't one already. */
    if (exec_input && c != '\n') {
        *current = '\n';
        current++;
    }
    *current = '\0';
    /* Shrink to the bytes actually used (CRLF collapsing can free up
       space). */
    final_length = current - buf + 1;
    if (final_length < needed_length && final_length)
        /* should never fail */
        buf = PyMem_REALLOC_PyMem_DebugRealloc(buf, final_length);
    return buf;
}
703 | |
704 | /* Decode a byte string STR for use as the buffer of TOK. |
705 | Look for encoding declarations inside STR, and record them |
706 | inside TOK. */ |
707 | |
static const char *
decode_str(const char *input, int single, struct tok_state *tok)
{
    /* Prepare INPUT for tokenization: normalize newlines, strip a BOM,
       find a coding spec on line 1 or 2, and re-encode to UTF-8 if a
       non-UTF-8 encoding was declared.  Returns the (possibly new)
       buffer, or NULL / error_ret(tok) on failure.  When re-encoding
       happened, the owning bytes object is parked in
       tok->decoding_buffer so it outlives the returned pointer. */
    PyObject* utf8 = NULL((void *)0);
    const char *str;
    const char *s;
    const char *newl[2] = {NULL((void *)0), NULL((void *)0)};
    int lineno = 0;
    tok->input = str = translate_newlines(input, single, tok);
    if (str == NULL((void *)0))
        return NULL((void *)0);
    tok->enc = NULL((void *)0);
    tok->str = str;
    if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
        return error_ret(tok);
    str = tok->str;             /* string after BOM if any */
    assert(str)(__builtin_expect(!(str), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c" , 724, "str") : (void)0);
    if (tok->enc != NULL((void *)0)) {
        utf8 = translate_into_utf8(str, tok->enc);
        if (utf8 == NULL((void *)0))
            return error_ret(tok);
        str = PyBytes_AsString(utf8);
    }
    /* Locate the ends of the first two lines; a coding spec may only
       appear there. */
    for (s = str;; s++) {
        if (*s == '\0') break;
        else if (*s == '\n') {
            assert(lineno < 2)(__builtin_expect(!(lineno < 2), 0) ? __assert_rtn(__func__ , "Parser/tokenizer.c", 734, "lineno < 2") : (void)0);
            newl[lineno] = s;
            lineno++;
            if (lineno == 2) break;
        }
    }
    tok->enc = NULL((void *)0);
    /* need to check line 1 and 2 separately since check_coding_spec
       assumes a single line as input */
    if (newl[0]) {
        if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
            return error_ret(tok);
        if (tok->enc == NULL((void *)0) && newl[1]) {
            if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
                                   tok, buf_setreadl))
                return error_ret(tok);
        }
    }
    if (tok->enc != NULL((void *)0)) {
        /* A non-UTF-8 spec was found: re-encode the whole input. */
        assert(utf8 == NULL)(__builtin_expect(!(utf8 == ((void *)0)), 0) ? __assert_rtn(__func__ , "Parser/tokenizer.c", 753, "utf8 == NULL") : (void)0);
        utf8 = translate_into_utf8(str, tok->enc);
        if (utf8 == NULL((void *)0))
            return error_ret(tok);
        str = PyBytes_AS_STRING(utf8)((__builtin_expect(!(((((((PyObject*)(utf8))->ob_type))-> tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__ , "Parser/tokenizer.c", 757, "PyBytes_Check(utf8)") : (void)0 ), (((PyBytesObject *)(utf8))->ob_sval));
    }
    assert(tok->decoding_buffer == NULL)(__builtin_expect(!(tok->decoding_buffer == ((void *)0)), 0 ) ? __assert_rtn(__func__, "Parser/tokenizer.c", 759, "tok->decoding_buffer == NULL" ) : (void)0);
    tok->decoding_buffer = utf8; /* CAUTION */
    return str;
}
763 | |
764 | #endif /* PGEN */ |
765 | |
766 | /* Set up tokenizer for string */ |
767 | |
struct tok_state *
PyTokenizer_FromString(const char *str, int exec_input)
{
    /* Set up a tokenizer over an in-memory byte string; BOM and coding
       spec are handled by decode_str.  Returns NULL on failure. */
    struct tok_state *tok = tok_new();
    if (tok == NULL((void *)0))
        return NULL((void *)0);
    str = (char *)decode_str(str, exec_input, tok);
    if (str == NULL((void *)0)) {
        PyTokenizer_Free(tok);
        return NULL((void *)0);
    }

    /* XXX: constify members. */
    tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
    return tok;
}
784 | |
struct tok_state *
PyTokenizer_FromUTF8(const char *str, int exec_input)
{
    /* Set up a tokenizer over a string already known to be UTF-8:
       skip encoding detection entirely, only normalize newlines, and
       record "utf-8" as the encoding. */
    struct tok_state *tok = tok_new();
    if (tok == NULL((void *)0))
        return NULL((void *)0);
#ifndef PGEN
    tok->input = str = translate_newlines(str, exec_input, tok);
#endif
    if (str == NULL((void *)0)) {
        PyTokenizer_Free(tok);
        return NULL((void *)0);
    }
    tok->decoding_state = STATE_RAW;
    tok->read_coding_spec = 1;
    tok->enc = NULL((void *)0);
    tok->str = str;
    tok->encoding = (char *)PyMem_MALLOC_PyMem_DebugMalloc(6);
    if (!tok->encoding) {
        PyTokenizer_Free(tok);
        return NULL((void *)0);
    }
    strcpy(tok->encoding, "utf-8")((__builtin_object_size (tok->encoding, 0) != (size_t) -1) ? __builtin___strcpy_chk (tok->encoding, "utf-8", __builtin_object_size (tok->encoding, 2 > 1)) : __inline_strcpy_chk (tok-> encoding, "utf-8"));

    /* XXX: constify members. */
    tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
    return tok;
}
813 | |
814 | /* Set up tokenizer for file */ |
815 | |
struct tok_state *
PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2)
{
    /* Set up a tokenizer reading from FP.  ENC (may be NULL) is copied
       into the state; PS1/PS2 are interactive prompt strings stored by
       reference (not copied).  Returns NULL on allocation failure. */
    struct tok_state *tok = tok_new();
    if (tok == NULL((void *)0))
        return NULL((void *)0);
    if ((tok->buf = (char *)PyMem_MALLOC_PyMem_DebugMalloc(BUFSIZ1024)) == NULL((void *)0)) {
        PyTokenizer_Free(tok);
        return NULL((void *)0);
    }
    tok->cur = tok->inp = tok->buf;
    tok->end = tok->buf + BUFSIZ1024;
    tok->fp = fp;
    tok->prompt = ps1;
    tok->nextprompt = ps2;
    if (enc != NULL((void *)0)) {
        /* Must copy encoding declaration since it
           gets copied into the parse tree. */
        tok->encoding = PyMem_MALLOC_PyMem_DebugMalloc(strlen(enc)+1);
        if (!tok->encoding) {
            PyTokenizer_Free(tok);
            return NULL((void *)0);
        }
        strcpy(tok->encoding, enc)((__builtin_object_size (tok->encoding, 0) != (size_t) -1) ? __builtin___strcpy_chk (tok->encoding, enc, __builtin_object_size (tok->encoding, 2 > 1)) : __inline_strcpy_chk (tok-> encoding, enc));
        tok->decoding_state = STATE_NORMAL;
    }
    return tok;
}
844 | |
845 | |
846 | /* Free a tok_state structure */ |
847 | |
void
PyTokenizer_Free(struct tok_state *tok)
{
    /* Release everything owned by TOK, then TOK itself.  tok->buf is
       owned only when reading from a file (tok->fp set) — for string
       input it aliases tok->input, which is freed separately below. */
    if (tok->encoding != NULL((void *)0))
        PyMem_FREE_PyMem_DebugFree(tok->encoding);
#ifndef PGEN
    Py_XDECREF(tok->decoding_readline)do { if ((tok->decoding_readline) == ((void *)0)) ; else do { if (_Py_RefTotal-- , --((PyObject*)(tok->decoding_readline ))->ob_refcnt != 0) { if (((PyObject*)tok->decoding_readline )->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c" , 854, (PyObject *)(tok->decoding_readline)); } else _Py_Dealloc ((PyObject *)(tok->decoding_readline)); } while (0); } while (0);
    Py_XDECREF(tok->decoding_buffer)do { if ((tok->decoding_buffer) == ((void *)0)) ; else do { if (_Py_RefTotal-- , --((PyObject*)(tok->decoding_buffer) )->ob_refcnt != 0) { if (((PyObject*)tok->decoding_buffer )->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c" , 855, (PyObject *)(tok->decoding_buffer)); } else _Py_Dealloc ((PyObject *)(tok->decoding_buffer)); } while (0); } while (0);
#endif
    if (tok->fp != NULL((void *)0) && tok->buf != NULL((void *)0))
        PyMem_FREE_PyMem_DebugFree(tok->buf);
    if (tok->input)
        PyMem_FREE_PyMem_DebugFree((char *)tok->input);
    PyMem_FREE_PyMem_DebugFree(tok);
}
863 | |
864 | /* Get next char, updating state; error code goes into tok->done */ |
865 | |
866 | static int |
867 | tok_nextc(register struct tok_state *tok) |
868 | { |
869 | for (;;) { |
870 | if (tok->cur != tok->inp) { |
871 | return Py_CHARMASK(*tok->cur++)((unsigned char)((*tok->cur++) & 0xff)); /* Fast path */ |
872 | } |
873 | if (tok->done != E_OK10) |
874 | return EOF(-1); |
875 | if (tok->fp == NULL((void *)0)) { |
876 | char *end = strchr(tok->inp, '\n'); |
877 | if (end != NULL((void *)0)) |
878 | end++; |
879 | else { |
880 | end = strchr(tok->inp, '\0'); |
881 | if (end == tok->inp) { |
882 | tok->done = E_EOF11; |
883 | return EOF(-1); |
884 | } |
885 | } |
886 | if (tok->start == NULL((void *)0)) |
887 | tok->buf = tok->cur; |
888 | tok->line_start = tok->cur; |
889 | tok->lineno++; |
890 | tok->inp = end; |
891 | return Py_CHARMASK(*tok->cur++)((unsigned char)((*tok->cur++) & 0xff)); |
892 | } |
893 | if (tok->prompt != NULL((void *)0)) { |
894 | char *newtok = PyOS_Readline(stdin__stdinp, stdout__stdoutp, tok->prompt); |
895 | #ifndef PGEN |
896 | if (newtok != NULL((void *)0)) { |
897 | char *translated = translate_newlines(newtok, 0, tok); |
898 | PyMem_FREE_PyMem_DebugFree(newtok); |
899 | if (translated == NULL((void *)0)) |
900 | return EOF(-1); |
901 | newtok = translated; |
902 | } |
903 | if (tok->encoding && newtok && *newtok) { |
904 | /* Recode to UTF-8 */ |
905 | Py_ssize_t buflen; |
906 | const char* buf; |
907 | PyObject *u = translate_into_utf8(newtok, tok->encoding); |
908 | PyMem_FREE_PyMem_DebugFree(newtok); |
909 | if (!u) { |
910 | tok->done = E_DECODE22; |
911 | return EOF(-1); |
912 | } |
913 | buflen = PyBytes_GET_SIZE(u)((__builtin_expect(!(((((((PyObject*)(u))->ob_type))->tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c" , 913, "PyBytes_Check(u)") : (void)0),(((PyVarObject*)(u))-> ob_size)); |
914 | buf = PyBytes_AS_STRING(u)((__builtin_expect(!(((((((PyObject*)(u))->ob_type))->tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c" , 914, "PyBytes_Check(u)") : (void)0), (((PyBytesObject *)(u) )->ob_sval)); |
915 | if (!buf) { |
916 | Py_DECREF(u)do { if (_Py_RefTotal-- , --((PyObject*)(u))->ob_refcnt != 0) { if (((PyObject*)u)->ob_refcnt < 0) _Py_NegativeRefcount ("Parser/tokenizer.c", 916, (PyObject *)(u)); } else _Py_Dealloc ((PyObject *)(u)); } while (0); |
917 | tok->done = E_DECODE22; |
918 | return EOF(-1); |
919 | } |
920 | newtok = PyMem_MALLOC_PyMem_DebugMalloc(buflen+1); |
921 | strcpy(newtok, buf)((__builtin_object_size (newtok, 0) != (size_t) -1) ? __builtin___strcpy_chk (newtok, buf, __builtin_object_size (newtok, 2 > 1)) : __inline_strcpy_chk (newtok, buf)); |
922 | Py_DECREF(u)do { if (_Py_RefTotal-- , --((PyObject*)(u))->ob_refcnt != 0) { if (((PyObject*)u)->ob_refcnt < 0) _Py_NegativeRefcount ("Parser/tokenizer.c", 922, (PyObject *)(u)); } else _Py_Dealloc ((PyObject *)(u)); } while (0); |
923 | } |
924 | #endif |
925 | if (tok->nextprompt != NULL((void *)0)) |
926 | tok->prompt = tok->nextprompt; |
927 | if (newtok == NULL((void *)0)) |
928 | tok->done = E_INTR12; |
929 | else if (*newtok == '\0') { |
930 | PyMem_FREE_PyMem_DebugFree(newtok); |
931 | tok->done = E_EOF11; |
932 | } |
933 | else if (tok->start != NULL((void *)0)) { |
934 | size_t start = tok->start - tok->buf; |
935 | size_t oldlen = tok->cur - tok->buf; |
936 | size_t newlen = oldlen + strlen(newtok); |
937 | char *buf = tok->buf; |
938 | buf = (char *)PyMem_REALLOC_PyMem_DebugRealloc(buf, newlen+1); |
939 | tok->lineno++; |
940 | if (buf == NULL((void *)0)) { |
941 | PyMem_FREE_PyMem_DebugFree(tok->buf); |
942 | tok->buf = NULL((void *)0); |
943 | PyMem_FREE_PyMem_DebugFree(newtok); |
944 | tok->done = E_NOMEM15; |
945 | return EOF(-1); |
946 | } |
947 | tok->buf = buf; |
948 | tok->cur = tok->buf + oldlen; |
949 | tok->line_start = tok->cur; |
950 | strcpy(tok->buf + oldlen, newtok)((__builtin_object_size (tok->buf + oldlen, 0) != (size_t) -1) ? __builtin___strcpy_chk (tok->buf + oldlen, newtok, __builtin_object_size (tok->buf + oldlen, 2 > 1)) : __inline_strcpy_chk (tok ->buf + oldlen, newtok)); |
951 | PyMem_FREE_PyMem_DebugFree(newtok); |
952 | tok->inp = tok->buf + newlen; |
953 | tok->end = tok->inp + 1; |
954 | tok->start = tok->buf + start; |
955 | } |
956 | else { |
957 | tok->lineno++; |
958 | if (tok->buf != NULL((void *)0)) |
959 | PyMem_FREE_PyMem_DebugFree(tok->buf); |
960 | tok->buf = newtok; |
961 | tok->line_start = tok->buf; |
962 | tok->cur = tok->buf; |
963 | tok->line_start = tok->buf; |
964 | tok->inp = strchr(tok->buf, '\0'); |
965 | tok->end = tok->inp + 1; |
966 | } |
967 | } |
968 | else { |
969 | int done = 0; |
970 | Py_ssize_t cur = 0; |
971 | char *pt; |
972 | if (tok->start == NULL((void *)0)) { |
973 | if (tok->buf == NULL((void *)0)) { |
974 | tok->buf = (char *) |
975 | PyMem_MALLOC_PyMem_DebugMalloc(BUFSIZ1024); |
976 | if (tok->buf == NULL((void *)0)) { |
977 | tok->done = E_NOMEM15; |
978 | return EOF(-1); |
979 | } |
980 | tok->end = tok->buf + BUFSIZ1024; |
981 | } |
982 | if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), |
983 | tok) == NULL((void *)0)) { |
984 | tok->done = E_EOF11; |
985 | done = 1; |
986 | } |
987 | else { |
988 | tok->done = E_OK10; |
989 | tok->inp = strchr(tok->buf, '\0'); |
990 | done = tok->inp[-1] == '\n'; |
991 | } |
992 | } |
993 | else { |
994 | cur = tok->cur - tok->buf; |
995 | if (decoding_feof(tok)) { |
996 | tok->done = E_EOF11; |
997 | done = 1; |
998 | } |
999 | else |
1000 | tok->done = E_OK10; |
1001 | } |
1002 | tok->lineno++; |
1003 | /* Read until '\n' or EOF */ |
1004 | while (!done) { |
1005 | Py_ssize_t curstart = tok->start == NULL((void *)0) ? -1 : |
1006 | tok->start - tok->buf; |
1007 | Py_ssize_t curvalid = tok->inp - tok->buf; |
1008 | Py_ssize_t newsize = curvalid + BUFSIZ1024; |
1009 | char *newbuf = tok->buf; |
1010 | newbuf = (char *)PyMem_REALLOC_PyMem_DebugRealloc(newbuf, |
1011 | newsize); |
1012 | if (newbuf == NULL((void *)0)) { |
1013 | tok->done = E_NOMEM15; |
1014 | tok->cur = tok->inp; |
1015 | return EOF(-1); |
1016 | } |
1017 | tok->buf = newbuf; |
1018 | tok->inp = tok->buf + curvalid; |
1019 | tok->end = tok->buf + newsize; |
1020 | tok->start = curstart < 0 ? NULL((void *)0) : |
1021 | tok->buf + curstart; |
1022 | if (decoding_fgets(tok->inp, |
1023 | (int)(tok->end - tok->inp), |
1024 | tok) == NULL((void *)0)) { |
1025 | /* Break out early on decoding |
1026 | errors, as tok->buf will be NULL |
1027 | */ |
1028 | if (tok->decoding_erred) |
1029 | return EOF(-1); |
1030 | /* Last line does not end in \n, |
1031 | fake one */ |
1032 | strcpy(tok->inp, "\n")((__builtin_object_size (tok->inp, 0) != (size_t) -1) ? __builtin___strcpy_chk (tok->inp, "\n", __builtin_object_size (tok->inp, 2 > 1)) : __inline_strcpy_chk (tok->inp, "\n")); |
1033 | } |
1034 | tok->inp = strchr(tok->inp, '\0'); |
1035 | done = tok->inp[-1] == '\n'; |
1036 | } |
1037 | if (tok->buf != NULL((void *)0)) { |
1038 | tok->cur = tok->buf + cur; |
1039 | tok->line_start = tok->cur; |
1040 | /* replace "\r\n" with "\n" */ |
1041 | /* For Mac leave the \r, giving a syntax error */ |
1042 | pt = tok->inp - 2; |
1043 | if (pt >= tok->buf && *pt == '\r') { |
1044 | *pt++ = '\n'; |
1045 | *pt = '\0'; |
1046 | tok->inp = pt; |
1047 | } |
1048 | } |
1049 | } |
1050 | if (tok->done != E_OK10) { |
1051 | if (tok->prompt != NULL((void *)0)) |
1052 | PySys_WriteStderr("\n"); |
1053 | tok->cur = tok->inp; |
1054 | return EOF(-1); |
1055 | } |
1056 | } |
1057 | /*NOTREACHED*/ |
1058 | } |
1059 | |
1060 | |
1061 | /* Back-up one character */ |
1062 | |
1063 | static void |
1064 | tok_backup(register struct tok_state *tok, register int c) |
1065 | { |
1066 | if (c != EOF(-1)) { |
1067 | if (--tok->cur < tok->buf) |
1068 | Py_FatalError("tok_backup: beginning of buffer"); |
1069 | if (*tok->cur != c) |
1070 | *tok->cur = c; |
1071 | } |
1072 | } |
1073 | |
1074 | |
1075 | /* Return the token corresponding to a single character */ |
1076 | |
1077 | int |
1078 | PyToken_OneChar(int c) |
1079 | { |
1080 | switch (c) { |
1081 | case '(': return LPAR7; |
1082 | case ')': return RPAR8; |
1083 | case '[': return LSQB9; |
1084 | case ']': return RSQB10; |
1085 | case ':': return COLON11; |
1086 | case ',': return COMMA12; |
1087 | case ';': return SEMI13; |
1088 | case '+': return PLUS14; |
1089 | case '-': return MINUS15; |
1090 | case '*': return STAR16; |
1091 | case '/': return SLASH17; |
1092 | case '|': return VBAR18; |
1093 | case '&': return AMPER19; |
1094 | case '<': return LESS20; |
1095 | case '>': return GREATER21; |
1096 | case '=': return EQUAL22; |
1097 | case '.': return DOT23; |
1098 | case '%': return PERCENT24; |
1099 | case '{': return LBRACE26; |
1100 | case '}': return RBRACE27; |
1101 | case '^': return CIRCUMFLEX33; |
1102 | case '~': return TILDE32; |
1103 | case '@': return AT50; |
1104 | default: return OP53; |
1105 | } |
1106 | } |
1107 | |
1108 | |
1109 | int |
1110 | PyToken_TwoChars(int c1, int c2) |
1111 | { |
1112 | switch (c1) { |
1113 | case '=': |
1114 | switch (c2) { |
1115 | case '=': return EQEQUAL28; |
1116 | } |
1117 | break; |
1118 | case '!': |
1119 | switch (c2) { |
1120 | case '=': return NOTEQUAL29; |
1121 | } |
1122 | break; |
1123 | case '<': |
1124 | switch (c2) { |
1125 | case '>': return NOTEQUAL29; |
1126 | case '=': return LESSEQUAL30; |
1127 | case '<': return LEFTSHIFT34; |
1128 | } |
1129 | break; |
1130 | case '>': |
1131 | switch (c2) { |
1132 | case '=': return GREATEREQUAL31; |
1133 | case '>': return RIGHTSHIFT35; |
1134 | } |
1135 | break; |
1136 | case '+': |
1137 | switch (c2) { |
1138 | case '=': return PLUSEQUAL37; |
1139 | } |
1140 | break; |
1141 | case '-': |
1142 | switch (c2) { |
1143 | case '=': return MINEQUAL38; |
1144 | case '>': return RARROW51; |
1145 | } |
1146 | break; |
1147 | case '*': |
1148 | switch (c2) { |
1149 | case '*': return DOUBLESTAR36; |
1150 | case '=': return STAREQUAL39; |
1151 | } |
1152 | break; |
1153 | case '/': |
1154 | switch (c2) { |
1155 | case '/': return DOUBLESLASH48; |
1156 | case '=': return SLASHEQUAL40; |
1157 | } |
1158 | break; |
1159 | case '|': |
1160 | switch (c2) { |
1161 | case '=': return VBAREQUAL43; |
1162 | } |
1163 | break; |
1164 | case '%': |
1165 | switch (c2) { |
1166 | case '=': return PERCENTEQUAL41; |
1167 | } |
1168 | break; |
1169 | case '&': |
1170 | switch (c2) { |
1171 | case '=': return AMPEREQUAL42; |
1172 | } |
1173 | break; |
1174 | case '^': |
1175 | switch (c2) { |
1176 | case '=': return CIRCUMFLEXEQUAL44; |
1177 | } |
1178 | break; |
1179 | } |
1180 | return OP53; |
1181 | } |
1182 | |
1183 | int |
1184 | PyToken_ThreeChars(int c1, int c2, int c3) |
1185 | { |
1186 | switch (c1) { |
1187 | case '<': |
1188 | switch (c2) { |
1189 | case '<': |
1190 | switch (c3) { |
1191 | case '=': |
1192 | return LEFTSHIFTEQUAL45; |
1193 | } |
1194 | break; |
1195 | } |
1196 | break; |
1197 | case '>': |
1198 | switch (c2) { |
1199 | case '>': |
1200 | switch (c3) { |
1201 | case '=': |
1202 | return RIGHTSHIFTEQUAL46; |
1203 | } |
1204 | break; |
1205 | } |
1206 | break; |
1207 | case '*': |
1208 | switch (c2) { |
1209 | case '*': |
1210 | switch (c3) { |
1211 | case '=': |
1212 | return DOUBLESTAREQUAL47; |
1213 | } |
1214 | break; |
1215 | } |
1216 | break; |
1217 | case '/': |
1218 | switch (c2) { |
1219 | case '/': |
1220 | switch (c3) { |
1221 | case '=': |
1222 | return DOUBLESLASHEQUAL49; |
1223 | } |
1224 | break; |
1225 | } |
1226 | break; |
1227 | case '.': |
1228 | switch (c2) { |
1229 | case '.': |
1230 | switch (c3) { |
1231 | case '.': |
1232 | return ELLIPSIS52; |
1233 | } |
1234 | break; |
1235 | } |
1236 | break; |
1237 | } |
1238 | return OP53; |
1239 | } |
1240 | |
1241 | static int |
1242 | indenterror(struct tok_state *tok) |
1243 | { |
1244 | if (tok->alterror) { |
1245 | tok->done = E_TABSPACE18; |
1246 | tok->cur = tok->inp; |
1247 | return 1; |
1248 | } |
1249 | if (tok->altwarning) { |
1250 | PySys_WriteStderr("%s: inconsistent use of tabs and spaces " |
1251 | "in indentation\n", tok->filename); |
1252 | tok->altwarning = 0; |
1253 | } |
1254 | return 0; |
1255 | } |
1256 | |
1257 | #ifdef PGEN |
1258 | #define verify_identifier(tok) 1 |
1259 | #else |
1260 | /* Verify that the identifier follows PEP 3131. */ |
1261 | static int |
1262 | verify_identifier(struct tok_state *tok) |
1263 | { |
1264 | PyObject *s; |
1265 | int result; |
1266 | s = PyUnicode_DecodeUTF8PyUnicodeUCS2_DecodeUTF8(tok->start, tok->cur - tok->start, NULL((void *)0)); |
1267 | if (s == NULL((void *)0)) { |
1268 | if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { |
1269 | PyErr_Clear(); |
1270 | tok->done = E_IDENTIFIER26; |
1271 | } else { |
1272 | tok->done = E_ERROR17; |
1273 | } |
1274 | return 0; |
1275 | } |
1276 | result = PyUnicode_IsIdentifierPyUnicodeUCS2_IsIdentifier(s); |
1277 | Py_DECREF(s)do { if (_Py_RefTotal-- , --((PyObject*)(s))->ob_refcnt != 0) { if (((PyObject*)s)->ob_refcnt < 0) _Py_NegativeRefcount ("Parser/tokenizer.c", 1277, (PyObject *)(s)); } else _Py_Dealloc ((PyObject *)(s)); } while (0); |
1278 | if (result == 0) |
1279 | tok->done = E_IDENTIFIER26; |
1280 | return result; |
1281 | } |
1282 | #endif |
1283 | |
1284 | /* Get next token, after space stripping etc. */ |
1285 | |
1286 | static int |
1287 | tok_get(register struct tok_state *tok, char **p_start, char **p_end) |
1288 | { |
1289 | register int c; |
1290 | int blankline, nonascii; |
1291 | |
1292 | *p_start = *p_end = NULL((void *)0); |
1293 | nextline: |
1294 | tok->start = NULL((void *)0); |
1295 | blankline = 0; |
1296 | |
1297 | /* Get indentation level */ |
1298 | if (tok->atbol) { |
1299 | register int col = 0; |
1300 | register int altcol = 0; |
1301 | tok->atbol = 0; |
1302 | for (;;) { |
1303 | c = tok_nextc(tok); |
1304 | if (c == ' ') |
1305 | col++, altcol++; |
1306 | else if (c == '\t') { |
1307 | col = (col/tok->tabsize + 1) * tok->tabsize; |
1308 | altcol = (altcol/tok->alttabsize + 1) |
1309 | * tok->alttabsize; |
1310 | } |
1311 | else if (c == '\014') /* Control-L (formfeed) */ |
1312 | col = altcol = 0; /* For Emacs users */ |
1313 | else |
1314 | break; |
1315 | } |
1316 | tok_backup(tok, c); |
1317 | if (c == '#' || c == '\n') { |
1318 | /* Lines with only whitespace and/or comments |
1319 | shouldn't affect the indentation and are |
1320 | not passed to the parser as NEWLINE tokens, |
1321 | except *totally* empty lines in interactive |
1322 | mode, which signal the end of a command group. */ |
1323 | if (col == 0 && c == '\n' && tok->prompt != NULL((void *)0)) |
1324 | blankline = 0; /* Let it through */ |
1325 | else |
1326 | blankline = 1; /* Ignore completely */ |
1327 | /* We can't jump back right here since we still |
1328 | may need to skip to the end of a comment */ |
1329 | } |
1330 | if (!blankline && tok->level == 0) { |
1331 | if (col == tok->indstack[tok->indent]) { |
1332 | /* No change */ |
1333 | if (altcol != tok->altindstack[tok->indent]) { |
1334 | if (indenterror(tok)) |
1335 | return ERRORTOKEN54; |
1336 | } |
1337 | } |
1338 | else if (col > tok->indstack[tok->indent]) { |
1339 | /* Indent -- always one */ |
1340 | if (tok->indent+1 >= MAXINDENT100) { |
1341 | tok->done = E_TOODEEP20; |
1342 | tok->cur = tok->inp; |
1343 | return ERRORTOKEN54; |
1344 | } |
1345 | if (altcol <= tok->altindstack[tok->indent]) { |
1346 | if (indenterror(tok)) |
1347 | return ERRORTOKEN54; |
1348 | } |
1349 | tok->pendin++; |
1350 | tok->indstack[++tok->indent] = col; |
1351 | tok->altindstack[tok->indent] = altcol; |
1352 | } |
1353 | else /* col < tok->indstack[tok->indent] */ { |
1354 | /* Dedent -- any number, must be consistent */ |
1355 | while (tok->indent > 0 && |
1356 | col < tok->indstack[tok->indent]) { |
1357 | tok->pendin--; |
1358 | tok->indent--; |
1359 | } |
1360 | if (col != tok->indstack[tok->indent]) { |
1361 | tok->done = E_DEDENT21; |
1362 | tok->cur = tok->inp; |
1363 | return ERRORTOKEN54; |
1364 | } |
1365 | if (altcol != tok->altindstack[tok->indent]) { |
1366 | if (indenterror(tok)) |
1367 | return ERRORTOKEN54; |
1368 | } |
1369 | } |
1370 | } |
1371 | } |
1372 | |
1373 | tok->start = tok->cur; |
1374 | |
1375 | /* Return pending indents/dedents */ |
1376 | if (tok->pendin != 0) { |
1377 | if (tok->pendin < 0) { |
1378 | tok->pendin++; |
1379 | return DEDENT6; |
1380 | } |
1381 | else { |
1382 | tok->pendin--; |
1383 | return INDENT5; |
1384 | } |
1385 | } |
1386 | |
1387 | again: |
1388 | tok->start = NULL((void *)0); |
1389 | /* Skip spaces */ |
1390 | do { |
1391 | c = tok_nextc(tok); |
1392 | } while (c == ' ' || c == '\t' || c == '\014'); |
1393 | |
1394 | /* Set start of current token */ |
1395 | tok->start = tok->cur - 1; |
1396 | |
1397 | /* Skip comment */ |
1398 | if (c == '#') |
1399 | while (c != EOF(-1) && c != '\n') |
1400 | c = tok_nextc(tok); |
1401 | |
1402 | /* Check for EOF and errors now */ |
1403 | if (c == EOF(-1)) { |
1404 | return tok->done == E_EOF11 ? ENDMARKER0 : ERRORTOKEN54; |
1405 | } |
1406 | |
1407 | /* Identifier (most frequent token!) */ |
1408 | nonascii = 0; |
1409 | if (is_potential_identifier_start(c)( (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= 128))) { |
1410 | /* Process b"", r"" and br"" */ |
1411 | if (c == 'b' || c == 'B') { |
1412 | c = tok_nextc(tok); |
1413 | if (c == '"' || c == '\'') |
1414 | goto letter_quote; |
1415 | } |
1416 | if (c == 'r' || c == 'R') { |
1417 | c = tok_nextc(tok); |
1418 | if (c == '"' || c == '\'') |
1419 | goto letter_quote; |
1420 | } |
1421 | while (is_potential_identifier_char(c)( (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || (c >= 128))) { |
1422 | if (c >= 128) |
1423 | nonascii = 1; |
1424 | c = tok_nextc(tok); |
1425 | } |
1426 | tok_backup(tok, c); |
1427 | if (nonascii && |
1428 | !verify_identifier(tok)) { |
1429 | tok->done = E_IDENTIFIER26; |
1430 | return ERRORTOKEN54; |
1431 | } |
1432 | *p_start = tok->start; |
1433 | *p_end = tok->cur; |
1434 | return NAME1; |
1435 | } |
1436 | |
1437 | /* Newline */ |
1438 | if (c == '\n') { |
1439 | tok->atbol = 1; |
1440 | if (blankline || tok->level > 0) |
1441 | goto nextline; |
1442 | *p_start = tok->start; |
1443 | *p_end = tok->cur - 1; /* Leave '\n' out of the string */ |
1444 | tok->cont_line = 0; |
1445 | return NEWLINE4; |
1446 | } |
1447 | |
1448 | /* Period or number starting with period? */ |
1449 | if (c == '.') { |
1450 | c = tok_nextc(tok); |
1451 | if (isdigit(c)) { |
1452 | goto fraction; |
1453 | } else if (c == '.') { |
1454 | c = tok_nextc(tok); |
1455 | if (c == '.') { |
1456 | *p_start = tok->start; |
1457 | *p_end = tok->cur; |
1458 | return ELLIPSIS52; |
1459 | } else { |
1460 | tok_backup(tok, c); |
1461 | } |
1462 | tok_backup(tok, '.'); |
1463 | } else { |
1464 | tok_backup(tok, c); |
1465 | } |
1466 | *p_start = tok->start; |
1467 | *p_end = tok->cur; |
1468 | return DOT23; |
1469 | } |
1470 | |
1471 | /* Number */ |
1472 | if (isdigit(c)) { |
1473 | if (c == '0') { |
1474 | /* Hex, octal or binary -- maybe. */ |
1475 | c = tok_nextc(tok); |
1476 | if (c == '.') |
1477 | goto fraction; |
1478 | if (c == 'j' || c == 'J') |
1479 | goto imaginary; |
1480 | if (c == 'x' || c == 'X') { |
1481 | |
1482 | /* Hex */ |
1483 | c = tok_nextc(tok); |
1484 | if (!isxdigit(c)) { |
1485 | tok->done = E_TOKEN13; |
1486 | tok_backup(tok, c); |
1487 | return ERRORTOKEN54; |
1488 | } |
1489 | do { |
1490 | c = tok_nextc(tok); |
1491 | } while (isxdigit(c)); |
1492 | } |
1493 | else if (c == 'o' || c == 'O') { |
1494 | /* Octal */ |
1495 | c = tok_nextc(tok); |
1496 | if (c < '0' || c >= '8') { |
1497 | tok->done = E_TOKEN13; |
1498 | tok_backup(tok, c); |
1499 | return ERRORTOKEN54; |
1500 | } |
1501 | do { |
1502 | c = tok_nextc(tok); |
1503 | } while ('0' <= c && c < '8'); |
1504 | } |
1505 | else if (c == 'b' || c == 'B') { |
1506 | /* Binary */ |
1507 | c = tok_nextc(tok); |
1508 | if (c != '0' && c != '1') { |
1509 | tok->done = E_TOKEN13; |
1510 | tok_backup(tok, c); |
1511 | return ERRORTOKEN54; |
1512 | } |
1513 | do { |
1514 | c = tok_nextc(tok); |
1515 | } while (c == '0' || c == '1'); |
1516 | } |
1517 | else { |
1518 | int nonzero = 0; |
1519 | /* maybe old-style octal; c is first char of it */ |
1520 | /* in any case, allow '0' as a literal */ |
1521 | while (c == '0') |
1522 | c = tok_nextc(tok); |
1523 | while (isdigit(c)) { |
1524 | nonzero = 1; |
1525 | c = tok_nextc(tok); |
1526 | } |
1527 | if (c == '.') |
1528 | goto fraction; |
1529 | else if (c == 'e' || c == 'E') |
1530 | goto exponent; |
1531 | else if (c == 'j' || c == 'J') |
1532 | goto imaginary; |
1533 | else if (nonzero) { |
1534 | tok->done = E_TOKEN13; |
1535 | tok_backup(tok, c); |
1536 | return ERRORTOKEN54; |
1537 | } |
1538 | } |
1539 | } |
1540 | else { |
1541 | /* Decimal */ |
1542 | do { |
1543 | c = tok_nextc(tok); |
1544 | } while (isdigit(c)); |
1545 | { |
1546 | /* Accept floating point numbers. */ |
1547 | if (c == '.') { |
1548 | fraction: |
1549 | /* Fraction */ |
1550 | do { |
1551 | c = tok_nextc(tok); |
1552 | } while (isdigit(c)); |
1553 | } |
1554 | if (c == 'e' || c == 'E') { |
1555 | exponent: |
1556 | /* Exponent part */ |
1557 | c = tok_nextc(tok); |
1558 | if (c == '+' || c == '-') |
1559 | c = tok_nextc(tok); |
1560 | if (!isdigit(c)) { |
1561 | tok->done = E_TOKEN13; |
1562 | tok_backup(tok, c); |
1563 | return ERRORTOKEN54; |
1564 | } |
1565 | do { |
1566 | c = tok_nextc(tok); |
1567 | } while (isdigit(c)); |
1568 | } |
1569 | if (c == 'j' || c == 'J') |
1570 | /* Imaginary part */ |
1571 | imaginary: |
1572 | c = tok_nextc(tok); |
1573 | } |
1574 | } |
1575 | tok_backup(tok, c); |
1576 | *p_start = tok->start; |
1577 | *p_end = tok->cur; |
1578 | return NUMBER2; |
1579 | } |
1580 | |
1581 | letter_quote: |
1582 | /* String */ |
1583 | if (c == '\'' || c == '"') { |
1584 | int quote = c; |
1585 | int quote_size = 1; /* 1 or 3 */ |
1586 | int end_quote_size = 0; |
1587 | |
1588 | /* Find the quote size and start of string */ |
1589 | c = tok_nextc(tok); |
1590 | if (c == quote) { |
1591 | c = tok_nextc(tok); |
1592 | if (c == quote) |
1593 | quote_size = 3; |
1594 | else |
1595 | end_quote_size = 1; /* empty string found */ |
1596 | } |
1597 | if (c != quote) |
1598 | tok_backup(tok, c); |
1599 | |
1600 | /* Get rest of string */ |
1601 | while (end_quote_size != quote_size) { |
1602 | c = tok_nextc(tok); |
1603 | if (c == EOF(-1)) { |
1604 | if (quote_size == 3) |
1605 | tok->done = E_EOFS23; |
1606 | else |
1607 | tok->done = E_EOLS24; |
1608 | tok->cur = tok->inp; |
1609 | return ERRORTOKEN54; |
1610 | } |
1611 | if (quote_size == 1 && c == '\n') { |
1612 | tok->done = E_EOLS24; |
1613 | tok->cur = tok->inp; |
1614 | return ERRORTOKEN54; |
1615 | } |
1616 | if (c == quote) |
1617 | end_quote_size += 1; |
1618 | else { |
1619 | end_quote_size = 0; |
1620 | if (c == '\\') |
1621 | c = tok_nextc(tok); /* skip escaped char */ |
Value stored to 'c' is never read | |
1622 | } |
1623 | } |
1624 | |
1625 | *p_start = tok->start; |
1626 | *p_end = tok->cur; |
1627 | return STRING3; |
1628 | } |
1629 | |
1630 | /* Line continuation */ |
1631 | if (c == '\\') { |
1632 | c = tok_nextc(tok); |
1633 | if (c != '\n') { |
1634 | tok->done = E_LINECONT25; |
1635 | tok->cur = tok->inp; |
1636 | return ERRORTOKEN54; |
1637 | } |
1638 | tok->cont_line = 1; |
1639 | goto again; /* Read next line */ |
1640 | } |
1641 | |
1642 | /* Check for two-character token */ |
1643 | { |
1644 | int c2 = tok_nextc(tok); |
1645 | int token = PyToken_TwoChars(c, c2); |
1646 | if (token != OP53) { |
1647 | int c3 = tok_nextc(tok); |
1648 | int token3 = PyToken_ThreeChars(c, c2, c3); |
1649 | if (token3 != OP53) { |
1650 | token = token3; |
1651 | } else { |
1652 | tok_backup(tok, c3); |
1653 | } |
1654 | *p_start = tok->start; |
1655 | *p_end = tok->cur; |
1656 | return token; |
1657 | } |
1658 | tok_backup(tok, c2); |
1659 | } |
1660 | |
1661 | /* Keep track of parentheses nesting level */ |
1662 | switch (c) { |
1663 | case '(': |
1664 | case '[': |
1665 | case '{': |
1666 | tok->level++; |
1667 | break; |
1668 | case ')': |
1669 | case ']': |
1670 | case '}': |
1671 | tok->level--; |
1672 | break; |
1673 | } |
1674 | |
1675 | /* Punctuation character */ |
1676 | *p_start = tok->start; |
1677 | *p_end = tok->cur; |
1678 | return PyToken_OneChar(c); |
1679 | } |
1680 | |
1681 | int |
1682 | PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) |
1683 | { |
1684 | int result = tok_get(tok, p_start, p_end); |
1685 | if (tok->decoding_erred) { |
1686 | result = ERRORTOKEN54; |
1687 | tok->done = E_DECODE22; |
1688 | } |
1689 | return result; |
1690 | } |
1691 | |
1692 | /* Get -*- encoding -*- from a Python file. |
1693 | |
1694 | PyTokenizer_FindEncoding returns NULL when it can't find the encoding in |
1695 | the first or second line of the file (in which case the encoding |
1696 | should be assumed to be PyUnicode_GetDefaultEncoding()). |
1697 | |
1698 | The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed |
1699 | by the caller. |
1700 | */ |
1701 | char * |
1702 | PyTokenizer_FindEncoding(int fd) |
1703 | { |
1704 | struct tok_state *tok; |
1705 | FILE *fp; |
1706 | char *p_start =NULL((void *)0) , *p_end =NULL((void *)0) , *encoding = NULL((void *)0); |
1707 | |
1708 | fd = dup(fd); |
1709 | if (fd < 0) { |
1710 | return NULL((void *)0); |
1711 | } |
1712 | fp = fdopen(fd, "r"); |
1713 | if (fp == NULL((void *)0)) { |
1714 | return NULL((void *)0); |
1715 | } |
1716 | tok = PyTokenizer_FromFile(fp, NULL((void *)0), NULL((void *)0), NULL((void *)0)); |
1717 | if (tok == NULL((void *)0)) { |
1718 | fclose(fp); |
1719 | return NULL((void *)0); |
1720 | } |
1721 | while (tok->lineno < 2 && tok->done == E_OK10) { |
1722 | PyTokenizer_Get(tok, &p_start, &p_end); |
1723 | } |
1724 | fclose(fp); |
1725 | if (tok->encoding) { |
1726 | encoding = (char *)PyMem_MALLOC_PyMem_DebugMalloc(strlen(tok->encoding) + 1); |
1727 | if (encoding) |
1728 | strcpy(encoding, tok->encoding)((__builtin_object_size (encoding, 0) != (size_t) -1) ? __builtin___strcpy_chk (encoding, tok->encoding, __builtin_object_size (encoding , 2 > 1)) : __inline_strcpy_chk (encoding, tok->encoding )); |
1729 | } |
1730 | PyTokenizer_Free(tok); |
1731 | return encoding; |
1732 | } |
1733 | |
1734 | #ifdef Py_DEBUG1 |
1735 | |
1736 | void |
1737 | tok_dump_Py_tok_dump(int type, char *start, char *end) |
1738 | { |
1739 | printf("%s", _PyParser_TokenNames[type]); |
1740 | if (type == NAME1 || type == NUMBER2 || type == STRING3 || type == OP53) |
1741 | printf("(%.*s)", (int)(end - start), start); |
1742 | } |
1743 | |
1744 | #endif |