Bug Summary

File: Parser/tokenizer.c
Location: line 1621, column 17
Description: Value stored to 'c' is never read (dead store)

Annotated Source Code

1
2/* Tokenizer implementation */
3
4#include "Python.h"
5#include "pgenheaders.h"
6
7#include <ctype.h>
8#include <assert.h>
9
10#include "tokenizer.h"
11#include "errcode.h"
12
13#ifndef PGEN
14#include "unicodeobject.h"
15#include "bytesobject.h"
16#include "fileobject.h"
17#include "codecs.h"
18#include "abstract.h"
19#endif /* PGEN */
20
21#define is_potential_identifier_start(c)( (c >= 'a' && c <= 'z') || (c >= 'A' &&
c <= 'Z') || c == '_' || (c >= 128))
(\
22 (c >= 'a' && c <= 'z')\
23 || (c >= 'A' && c <= 'Z')\
24 || c == '_'\
25 || (c >= 128))
26
27#define is_potential_identifier_char(c)( (c >= 'a' && c <= 'z') || (c >= 'A' &&
c <= 'Z') || (c >= '0' && c <= '9') || c ==
'_' || (c >= 128))
(\
28 (c >= 'a' && c <= 'z')\
29 || (c >= 'A' && c <= 'Z')\
30 || (c >= '0' && c <= '9')\
31 || c == '_'\
32 || (c >= 128))
33
34extern char *PyOS_Readline(FILE *, FILE *, char *);
35/* Return malloc'ed string including trailing \n;
36 empty malloc'ed string for EOF;
37 NULL if interrupted */
38
39/* Don't ever change this -- it would break the portability of Python code */
40#define TABSIZE8 8
41
42/* Forward */
43static struct tok_state *tok_new(void);
44static int tok_nextc(struct tok_state *tok);
45static void tok_backup(struct tok_state *tok, int c);
46
47
48/* Token names */
49
50char *_PyParser_TokenNames[] = {
51 "ENDMARKER",
52 "NAME",
53 "NUMBER",
54 "STRING",
55 "NEWLINE",
56 "INDENT",
57 "DEDENT",
58 "LPAR",
59 "RPAR",
60 "LSQB",
61 "RSQB",
62 "COLON",
63 "COMMA",
64 "SEMI",
65 "PLUS",
66 "MINUS",
67 "STAR",
68 "SLASH",
69 "VBAR",
70 "AMPER",
71 "LESS",
72 "GREATER",
73 "EQUAL",
74 "DOT",
75 "PERCENT",
76 "LBRACE",
77 "RBRACE",
78 "EQEQUAL",
79 "NOTEQUAL",
80 "LESSEQUAL",
81 "GREATEREQUAL",
82 "TILDE",
83 "CIRCUMFLEX",
84 "LEFTSHIFT",
85 "RIGHTSHIFT",
86 "DOUBLESTAR",
87 "PLUSEQUAL",
88 "MINEQUAL",
89 "STAREQUAL",
90 "SLASHEQUAL",
91 "PERCENTEQUAL",
92 "AMPEREQUAL",
93 "VBAREQUAL",
94 "CIRCUMFLEXEQUAL",
95 "LEFTSHIFTEQUAL",
96 "RIGHTSHIFTEQUAL",
97 "DOUBLESTAREQUAL",
98 "DOUBLESLASH",
99 "DOUBLESLASHEQUAL",
100 "AT",
101 "RARROW",
102 "ELLIPSIS",
103 /* This table must match the #defines in token.h! */
104 "OP",
105 "<ERRORTOKEN>",
106 "<N_TOKENS>"
107};
108
109
110/* Create and initialize a new tok_state structure */
111
112static struct tok_state *
113tok_new(void)
114{
115 struct tok_state *tok = (struct tok_state *)PyMem_MALLOC_PyMem_DebugMalloc(
116 sizeof(struct tok_state));
117 if (tok == NULL((void *)0))
118 return NULL((void *)0);
119 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL((void *)0);
120 tok->done = E_OK10;
121 tok->fp = NULL((void *)0);
122 tok->input = NULL((void *)0);
123 tok->tabsize = TABSIZE8;
124 tok->indent = 0;
125 tok->indstack[0] = 0;
126 tok->atbol = 1;
127 tok->pendin = 0;
128 tok->prompt = tok->nextprompt = NULL((void *)0);
129 tok->lineno = 0;
130 tok->level = 0;
131 tok->filename = NULL((void *)0);
132 tok->altwarning = 1;
133 tok->alterror = 1;
134 tok->alttabsize = 1;
135 tok->altindstack[0] = 0;
136 tok->decoding_state = STATE_INIT;
137 tok->decoding_erred = 0;
138 tok->read_coding_spec = 0;
139 tok->enc = NULL((void *)0);
140 tok->encoding = NULL((void *)0);
141 tok->cont_line = 0;
142#ifndef PGEN
143 tok->decoding_readline = NULL((void *)0);
144 tok->decoding_buffer = NULL((void *)0);
145#endif
146 return tok;
147}
148
149static char *
150new_string(const char *s, Py_ssize_t len)
151{
152 char* result = (char *)PyMem_MALLOC_PyMem_DebugMalloc(len + 1);
153 if (result != NULL((void *)0)) {
154 memcpy(result, s, len)((__builtin_object_size (result, 0) != (size_t) -1) ? __builtin___memcpy_chk
(result, s, len, __builtin_object_size (result, 0)) : __inline_memcpy_chk
(result, s, len))
;
155 result[len] = '\0';
156 }
157 return result;
158}
159
160#ifdef PGEN
161
162static char *
163decoding_fgets(char *s, int size, struct tok_state *tok)
164{
165 return fgets(s, size, tok->fp);
166}
167
168static int
169decoding_feof(struct tok_state *tok)
170{
171 return feof(tok->fp);
172}
173
174static char *
175decode_str(const char *str, int exec_input, struct tok_state *tok)
176{
177 return new_string(str, strlen(str));
178}
179
180#else /* PGEN */
181
182static char *
183error_ret(struct tok_state *tok) /* XXX */
184{
185 tok->decoding_erred = 1;
186 if (tok->fp != NULL((void *)0) && tok->buf != NULL((void *)0)) /* see PyTokenizer_Free */
187 PyMem_FREE_PyMem_DebugFree(tok->buf);
188 tok->buf = NULL((void *)0);
189 return NULL((void *)0); /* as if it were EOF */
190}
191
192
193static char *
194get_normal_name(char *s) /* for utf-8 and latin-1 */
195{
196 char buf[13];
197 int i;
198 for (i = 0; i < 12; i++) {
199 int c = s[i];
200 if (c == '\0')
201 break;
202 else if (c == '_')
203 buf[i] = '-';
204 else
205 buf[i] = tolower(c)towlower(btowc(c));
206 }
207 buf[i] = '\0';
208 if (strcmp(buf, "utf-8") == 0 ||
209 strncmp(buf, "utf-8-", 6) == 0)
210 return "utf-8";
211 else if (strcmp(buf, "latin-1") == 0 ||
212 strcmp(buf, "iso-8859-1") == 0 ||
213 strcmp(buf, "iso-latin-1") == 0 ||
214 strncmp(buf, "latin-1-", 8) == 0 ||
215 strncmp(buf, "iso-8859-1-", 11) == 0 ||
216 strncmp(buf, "iso-latin-1-", 12) == 0)
217 return "iso-8859-1";
218 else
219 return s;
220}
221
222/* Return the coding spec in S, or NULL if none is found. */
223
224static char *
225get_coding_spec(const char *s, Py_ssize_t size)
226{
227 Py_ssize_t i;
228 /* Coding spec must be in a comment, and that comment must be
229 * the only statement on the source code line. */
230 for (i = 0; i < size - 6; i++) {
231 if (s[i] == '#')
232 break;
233 if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
234 return NULL((void *)0);
235 }
236 for (; i < size - 6; i++) { /* XXX inefficient search */
237 const char* t = s + i;
238 if (strncmp(t, "coding", 6) == 0) {
239 const char* begin = NULL((void *)0);
240 t += 6;
241 if (t[0] != ':' && t[0] != '=')
242 continue;
243 do {
244 t++;
245 } while (t[0] == '\x20' || t[0] == '\t');
246
247 begin = t;
248 while (Py_ISALNUM(t[0])(_Py_ctype_table[((unsigned char)((t[0]) & 0xff))] & (
(0x01|0x02)|0x04))
||
249 t[0] == '-' || t[0] == '_' || t[0] == '.')
250 t++;
251
252 if (begin < t) {
253 char* r = new_string(begin, t - begin);
254 char* q = get_normal_name(r);
255 if (r != q) {
256 PyMem_FREE_PyMem_DebugFree(r);
257 r = new_string(q, strlen(q));
258 }
259 return r;
260 }
261 }
262 }
263 return NULL((void *)0);
264}
265
266/* Check whether the line contains a coding spec. If it does,
267 invoke the set_readline function for the new encoding.
268 This function receives the tok_state and the new encoding.
269 Return 1 on success, 0 on failure. */
270
271static int
272check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
273 int set_readline(struct tok_state *, const char *))
274{
275 char * cs;
276 int r = 1;
277
278 if (tok->cont_line)
279 /* It's a continuation line, so it can't be a coding spec. */
280 return 1;
281 cs = get_coding_spec(line, size);
282 if (cs != NULL((void *)0)) {
283 tok->read_coding_spec = 1;
284 if (tok->encoding == NULL((void *)0)) {
285 assert(tok->decoding_state == STATE_RAW)(__builtin_expect(!(tok->decoding_state == STATE_RAW), 0) ?
__assert_rtn(__func__, "Parser/tokenizer.c", 285, "tok->decoding_state == STATE_RAW"
) : (void)0)
;
286 if (strcmp(cs, "utf-8") == 0) {
287 tok->encoding = cs;
288 } else {
289 r = set_readline(tok, cs);
290 if (r) {
291 tok->encoding = cs;
292 tok->decoding_state = STATE_NORMAL;
293 }
294 else
295 PyMem_FREE_PyMem_DebugFree(cs);
296 }
297 } else { /* then, compare cs with BOM */
298 r = (strcmp(tok->encoding, cs) == 0);
299 PyMem_FREE_PyMem_DebugFree(cs);
300 }
301 }
302 if (!r) {
303 cs = tok->encoding;
304 if (!cs)
305 cs = "with BOM";
306 PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
307 }
308 return r;
309}
310
311/* See whether the file starts with a BOM. If it does,
312 invoke the set_readline function with the new encoding.
313 Return 1 on success, 0 on failure. */
314
315static int
316check_bom(int get_char(struct tok_state *),
317 void unget_char(int, struct tok_state *),
318 int set_readline(struct tok_state *, const char *),
319 struct tok_state *tok)
320{
321 int ch1, ch2, ch3;
322 ch1 = get_char(tok);
323 tok->decoding_state = STATE_RAW;
324 if (ch1 == EOF(-1)) {
325 return 1;
326 } else if (ch1 == 0xEF) {
327 ch2 = get_char(tok);
328 if (ch2 != 0xBB) {
329 unget_char(ch2, tok);
330 unget_char(ch1, tok);
331 return 1;
332 }
333 ch3 = get_char(tok);
334 if (ch3 != 0xBF) {
335 unget_char(ch3, tok);
336 unget_char(ch2, tok);
337 unget_char(ch1, tok);
338 return 1;
339 }
340#if 0
341 /* Disable support for UTF-16 BOMs until a decision
342 is made whether this needs to be supported. */
343 } else if (ch1 == 0xFE) {
344 ch2 = get_char(tok);
345 if (ch2 != 0xFF) {
346 unget_char(ch2, tok);
347 unget_char(ch1, tok);
348 return 1;
349 }
350 if (!set_readline(tok, "utf-16-be"))
351 return 0;
352 tok->decoding_state = STATE_NORMAL;
353 } else if (ch1 == 0xFF) {
354 ch2 = get_char(tok);
355 if (ch2 != 0xFE) {
356 unget_char(ch2, tok);
357 unget_char(ch1, tok);
358 return 1;
359 }
360 if (!set_readline(tok, "utf-16-le"))
361 return 0;
362 tok->decoding_state = STATE_NORMAL;
363#endif
364 } else {
365 unget_char(ch1, tok);
366 return 1;
367 }
368 if (tok->encoding != NULL((void *)0))
369 PyMem_FREE_PyMem_DebugFree(tok->encoding);
370 tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
371 /* No need to set_readline: input is already utf-8 */
372 return 1;
373}
374
375/* Read a line of text from TOK into S, using the stream in TOK.
376 Return NULL on failure, else S.
377
378 On entry, tok->decoding_buffer will be one of:
379 1) NULL: need to call tok->decoding_readline to get a new line
380 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
381 stored the result in tok->decoding_buffer
382 3) PyByteArrayObject *: previous call to fp_readl did not have enough room
383 (in the s buffer) to copy entire contents of the line read
384 by tok->decoding_readline. tok->decoding_buffer has the overflow.
385 In this case, fp_readl is called in a loop (with an expanded buffer)
386 until the buffer ends with a '\n' (or until the end of the file is
387 reached): see tok_nextc and its calls to decoding_fgets.
388*/
389
390static char *
391fp_readl(char *s, int size, struct tok_state *tok)
392{
393 PyObject* bufobj;
394 const char *buf;
395 Py_ssize_t buflen;
396
397 /* Ask for one less byte so we can terminate it */
398 assert(size > 0)(__builtin_expect(!(size > 0), 0) ? __assert_rtn(__func__,
"Parser/tokenizer.c", 398, "size > 0") : (void)0)
;
399 size--;
400
401 if (tok->decoding_buffer) {
402 bufobj = tok->decoding_buffer;
403 Py_INCREF(bufobj)( _Py_RefTotal++ , ((PyObject*)(bufobj))->ob_refcnt++);
404 }
405 else
406 {
407 bufobj = PyObject_CallObject(tok->decoding_readline, NULL((void *)0));
408 if (bufobj == NULL((void *)0))
409 goto error;
410 }
411 if (PyUnicode_CheckExact(bufobj)((((PyObject*)(bufobj))->ob_type) == &PyUnicode_Type))
412 {
413 buf = _PyUnicode_AsStringAndSize(bufobj, &buflen);
414 if (buf == NULL((void *)0)) {
415 goto error;
416 }
417 }
418 else
419 {
420 buf = PyByteArray_AsString(bufobj);
421 if (buf == NULL((void *)0)) {
422 goto error;
423 }
424 buflen = PyByteArray_GET_SIZE(bufobj)((__builtin_expect(!(((((PyObject*)(bufobj))->ob_type) == (
&PyByteArray_Type) || PyType_IsSubtype((((PyObject*)(bufobj
))->ob_type), (&PyByteArray_Type)))), 0) ? __assert_rtn
(__func__, "Parser/tokenizer.c", 424, "PyByteArray_Check(bufobj)"
) : (void)0),(((PyVarObject*)(bufobj))->ob_size))
;
425 }
426
427 Py_XDECREF(tok->decoding_buffer)do { if ((tok->decoding_buffer) == ((void *)0)) ; else do {
if (_Py_RefTotal-- , --((PyObject*)(tok->decoding_buffer)
)->ob_refcnt != 0) { if (((PyObject*)tok->decoding_buffer
)->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c"
, 427, (PyObject *)(tok->decoding_buffer)); } else _Py_Dealloc
((PyObject *)(tok->decoding_buffer)); } while (0); } while
(0)
;
428 if (buflen > size) {
429 /* Too many chars, the rest goes into tok->decoding_buffer */
430 tok->decoding_buffer = PyByteArray_FromStringAndSize(buf+size,
431 buflen-size);
432 if (tok->decoding_buffer == NULL((void *)0))
433 goto error;
434 buflen = size;
435 }
436 else
437 tok->decoding_buffer = NULL((void *)0);
438
439 memcpy(s, buf, buflen)((__builtin_object_size (s, 0) != (size_t) -1) ? __builtin___memcpy_chk
(s, buf, buflen, __builtin_object_size (s, 0)) : __inline_memcpy_chk
(s, buf, buflen))
;
440 s[buflen] = '\0';
441 if (buflen == 0) /* EOF */
442 s = NULL((void *)0);
443 Py_DECREF(bufobj)do { if (_Py_RefTotal-- , --((PyObject*)(bufobj))->ob_refcnt
!= 0) { if (((PyObject*)bufobj)->ob_refcnt < 0) _Py_NegativeRefcount
("Parser/tokenizer.c", 443, (PyObject *)(bufobj)); } else _Py_Dealloc
((PyObject *)(bufobj)); } while (0)
;
444 return s;
445
446error:
447 Py_XDECREF(bufobj)do { if ((bufobj) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(bufobj))->ob_refcnt != 0) { if (((PyObject
*)bufobj)->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c"
, 447, (PyObject *)(bufobj)); } else _Py_Dealloc((PyObject *)
(bufobj)); } while (0); } while (0)
;
448 return error_ret(tok);
449}
450
451/* Set the readline function for TOK to a StreamReader's
452 readline function. The StreamReader is named ENC.
453
454 This function is called from check_bom and check_coding_spec.
455
456 ENC is usually identical to the future value of tok->encoding,
457 except for the (currently unsupported) case of UTF-16.
458
459 Return 1 on success, 0 on failure. */
460
461static int
462fp_setreadl(struct tok_state *tok, const char* enc)
463{
464 PyObject *readline = NULL((void *)0), *stream = NULL((void *)0), *io = NULL((void *)0);
465 int fd;
466
467 io = PyImport_ImportModuleNoBlock("io");
468 if (io == NULL((void *)0))
469 goto cleanup;
470
471 fd = fileno(tok->fp);
472 if (lseek(fd, 0, SEEK_SET0) == (off_t)-1) {
473 PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL((void *)0));
474 goto cleanup;
475 }
476
477 stream = PyObject_CallMethod(io, "open", "isisOOO",
478 fd, "r", -1, enc, Py_None(&_Py_NoneStruct), Py_None(&_Py_NoneStruct), Py_False((PyObject *) &_Py_FalseStruct));
479 if (stream == NULL((void *)0))
480 goto cleanup;
481
482 Py_XDECREF(tok->decoding_readline)do { if ((tok->decoding_readline) == ((void *)0)) ; else do
{ if (_Py_RefTotal-- , --((PyObject*)(tok->decoding_readline
))->ob_refcnt != 0) { if (((PyObject*)tok->decoding_readline
)->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c"
, 482, (PyObject *)(tok->decoding_readline)); } else _Py_Dealloc
((PyObject *)(tok->decoding_readline)); } while (0); } while
(0)
;
483 readline = PyObject_GetAttrString(stream, "readline");
484 tok->decoding_readline = readline;
485
486 /* The file has been reopened; parsing will restart from
487 * the beginning of the file, we have to reset the line number.
488 * But this function has been called from inside tok_nextc() which
489 * will increment lineno before it returns. So we set it -1 so that
490 * the next call to tok_nextc() will start with tok->lineno == 0.
491 */
492 tok->lineno = -1;
493
494 cleanup:
495 Py_XDECREF(stream)do { if ((stream) == ((void *)0)) ; else do { if (_Py_RefTotal
-- , --((PyObject*)(stream))->ob_refcnt != 0) { if (((PyObject
*)stream)->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c"
, 495, (PyObject *)(stream)); } else _Py_Dealloc((PyObject *)
(stream)); } while (0); } while (0)
;
496 Py_XDECREF(io)do { if ((io) == ((void *)0)) ; else do { if (_Py_RefTotal-- ,
--((PyObject*)(io))->ob_refcnt != 0) { if (((PyObject*)io
)->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c"
, 496, (PyObject *)(io)); } else _Py_Dealloc((PyObject *)(io)
); } while (0); } while (0)
;
497 return readline != NULL((void *)0);
498}
499
500/* Fetch the next byte from TOK. */
501
502static int fp_getc(struct tok_state *tok) {
503 return getc(tok->fp);
504}
505
506/* Unfetch the last byte back into TOK. */
507
508static void fp_ungetc(int c, struct tok_state *tok) {
509 ungetc(c, tok->fp);
510}
511
512/* Check whether the characters at s start a valid
513 UTF-8 sequence. Return the number of characters forming
514 the sequence if yes, 0 if not. */
515static int valid_utf8(const unsigned char* s)
516{
517 int expected = 0;
518 int length;
519 if (*s < 0x80)
520 /* single-byte code */
521 return 1;
522 if (*s < 0xc0)
523 /* following byte */
524 return 0;
525 if (*s < 0xE0)
526 expected = 1;
527 else if (*s < 0xF0)
528 expected = 2;
529 else if (*s < 0xF8)
530 expected = 3;
531 else
532 return 0;
533 length = expected + 1;
534 for (; expected; expected--)
535 if (s[expected] < 0x80 || s[expected] >= 0xC0)
536 return 0;
537 return length;
538}
539
540/* Read a line of input from TOK. Determine encoding
541 if necessary. */
542
543static char *
544decoding_fgets(char *s, int size, struct tok_state *tok)
545{
546 char *line = NULL((void *)0);
547 int badchar = 0;
548 PyObject *filename;
549 for (;;) {
550 if (tok->decoding_state == STATE_NORMAL) {
551 /* We already have a codec associated with
552 this input. */
553 line = fp_readl(s, size, tok);
554 break;
555 } else if (tok->decoding_state == STATE_RAW) {
556 /* We want a 'raw' read. */
557 line = Py_UniversalNewlineFgets(s, size,
558 tok->fp, NULL((void *)0));
559 break;
560 } else {
561 /* We have not yet determined the encoding.
562 If an encoding is found, use the file-pointer
563 reader functions from now on. */
564 if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
565 return error_ret(tok);
566 assert(tok->decoding_state != STATE_INIT)(__builtin_expect(!(tok->decoding_state != STATE_INIT), 0)
? __assert_rtn(__func__, "Parser/tokenizer.c", 566, "tok->decoding_state != STATE_INIT"
) : (void)0)
;
567 }
568 }
569 if (line != NULL((void *)0) && tok->lineno < 2 && !tok->read_coding_spec) {
570 if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
571 return error_ret(tok);
572 }
573 }
574#ifndef PGEN
575 /* The default encoding is UTF-8, so make sure we don't have any
576 non-UTF-8 sequences in it. */
577 if (line && !tok->encoding) {
578 unsigned char *c;
579 int length;
580 for (c = (unsigned char *)line; *c; c += length)
581 if (!(length = valid_utf8(c))) {
582 badchar = *c;
583 break;
584 }
585 }
586 if (badchar) {
587 /* Need to add 1 to the line number, since this line
588 has not been counted, yet. */
589 filename = PyUnicode_DecodeFSDefaultPyUnicodeUCS2_DecodeFSDefault(tok->filename);
590 if (filename != NULL((void *)0)) {
591 PyErr_Format(PyExc_SyntaxError,
592 "Non-UTF-8 code starting with '\\x%.2x' "
593 "in file %U on line %i, "
594 "but no encoding declared; "
595 "see http://python.org/dev/peps/pep-0263/ for details",
596 badchar, filename, tok->lineno + 1);
597 Py_DECREF(filename)do { if (_Py_RefTotal-- , --((PyObject*)(filename))->ob_refcnt
!= 0) { if (((PyObject*)filename)->ob_refcnt < 0) _Py_NegativeRefcount
("Parser/tokenizer.c", 597, (PyObject *)(filename)); } else _Py_Dealloc
((PyObject *)(filename)); } while (0)
;
598 }
599 return error_ret(tok);
600 }
601#endif
602 return line;
603}
604
605static int
606decoding_feof(struct tok_state *tok)
607{
608 if (tok->decoding_state != STATE_NORMAL) {
609 return feof(tok->fp);
610 } else {
611 PyObject* buf = tok->decoding_buffer;
612 if (buf == NULL((void *)0)) {
613 buf = PyObject_CallObject(tok->decoding_readline, NULL((void *)0));
614 if (buf == NULL((void *)0)) {
615 error_ret(tok);
616 return 1;
617 } else {
618 tok->decoding_buffer = buf;
619 }
620 }
621 return PyObject_LengthPyObject_Size(buf) == 0;
622 }
623}
624
625/* Fetch a byte from TOK, using the string buffer. */
626
627static int
628buf_getc(struct tok_state *tok) {
629 return Py_CHARMASK(*tok->str++)((unsigned char)((*tok->str++) & 0xff));
630}
631
632/* Unfetch a byte from TOK, using the string buffer. */
633
634static void
635buf_ungetc(int c, struct tok_state *tok) {
636 tok->str--;
637 assert(Py_CHARMASK(*tok->str) == c)(__builtin_expect(!(((unsigned char)((*tok->str) & 0xff
)) == c), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c", 637
, "Py_CHARMASK(*tok->str) == c") : (void)0)
; /* tok->cur may point to read-only segment */
638}
639
640/* Set the readline function for TOK to ENC. For the string-based
641 tokenizer, this means to just record the encoding. */
642
643static int
644buf_setreadl(struct tok_state *tok, const char* enc) {
645 tok->enc = enc;
646 return 1;
647}
648
649/* Return a UTF-8 encoding Python string object from the
650 C byte string STR, which is encoded with ENC. */
651
652static PyObject *
653translate_into_utf8(const char* str, const char* enc) {
654 PyObject *utf8;
655 PyObject* buf = PyUnicode_DecodePyUnicodeUCS2_Decode(str, strlen(str), enc, NULL((void *)0));
656 if (buf == NULL((void *)0))
657 return NULL((void *)0);
658 utf8 = PyUnicode_AsUTF8StringPyUnicodeUCS2_AsUTF8String(buf);
659 Py_DECREF(buf)do { if (_Py_RefTotal-- , --((PyObject*)(buf))->ob_refcnt !=
0) { if (((PyObject*)buf)->ob_refcnt < 0) _Py_NegativeRefcount
("Parser/tokenizer.c", 659, (PyObject *)(buf)); } else _Py_Dealloc
((PyObject *)(buf)); } while (0)
;
660 return utf8;
661}
662
663
664static char *
665translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
666 int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length;
667 char *buf, *current;
668 char c = '\0';
669 buf = PyMem_MALLOC_PyMem_DebugMalloc(needed_length);
670 if (buf == NULL((void *)0)) {
671 tok->done = E_NOMEM15;
672 return NULL((void *)0);
673 }
674 for (current = buf; *s; s++, current++) {
675 c = *s;
676 if (skip_next_lf) {
677 skip_next_lf = 0;
678 if (c == '\n') {
679 c = *++s;
680 if (!c)
681 break;
682 }
683 }
684 if (c == '\r') {
685 skip_next_lf = 1;
686 c = '\n';
687 }
688 *current = c;
689 }
690 /* If this is exec input, add a newline to the end of the string if
691 there isn't one already. */
692 if (exec_input && c != '\n') {
693 *current = '\n';
694 current++;
695 }
696 *current = '\0';
697 final_length = current - buf + 1;
698 if (final_length < needed_length && final_length)
699 /* should never fail */
700 buf = PyMem_REALLOC_PyMem_DebugRealloc(buf, final_length);
701 return buf;
702}
703
704/* Decode a byte string STR for use as the buffer of TOK.
705 Look for encoding declarations inside STR, and record them
706 inside TOK. */
707
708static const char *
709decode_str(const char *input, int single, struct tok_state *tok)
710{
711 PyObject* utf8 = NULL((void *)0);
712 const char *str;
713 const char *s;
714 const char *newl[2] = {NULL((void *)0), NULL((void *)0)};
715 int lineno = 0;
716 tok->input = str = translate_newlines(input, single, tok);
717 if (str == NULL((void *)0))
718 return NULL((void *)0);
719 tok->enc = NULL((void *)0);
720 tok->str = str;
721 if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
722 return error_ret(tok);
723 str = tok->str; /* string after BOM if any */
724 assert(str)(__builtin_expect(!(str), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c"
, 724, "str") : (void)0)
;
725 if (tok->enc != NULL((void *)0)) {
726 utf8 = translate_into_utf8(str, tok->enc);
727 if (utf8 == NULL((void *)0))
728 return error_ret(tok);
729 str = PyBytes_AsString(utf8);
730 }
731 for (s = str;; s++) {
732 if (*s == '\0') break;
733 else if (*s == '\n') {
734 assert(lineno < 2)(__builtin_expect(!(lineno < 2), 0) ? __assert_rtn(__func__
, "Parser/tokenizer.c", 734, "lineno < 2") : (void)0)
;
735 newl[lineno] = s;
736 lineno++;
737 if (lineno == 2) break;
738 }
739 }
740 tok->enc = NULL((void *)0);
741 /* need to check line 1 and 2 separately since check_coding_spec
742 assumes a single line as input */
743 if (newl[0]) {
744 if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
745 return error_ret(tok);
746 if (tok->enc == NULL((void *)0) && newl[1]) {
747 if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
748 tok, buf_setreadl))
749 return error_ret(tok);
750 }
751 }
752 if (tok->enc != NULL((void *)0)) {
753 assert(utf8 == NULL)(__builtin_expect(!(utf8 == ((void *)0)), 0) ? __assert_rtn(__func__
, "Parser/tokenizer.c", 753, "utf8 == NULL") : (void)0)
;
754 utf8 = translate_into_utf8(str, tok->enc);
755 if (utf8 == NULL((void *)0))
756 return error_ret(tok);
757 str = PyBytes_AS_STRING(utf8)((__builtin_expect(!(((((((PyObject*)(utf8))->ob_type))->
tp_flags & ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__
, "Parser/tokenizer.c", 757, "PyBytes_Check(utf8)") : (void)0
), (((PyBytesObject *)(utf8))->ob_sval))
;
758 }
759 assert(tok->decoding_buffer == NULL)(__builtin_expect(!(tok->decoding_buffer == ((void *)0)), 0
) ? __assert_rtn(__func__, "Parser/tokenizer.c", 759, "tok->decoding_buffer == NULL"
) : (void)0)
;
760 tok->decoding_buffer = utf8; /* CAUTION */
761 return str;
762}
763
764#endif /* PGEN */
765
766/* Set up tokenizer for string */
767
768struct tok_state *
769PyTokenizer_FromString(const char *str, int exec_input)
770{
771 struct tok_state *tok = tok_new();
772 if (tok == NULL((void *)0))
773 return NULL((void *)0);
774 str = (char *)decode_str(str, exec_input, tok);
775 if (str == NULL((void *)0)) {
776 PyTokenizer_Free(tok);
777 return NULL((void *)0);
778 }
779
780 /* XXX: constify members. */
781 tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
782 return tok;
783}
784
785struct tok_state *
786PyTokenizer_FromUTF8(const char *str, int exec_input)
787{
788 struct tok_state *tok = tok_new();
789 if (tok == NULL((void *)0))
790 return NULL((void *)0);
791#ifndef PGEN
792 tok->input = str = translate_newlines(str, exec_input, tok);
793#endif
794 if (str == NULL((void *)0)) {
795 PyTokenizer_Free(tok);
796 return NULL((void *)0);
797 }
798 tok->decoding_state = STATE_RAW;
799 tok->read_coding_spec = 1;
800 tok->enc = NULL((void *)0);
801 tok->str = str;
802 tok->encoding = (char *)PyMem_MALLOC_PyMem_DebugMalloc(6);
803 if (!tok->encoding) {
804 PyTokenizer_Free(tok);
805 return NULL((void *)0);
806 }
807 strcpy(tok->encoding, "utf-8")((__builtin_object_size (tok->encoding, 0) != (size_t) -1)
? __builtin___strcpy_chk (tok->encoding, "utf-8", __builtin_object_size
(tok->encoding, 2 > 1)) : __inline_strcpy_chk (tok->
encoding, "utf-8"))
;
808
809 /* XXX: constify members. */
810 tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
811 return tok;
812}
813
814/* Set up tokenizer for file */
815
816struct tok_state *
817PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2)
818{
819 struct tok_state *tok = tok_new();
820 if (tok == NULL((void *)0))
821 return NULL((void *)0);
822 if ((tok->buf = (char *)PyMem_MALLOC_PyMem_DebugMalloc(BUFSIZ1024)) == NULL((void *)0)) {
823 PyTokenizer_Free(tok);
824 return NULL((void *)0);
825 }
826 tok->cur = tok->inp = tok->buf;
827 tok->end = tok->buf + BUFSIZ1024;
828 tok->fp = fp;
829 tok->prompt = ps1;
830 tok->nextprompt = ps2;
831 if (enc != NULL((void *)0)) {
832 /* Must copy encoding declaration since it
833 gets copied into the parse tree. */
834 tok->encoding = PyMem_MALLOC_PyMem_DebugMalloc(strlen(enc)+1);
835 if (!tok->encoding) {
836 PyTokenizer_Free(tok);
837 return NULL((void *)0);
838 }
839 strcpy(tok->encoding, enc)((__builtin_object_size (tok->encoding, 0) != (size_t) -1)
? __builtin___strcpy_chk (tok->encoding, enc, __builtin_object_size
(tok->encoding, 2 > 1)) : __inline_strcpy_chk (tok->
encoding, enc))
;
840 tok->decoding_state = STATE_NORMAL;
841 }
842 return tok;
843}
844
845
846/* Free a tok_state structure */
847
848void
849PyTokenizer_Free(struct tok_state *tok)
850{
851 if (tok->encoding != NULL((void *)0))
852 PyMem_FREE_PyMem_DebugFree(tok->encoding);
853#ifndef PGEN
854 Py_XDECREF(tok->decoding_readline)do { if ((tok->decoding_readline) == ((void *)0)) ; else do
{ if (_Py_RefTotal-- , --((PyObject*)(tok->decoding_readline
))->ob_refcnt != 0) { if (((PyObject*)tok->decoding_readline
)->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c"
, 854, (PyObject *)(tok->decoding_readline)); } else _Py_Dealloc
((PyObject *)(tok->decoding_readline)); } while (0); } while
(0)
;
855 Py_XDECREF(tok->decoding_buffer)do { if ((tok->decoding_buffer) == ((void *)0)) ; else do {
if (_Py_RefTotal-- , --((PyObject*)(tok->decoding_buffer)
)->ob_refcnt != 0) { if (((PyObject*)tok->decoding_buffer
)->ob_refcnt < 0) _Py_NegativeRefcount("Parser/tokenizer.c"
, 855, (PyObject *)(tok->decoding_buffer)); } else _Py_Dealloc
((PyObject *)(tok->decoding_buffer)); } while (0); } while
(0)
;
856#endif
857 if (tok->fp != NULL((void *)0) && tok->buf != NULL((void *)0))
858 PyMem_FREE_PyMem_DebugFree(tok->buf);
859 if (tok->input)
860 PyMem_FREE_PyMem_DebugFree((char *)tok->input);
861 PyMem_FREE_PyMem_DebugFree(tok);
862}
863
864/* Get next char, updating state; error code goes into tok->done */
865
866static int
867tok_nextc(register struct tok_state *tok)
868{
869 for (;;) {
870 if (tok->cur != tok->inp) {
871 return Py_CHARMASK(*tok->cur++)((unsigned char)((*tok->cur++) & 0xff)); /* Fast path */
872 }
873 if (tok->done != E_OK10)
874 return EOF(-1);
875 if (tok->fp == NULL((void *)0)) {
876 char *end = strchr(tok->inp, '\n');
877 if (end != NULL((void *)0))
878 end++;
879 else {
880 end = strchr(tok->inp, '\0');
881 if (end == tok->inp) {
882 tok->done = E_EOF11;
883 return EOF(-1);
884 }
885 }
886 if (tok->start == NULL((void *)0))
887 tok->buf = tok->cur;
888 tok->line_start = tok->cur;
889 tok->lineno++;
890 tok->inp = end;
891 return Py_CHARMASK(*tok->cur++)((unsigned char)((*tok->cur++) & 0xff));
892 }
893 if (tok->prompt != NULL((void *)0)) {
894 char *newtok = PyOS_Readline(stdin__stdinp, stdout__stdoutp, tok->prompt);
895#ifndef PGEN
896 if (newtok != NULL((void *)0)) {
897 char *translated = translate_newlines(newtok, 0, tok);
898 PyMem_FREE_PyMem_DebugFree(newtok);
899 if (translated == NULL((void *)0))
900 return EOF(-1);
901 newtok = translated;
902 }
903 if (tok->encoding && newtok && *newtok) {
904 /* Recode to UTF-8 */
905 Py_ssize_t buflen;
906 const char* buf;
907 PyObject *u = translate_into_utf8(newtok, tok->encoding);
908 PyMem_FREE_PyMem_DebugFree(newtok);
909 if (!u) {
910 tok->done = E_DECODE22;
911 return EOF(-1);
912 }
913 buflen = PyBytes_GET_SIZE(u)((__builtin_expect(!(((((((PyObject*)(u))->ob_type))->tp_flags
& ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c"
, 913, "PyBytes_Check(u)") : (void)0),(((PyVarObject*)(u))->
ob_size))
;
914 buf = PyBytes_AS_STRING(u)((__builtin_expect(!(((((((PyObject*)(u))->ob_type))->tp_flags
& ((1L<<27))) != 0)), 0) ? __assert_rtn(__func__, "Parser/tokenizer.c"
, 914, "PyBytes_Check(u)") : (void)0), (((PyBytesObject *)(u)
)->ob_sval))
;
915 if (!buf) {
916 Py_DECREF(u)do { if (_Py_RefTotal-- , --((PyObject*)(u))->ob_refcnt !=
0) { if (((PyObject*)u)->ob_refcnt < 0) _Py_NegativeRefcount
("Parser/tokenizer.c", 916, (PyObject *)(u)); } else _Py_Dealloc
((PyObject *)(u)); } while (0)
;
917 tok->done = E_DECODE22;
918 return EOF(-1);
919 }
920 newtok = PyMem_MALLOC_PyMem_DebugMalloc(buflen+1);
921 strcpy(newtok, buf)((__builtin_object_size (newtok, 0) != (size_t) -1) ? __builtin___strcpy_chk
(newtok, buf, __builtin_object_size (newtok, 2 > 1)) : __inline_strcpy_chk
(newtok, buf))
;
922 Py_DECREF(u)do { if (_Py_RefTotal-- , --((PyObject*)(u))->ob_refcnt !=
0) { if (((PyObject*)u)->ob_refcnt < 0) _Py_NegativeRefcount
("Parser/tokenizer.c", 922, (PyObject *)(u)); } else _Py_Dealloc
((PyObject *)(u)); } while (0)
;
923 }
924#endif
925 if (tok->nextprompt != NULL((void *)0))
926 tok->prompt = tok->nextprompt;
927 if (newtok == NULL((void *)0))
928 tok->done = E_INTR12;
929 else if (*newtok == '\0') {
930 PyMem_FREE_PyMem_DebugFree(newtok);
931 tok->done = E_EOF11;
932 }
933 else if (tok->start != NULL((void *)0)) {
934 size_t start = tok->start - tok->buf;
935 size_t oldlen = tok->cur - tok->buf;
936 size_t newlen = oldlen + strlen(newtok);
937 char *buf = tok->buf;
938 buf = (char *)PyMem_REALLOC_PyMem_DebugRealloc(buf, newlen+1);
939 tok->lineno++;
940 if (buf == NULL((void *)0)) {
941 PyMem_FREE_PyMem_DebugFree(tok->buf);
942 tok->buf = NULL((void *)0);
943 PyMem_FREE_PyMem_DebugFree(newtok);
944 tok->done = E_NOMEM15;
945 return EOF(-1);
946 }
947 tok->buf = buf;
948 tok->cur = tok->buf + oldlen;
949 tok->line_start = tok->cur;
950 strcpy(tok->buf + oldlen, newtok)((__builtin_object_size (tok->buf + oldlen, 0) != (size_t)
-1) ? __builtin___strcpy_chk (tok->buf + oldlen, newtok, __builtin_object_size
(tok->buf + oldlen, 2 > 1)) : __inline_strcpy_chk (tok
->buf + oldlen, newtok))
;
951 PyMem_FREE_PyMem_DebugFree(newtok);
952 tok->inp = tok->buf + newlen;
953 tok->end = tok->inp + 1;
954 tok->start = tok->buf + start;
955 }
956 else {
957 tok->lineno++;
958 if (tok->buf != NULL((void *)0))
959 PyMem_FREE_PyMem_DebugFree(tok->buf);
960 tok->buf = newtok;
961 tok->line_start = tok->buf;
962 tok->cur = tok->buf;
963 tok->line_start = tok->buf;
964 tok->inp = strchr(tok->buf, '\0');
965 tok->end = tok->inp + 1;
966 }
967 }
968 else {
969 int done = 0;
970 Py_ssize_t cur = 0;
971 char *pt;
972 if (tok->start == NULL((void *)0)) {
973 if (tok->buf == NULL((void *)0)) {
974 tok->buf = (char *)
975 PyMem_MALLOC_PyMem_DebugMalloc(BUFSIZ1024);
976 if (tok->buf == NULL((void *)0)) {
977 tok->done = E_NOMEM15;
978 return EOF(-1);
979 }
980 tok->end = tok->buf + BUFSIZ1024;
981 }
982 if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
983 tok) == NULL((void *)0)) {
984 tok->done = E_EOF11;
985 done = 1;
986 }
987 else {
988 tok->done = E_OK10;
989 tok->inp = strchr(tok->buf, '\0');
990 done = tok->inp[-1] == '\n';
991 }
992 }
993 else {
994 cur = tok->cur - tok->buf;
995 if (decoding_feof(tok)) {
996 tok->done = E_EOF11;
997 done = 1;
998 }
999 else
1000 tok->done = E_OK10;
1001 }
1002 tok->lineno++;
1003 /* Read until '\n' or EOF */
1004 while (!done) {
1005 Py_ssize_t curstart = tok->start == NULL((void *)0) ? -1 :
1006 tok->start - tok->buf;
1007 Py_ssize_t curvalid = tok->inp - tok->buf;
1008 Py_ssize_t newsize = curvalid + BUFSIZ1024;
1009 char *newbuf = tok->buf;
1010 newbuf = (char *)PyMem_REALLOC_PyMem_DebugRealloc(newbuf,
1011 newsize);
1012 if (newbuf == NULL((void *)0)) {
1013 tok->done = E_NOMEM15;
1014 tok->cur = tok->inp;
1015 return EOF(-1);
1016 }
1017 tok->buf = newbuf;
1018 tok->inp = tok->buf + curvalid;
1019 tok->end = tok->buf + newsize;
1020 tok->start = curstart < 0 ? NULL((void *)0) :
1021 tok->buf + curstart;
1022 if (decoding_fgets(tok->inp,
1023 (int)(tok->end - tok->inp),
1024 tok) == NULL((void *)0)) {
1025 /* Break out early on decoding
1026 errors, as tok->buf will be NULL
1027 */
1028 if (tok->decoding_erred)
1029 return EOF(-1);
1030 /* Last line does not end in \n,
1031 fake one */
1032 strcpy(tok->inp, "\n")((__builtin_object_size (tok->inp, 0) != (size_t) -1) ? __builtin___strcpy_chk
(tok->inp, "\n", __builtin_object_size (tok->inp, 2 >
1)) : __inline_strcpy_chk (tok->inp, "\n"))
;
1033 }
1034 tok->inp = strchr(tok->inp, '\0');
1035 done = tok->inp[-1] == '\n';
1036 }
1037 if (tok->buf != NULL((void *)0)) {
1038 tok->cur = tok->buf + cur;
1039 tok->line_start = tok->cur;
1040 /* replace "\r\n" with "\n" */
1041 /* For Mac leave the \r, giving a syntax error */
1042 pt = tok->inp - 2;
1043 if (pt >= tok->buf && *pt == '\r') {
1044 *pt++ = '\n';
1045 *pt = '\0';
1046 tok->inp = pt;
1047 }
1048 }
1049 }
1050 if (tok->done != E_OK10) {
1051 if (tok->prompt != NULL((void *)0))
1052 PySys_WriteStderr("\n");
1053 tok->cur = tok->inp;
1054 return EOF(-1);
1055 }
1056 }
1057 /*NOTREACHED*/
1058}
1059
1060
1061/* Back-up one character */
1062
1063static void
1064tok_backup(register struct tok_state *tok, register int c)
1065{
1066 if (c != EOF(-1)) {
1067 if (--tok->cur < tok->buf)
1068 Py_FatalError("tok_backup: beginning of buffer");
1069 if (*tok->cur != c)
1070 *tok->cur = c;
1071 }
1072}
1073
1074
1075/* Return the token corresponding to a single character */
1076
/* Map a single punctuation character to its token number.
 * Unrecognized characters map to the generic OP token. */
int
PyToken_OneChar(int c)
{
    switch (c) {
    case '(':  return 7;   /* LPAR */
    case ')':  return 8;   /* RPAR */
    case '[':  return 9;   /* LSQB */
    case ']':  return 10;  /* RSQB */
    case ':':  return 11;  /* COLON */
    case ',':  return 12;  /* COMMA */
    case ';':  return 13;  /* SEMI */
    case '+':  return 14;  /* PLUS */
    case '-':  return 15;  /* MINUS */
    case '*':  return 16;  /* STAR */
    case '/':  return 17;  /* SLASH */
    case '|':  return 18;  /* VBAR */
    case '&':  return 19;  /* AMPER */
    case '<':  return 20;  /* LESS */
    case '>':  return 21;  /* GREATER */
    case '=':  return 22;  /* EQUAL */
    case '.':  return 23;  /* DOT */
    case '%':  return 24;  /* PERCENT */
    case '{':  return 26;  /* LBRACE */
    case '}':  return 27;  /* RBRACE */
    case '^':  return 33;  /* CIRCUMFLEX */
    case '~':  return 32;  /* TILDE */
    case '@':  return 50;  /* AT */
    default:   return 53;  /* OP */
    }
}
1107
1108
/* Map a two-character operator to its token number, or OP when the pair
 * is not a recognized two-character token. */
int
PyToken_TwoChars(int c1, int c2)
{
    if (c2 == '=') {
        /* All the "<op>=" augmented/comparison forms. */
        switch (c1) {
        case '=': return 28;  /* EQEQUAL */
        case '!': return 29;  /* NOTEQUAL */
        case '<': return 30;  /* LESSEQUAL */
        case '>': return 31;  /* GREATEREQUAL */
        case '+': return 37;  /* PLUSEQUAL */
        case '-': return 38;  /* MINEQUAL */
        case '*': return 39;  /* STAREQUAL */
        case '/': return 40;  /* SLASHEQUAL */
        case '|': return 43;  /* VBAREQUAL */
        case '%': return 41;  /* PERCENTEQUAL */
        case '&': return 42;  /* AMPEREQUAL */
        case '^': return 44;  /* CIRCUMFLEXEQUAL */
        }
        return 53;            /* OP */
    }
    /* The remaining pairs do not end in '='. */
    if (c1 == '<' && c2 == '>')
        return 29;            /* NOTEQUAL (legacy spelling) */
    if (c1 == '<' && c2 == '<')
        return 34;            /* LEFTSHIFT */
    if (c1 == '>' && c2 == '>')
        return 35;            /* RIGHTSHIFT */
    if (c1 == '-' && c2 == '>')
        return 51;            /* RARROW */
    if (c1 == '*' && c2 == '*')
        return 36;            /* DOUBLESTAR */
    if (c1 == '/' && c2 == '/')
        return 48;            /* DOUBLESLASH */
    return 53;                /* OP */
}
1182
/* Map a three-character operator to its token number, or OP when the
 * triple is not a recognized three-character token.  Every recognized
 * triple starts with a doubled character (<<=, >>=, **=, //=, ...). */
int
PyToken_ThreeChars(int c1, int c2, int c3)
{
    if (c1 != c2)
        return 53;             /* OP */
    if (c3 == '=') {
        switch (c1) {
        case '<': return 45;   /* LEFTSHIFTEQUAL */
        case '>': return 46;   /* RIGHTSHIFTEQUAL */
        case '*': return 47;   /* DOUBLESTAREQUAL */
        case '/': return 49;   /* DOUBLESLASHEQUAL */
        }
    }
    if (c1 == '.' && c3 == '.')
        return 52;             /* ELLIPSIS */
    return 53;                 /* OP */
}
1240
1241static int
1242indenterror(struct tok_state *tok)
1243{
1244 if (tok->alterror) {
1245 tok->done = E_TABSPACE18;
1246 tok->cur = tok->inp;
1247 return 1;
1248 }
1249 if (tok->altwarning) {
1250 PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
1251 "in indentation\n", tok->filename);
1252 tok->altwarning = 0;
1253 }
1254 return 0;
1255}
1256
1257#ifdef PGEN
1258#define verify_identifier(tok) 1
1259#else
1260/* Verify that the identifier follows PEP 3131. */
1261static int
1262verify_identifier(struct tok_state *tok)
1263{
1264 PyObject *s;
1265 int result;
 /* Decode the span tok->start..tok->cur as UTF-8.  Called from
 tok_get() only for names containing at least one byte >= 128. */
1266 s = PyUnicode_DecodeUTF8PyUnicodeUCS2_DecodeUTF8(tok->start, tok->cur - tok->start, NULL((void *)0));
1267 if (s == NULL((void *)0)) {
 /* A UnicodeDecodeError just means "not a valid identifier";
 any other exception is reported as an internal error. */
1268 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
1269 PyErr_Clear();
1270 tok->done = E_IDENTIFIER26;
1271 } else {
1272 tok->done = E_ERROR17;
1273 }
1274 return 0;
1275 }
 /* Check the decoded string against the PEP 3131 identifier rules. */
1276 result = PyUnicode_IsIdentifierPyUnicodeUCS2_IsIdentifier(s);
1277 Py_DECREF(s)do { if (_Py_RefTotal-- , --((PyObject*)(s))->ob_refcnt !=
0) { if (((PyObject*)s)->ob_refcnt < 0) _Py_NegativeRefcount
("Parser/tokenizer.c", 1277, (PyObject *)(s)); } else _Py_Dealloc
((PyObject *)(s)); } while (0)
;
 /* Nonzero result: valid identifier.  Zero: flag it for the caller. */
1278 if (result == 0)
1279 tok->done = E_IDENTIFIER26;
1280 return result;
1281}
1282#endif
1283
1284/* Get next token, after space stripping etc. */
1285
/*
 * tok_get(): the tokenizer core.  Scans tok's input and returns the type
 * code of the next token (NAME, NUMBER, STRING, NEWLINE, INDENT/DEDENT,
 * an operator code, ENDMARKER at end of input, or ERRORTOKEN on error,
 * with the reason recorded in tok->done).  On return *p_start/*p_end
 * delimit the token's text inside tok->buf; both are reset to NULL on
 * entry and left NULL for INDENT/DEDENT and error returns.
 *
 * Fix: the string-escape path used to do `c = tok_nextc(tok)` although
 * the stored value is never read (the enclosing loop reassigns c at the
 * top); the dead store is removed while keeping the consuming call.
 */
1286static int
1287tok_get(register struct tok_state *tok, char **p_start, char **p_end)
1288{
1289 register int c;
1290 int blankline, nonascii;
1291
1292 *p_start = *p_end = NULL((void *)0);
1293 nextline:
1294 tok->start = NULL((void *)0);
1295 blankline = 0;
1296
1297 /* Get indentation level */
1298 if (tok->atbol) {
1299 register int col = 0;
1300 register int altcol = 0;
1301 tok->atbol = 0;
1302 for (;;) {
1303 c = tok_nextc(tok);
1304 if (c == ' ')
1305 col++, altcol++;
1306 else if (c == '\t') {
1307 col = (col/tok->tabsize + 1) * tok->tabsize;
1308 altcol = (altcol/tok->alttabsize + 1)
1309 * tok->alttabsize;
1310 }
1311 else if (c == '\014') /* Control-L (formfeed) */
1312 col = altcol = 0; /* For Emacs users */
1313 else
1314 break;
1315 }
1316 tok_backup(tok, c);
1317 if (c == '#' || c == '\n') {
1318 /* Lines with only whitespace and/or comments
1319 shouldn't affect the indentation and are
1320 not passed to the parser as NEWLINE tokens,
1321 except *totally* empty lines in interactive
1322 mode, which signal the end of a command group. */
1323 if (col == 0 && c == '\n' && tok->prompt != NULL((void *)0))
1324 blankline = 0; /* Let it through */
1325 else
1326 blankline = 1; /* Ignore completely */
1327 /* We can't jump back right here since we still
1328 may need to skip to the end of a comment */
1329 }
1330 if (!blankline && tok->level == 0) {
1331 if (col == tok->indstack[tok->indent]) {
1332 /* No change */
1333 if (altcol != tok->altindstack[tok->indent]) {
1334 if (indenterror(tok))
1335 return ERRORTOKEN54;
1336 }
1337 }
1338 else if (col > tok->indstack[tok->indent]) {
1339 /* Indent -- always one */
1340 if (tok->indent+1 >= MAXINDENT100) {
1341 tok->done = E_TOODEEP20;
1342 tok->cur = tok->inp;
1343 return ERRORTOKEN54;
1344 }
1345 if (altcol <= tok->altindstack[tok->indent]) {
1346 if (indenterror(tok))
1347 return ERRORTOKEN54;
1348 }
1349 tok->pendin++;
1350 tok->indstack[++tok->indent] = col;
1351 tok->altindstack[tok->indent] = altcol;
1352 }
1353 else /* col < tok->indstack[tok->indent] */ {
1354 /* Dedent -- any number, must be consistent */
1355 while (tok->indent > 0 &&
1356 col < tok->indstack[tok->indent]) {
1357 tok->pendin--;
1358 tok->indent--;
1359 }
1360 if (col != tok->indstack[tok->indent]) {
1361 tok->done = E_DEDENT21;
1362 tok->cur = tok->inp;
1363 return ERRORTOKEN54;
1364 }
1365 if (altcol != tok->altindstack[tok->indent]) {
1366 if (indenterror(tok))
1367 return ERRORTOKEN54;
1368 }
1369 }
1370 }
1371 }
1372
1373 tok->start = tok->cur;
1374
1375 /* Return pending indents/dedents */
1376 if (tok->pendin != 0) {
1377 if (tok->pendin < 0) {
1378 tok->pendin++;
1379 return DEDENT6;
1380 }
1381 else {
1382 tok->pendin--;
1383 return INDENT5;
1384 }
1385 }
1386
1387 again:
1388 tok->start = NULL((void *)0);
1389 /* Skip spaces */
1390 do {
1391 c = tok_nextc(tok);
1392 } while (c == ' ' || c == '\t' || c == '\014');
1393
1394 /* Set start of current token */
1395 tok->start = tok->cur - 1;
1396
1397 /* Skip comment */
1398 if (c == '#')
1399 while (c != EOF(-1) && c != '\n')
1400 c = tok_nextc(tok);
1401
1402 /* Check for EOF and errors now */
1403 if (c == EOF(-1)) {
1404 return tok->done == E_EOF11 ? ENDMARKER0 : ERRORTOKEN54;
1405 }
1406
1407 /* Identifier (most frequent token!) */
1408 nonascii = 0;
1409 if (is_potential_identifier_start(c)( (c >= 'a' && c <= 'z') || (c >= 'A' &&
c <= 'Z') || c == '_' || (c >= 128))
) {
1410 /* Process b"", r"" and br"" */
1411 if (c == 'b' || c == 'B') {
1412 c = tok_nextc(tok);
1413 if (c == '"' || c == '\'')
1414 goto letter_quote;
1415 }
1416 if (c == 'r' || c == 'R') {
1417 c = tok_nextc(tok);
1418 if (c == '"' || c == '\'')
1419 goto letter_quote;
1420 }
1421 while (is_potential_identifier_char(c)( (c >= 'a' && c <= 'z') || (c >= 'A' &&
c <= 'Z') || (c >= '0' && c <= '9') || c ==
'_' || (c >= 128))
) {
1422 if (c >= 128)
1423 nonascii = 1;
1424 c = tok_nextc(tok);
1425 }
1426 tok_backup(tok, c);
1427 if (nonascii &&
1428 !verify_identifier(tok)) {
1429 tok->done = E_IDENTIFIER26;
1430 return ERRORTOKEN54;
1431 }
1432 *p_start = tok->start;
1433 *p_end = tok->cur;
1434 return NAME1;
1435 }
1436
1437 /* Newline */
1438 if (c == '\n') {
1439 tok->atbol = 1;
1440 if (blankline || tok->level > 0)
1441 goto nextline;
1442 *p_start = tok->start;
1443 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
1444 tok->cont_line = 0;
1445 return NEWLINE4;
1446 }
1447
1448 /* Period or number starting with period? */
1449 if (c == '.') {
1450 c = tok_nextc(tok);
1451 if (isdigit(c)) {
1452 goto fraction;
1453 } else if (c == '.') {
1454 c = tok_nextc(tok);
1455 if (c == '.') {
1456 *p_start = tok->start;
1457 *p_end = tok->cur;
1458 return ELLIPSIS52;
1459 } else {
1460 tok_backup(tok, c);
1461 }
1462 tok_backup(tok, '.');
1463 } else {
1464 tok_backup(tok, c);
1465 }
1466 *p_start = tok->start;
1467 *p_end = tok->cur;
1468 return DOT23;
1469 }
1470
1471 /* Number */
1472 if (isdigit(c)) {
1473 if (c == '0') {
1474 /* Hex, octal or binary -- maybe. */
1475 c = tok_nextc(tok);
1476 if (c == '.')
1477 goto fraction;
1478 if (c == 'j' || c == 'J')
1479 goto imaginary;
1480 if (c == 'x' || c == 'X') {
1481
1482 /* Hex */
1483 c = tok_nextc(tok);
1484 if (!isxdigit(c)) {
1485 tok->done = E_TOKEN13;
1486 tok_backup(tok, c);
1487 return ERRORTOKEN54;
1488 }
1489 do {
1490 c = tok_nextc(tok);
1491 } while (isxdigit(c));
1492 }
1493 else if (c == 'o' || c == 'O') {
1494 /* Octal */
1495 c = tok_nextc(tok);
1496 if (c < '0' || c >= '8') {
1497 tok->done = E_TOKEN13;
1498 tok_backup(tok, c);
1499 return ERRORTOKEN54;
1500 }
1501 do {
1502 c = tok_nextc(tok);
1503 } while ('0' <= c && c < '8');
1504 }
1505 else if (c == 'b' || c == 'B') {
1506 /* Binary */
1507 c = tok_nextc(tok);
1508 if (c != '0' && c != '1') {
1509 tok->done = E_TOKEN13;
1510 tok_backup(tok, c);
1511 return ERRORTOKEN54;
1512 }
1513 do {
1514 c = tok_nextc(tok);
1515 } while (c == '0' || c == '1');
1516 }
1517 else {
1518 int nonzero = 0;
1519 /* maybe old-style octal; c is first char of it */
1520 /* in any case, allow '0' as a literal */
1521 while (c == '0')
1522 c = tok_nextc(tok);
1523 while (isdigit(c)) {
1524 nonzero = 1;
1525 c = tok_nextc(tok);
1526 }
1527 if (c == '.')
1528 goto fraction;
1529 else if (c == 'e' || c == 'E')
1530 goto exponent;
1531 else if (c == 'j' || c == 'J')
1532 goto imaginary;
1533 else if (nonzero) {
1534 tok->done = E_TOKEN13;
1535 tok_backup(tok, c);
1536 return ERRORTOKEN54;
1537 }
1538 }
1539 }
1540 else {
1541 /* Decimal */
1542 do {
1543 c = tok_nextc(tok);
1544 } while (isdigit(c));
1545 {
1546 /* Accept floating point numbers. */
1547 if (c == '.') {
1548 fraction:
1549 /* Fraction */
1550 do {
1551 c = tok_nextc(tok);
1552 } while (isdigit(c));
1553 }
1554 if (c == 'e' || c == 'E') {
1555 exponent:
1556 /* Exponent part */
1557 c = tok_nextc(tok);
1558 if (c == '+' || c == '-')
1559 c = tok_nextc(tok);
1560 if (!isdigit(c)) {
1561 tok->done = E_TOKEN13;
1562 tok_backup(tok, c);
1563 return ERRORTOKEN54;
1564 }
1565 do {
1566 c = tok_nextc(tok);
1567 } while (isdigit(c));
1568 }
1569 if (c == 'j' || c == 'J')
1570 /* Imaginary part */
1571 imaginary:
1572 c = tok_nextc(tok);
1573 }
1574 }
1575 tok_backup(tok, c);
1576 *p_start = tok->start;
1577 *p_end = tok->cur;
1578 return NUMBER2;
1579 }
1580
1581 letter_quote:
1582 /* String */
1583 if (c == '\'' || c == '"') {
1584 int quote = c;
1585 int quote_size = 1; /* 1 or 3 */
1586 int end_quote_size = 0;
1587
1588 /* Find the quote size and start of string */
1589 c = tok_nextc(tok);
1590 if (c == quote) {
1591 c = tok_nextc(tok);
1592 if (c == quote)
1593 quote_size = 3;
1594 else
1595 end_quote_size = 1; /* empty string found */
1596 }
1597 if (c != quote)
1598 tok_backup(tok, c);
1599
1600 /* Get rest of string */
1601 while (end_quote_size != quote_size) {
1602 c = tok_nextc(tok);
1603 if (c == EOF(-1)) {
1604 if (quote_size == 3)
1605 tok->done = E_EOFS23;
1606 else
1607 tok->done = E_EOLS24;
1608 tok->cur = tok->inp;
1609 return ERRORTOKEN54;
1610 }
1611 if (quote_size == 1 && c == '\n') {
1612 tok->done = E_EOLS24;
1613 tok->cur = tok->inp;
1614 return ERRORTOKEN54;
1615 }
1616 if (c == quote)
1617 end_quote_size += 1;
1618 else {
1619 end_quote_size = 0;
1620 if (c == '\\')
1621 tok_nextc(tok); /* skip escaped char; loop re-reads c next iteration */
1622 }
1623 }
1624
1625 *p_start = tok->start;
1626 *p_end = tok->cur;
1627 return STRING3;
1628 }
1629
1630 /* Line continuation */
1631 if (c == '\\') {
1632 c = tok_nextc(tok);
1633 if (c != '\n') {
1634 tok->done = E_LINECONT25;
1635 tok->cur = tok->inp;
1636 return ERRORTOKEN54;
1637 }
1638 tok->cont_line = 1;
1639 goto again; /* Read next line */
1640 }
1641
1642 /* Check for two-character token */
1643 {
1644 int c2 = tok_nextc(tok);
1645 int token = PyToken_TwoChars(c, c2);
1646 if (token != OP53) {
1647 int c3 = tok_nextc(tok);
1648 int token3 = PyToken_ThreeChars(c, c2, c3);
1649 if (token3 != OP53) {
1650 token = token3;
1651 } else {
1652 tok_backup(tok, c3);
1653 }
1654 *p_start = tok->start;
1655 *p_end = tok->cur;
1656 return token;
1657 }
1658 tok_backup(tok, c2);
1659 }
1660
1661 /* Keep track of parentheses nesting level */
1662 switch (c) {
1663 case '(':
1664 case '[':
1665 case '{':
1666 tok->level++;
1667 break;
1668 case ')':
1669 case ']':
1670 case '}':
1671 tok->level--;
1672 break;
1673 }
1674
1675 /* Punctuation character */
1676 *p_start = tok->start;
1677 *p_end = tok->cur;
1678 return PyToken_OneChar(c);
1679}
1680
1681int
1682PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
1683{
1684 int result = tok_get(tok, p_start, p_end);
1685 if (tok->decoding_erred) {
1686 result = ERRORTOKEN54;
1687 tok->done = E_DECODE22;
1688 }
1689 return result;
1690}
1691
1692/* Get -*- encoding -*- from a Python file.
1693
1694 PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
1695 the first or second line of the file (in which case the encoding
1696 should be assumed to be PyUnicode_GetDefaultEncoding()).
1697
1698 The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
1699 by the caller.
1700*/
1701char *
1702PyTokenizer_FindEncoding(int fd)
1703{
1704 struct tok_state *tok;
1705 FILE *fp;
1706 char *p_start =NULL((void *)0) , *p_end =NULL((void *)0) , *encoding = NULL((void *)0);
1707
 /* Work on a duplicate descriptor so the caller keeps ownership of fd.
 NOTE(review): the dup shares the file offset with the original --
 presumably the caller rewinds/reopens afterwards; confirm at call
 sites. */
1708 fd = dup(fd);
1709 if (fd < 0) {
1710 return NULL((void *)0);
1711 }
1712 fp = fdopen(fd, "r");
1713 if (fp == NULL((void *)0)) {
 close(fd); /* fix: don't leak the dup'ed descriptor when fdopen fails */
1714 return NULL((void *)0);
1715 }
1716 tok = PyTokenizer_FromFile(fp, NULL((void *)0), NULL((void *)0), NULL((void *)0));
1717 if (tok == NULL((void *)0)) {
1718 fclose(fp);
1719 return NULL((void *)0);
1720 }
 /* Tokenize just the first two lines: a coding cookie, if any, must
 appear there and is recorded in tok->encoding as a side effect. */
1721 while (tok->lineno < 2 && tok->done == E_OK10) {
1722 PyTokenizer_Get(tok, &p_start, &p_end);
1723 }
1724 fclose(fp);
 /* Copy the encoding out of the tok_state before freeing it; the
 returned buffer is PyMem-allocated and owned by the caller. */
1725 if (tok->encoding) {
1726 encoding = (char *)PyMem_MALLOC_PyMem_DebugMalloc(strlen(tok->encoding) + 1);
1727 if (encoding)
1728 strcpy(encoding, tok->encoding)((__builtin_object_size (encoding, 0) != (size_t) -1) ? __builtin___strcpy_chk
(encoding, tok->encoding, __builtin_object_size (encoding
, 2 > 1)) : __inline_strcpy_chk (encoding, tok->encoding
))
;
1729 }
1730 PyTokenizer_Free(tok);
1731 return encoding;
1732}
1733
1734#ifdef Py_DEBUG1
1735
1736void
1737tok_dump_Py_tok_dump(int type, char *start, char *end)
1738{
1739 printf("%s", _PyParser_TokenNames[type]);
1740 if (type == NAME1 || type == NUMBER2 || type == STRING3 || type == OP53)
1741 printf("(%.*s)", (int)(end - start), start);
1742}
1743
1744#endif