Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(13)

Side by Side Diff: Parser/parser.c

Issue 3353: make built-in tokenizer available via Python C API
Patch Set: Created 4 years, 10 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Parser/listnode.c ('k') | Parser/parsetok.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 1
2 /* Parser implementation */ 2 /* Parser implementation */
3 3
4 /* For a description, see the comments at end of this file */ 4 /* For a description, see the comments at end of this file */
5 5
6 /* XXX To do: error recovery */ 6 /* XXX To do: error recovery */
7 7
8 #include "Python.h" 8 #include "Python.h"
9 #include "pgenheaders.h" 9 #include "pgenheaders.h"
10 #include "token.h" 10 #include "token.h"
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after
132 132
133 133
134 /* PARSER PROPER */ 134 /* PARSER PROPER */
135 135
136 static int 136 static int
137 classify(parser_state *ps, int type, const char *str) 137 classify(parser_state *ps, int type, const char *str)
138 { 138 {
139 grammar *g = ps->p_grammar; 139 grammar *g = ps->p_grammar;
140 int n = g->g_ll.ll_nlabels; 140 int n = g->g_ll.ll_nlabels;
141 141
142 if (type == NAME) { 142 if (type == PYTOK_NAME) {
143 const char *s = str; 143 const char *s = str;
144 label *l = g->g_ll.ll_label; 144 label *l = g->g_ll.ll_label;
145 int i; 145 int i;
146 for (i = n; i > 0; i--, l++) { 146 for (i = n; i > 0; i--, l++) {
147 if (l->lb_type != NAME || l->lb_str == NULL || 147 if (l->lb_type != PYTOK_NAME || l->lb_str == NULL ||
148 l->lb_str[0] != s[0] || 148 l->lb_str[0] != s[0] ||
149 strcmp(l->lb_str, s) != 0) 149 strcmp(l->lb_str, s) != 0)
150 continue; 150 continue;
151 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 151 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
152 #if 0 152 #if 0
153 /* Leaving this in as an example */ 153 /* Leaving this in as an example */
154 if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) { 154 if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
155 if (s[0] == 'w' && strcmp(s, "with") == 0) 155 if (s[0] == 'w' && strcmp(s, "with") == 0)
156 break; /* not a keyword yet */ 156 break; /* not a keyword yet */
157 else if (s[0] == 'a' && strcmp(s, "as") == 0) 157 else if (s[0] == 'a' && strcmp(s, "as") == 0)
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
195 return; 195 return;
196 ch = CHILD(n, 0); 196 ch = CHILD(n, 0);
197 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0) 197 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
198 return; 198 return;
199 ch = CHILD(n, 1); 199 ch = CHILD(n, 1);
200 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) && 200 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
201 strcmp(STR(CHILD(ch, 0)), "__future__") != 0) 201 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
202 return; 202 return;
203 ch = CHILD(n, 3); 203 ch = CHILD(n, 3);
204 /* ch can be a star, a parenthesis or import_as_names */ 204 /* ch can be a star, a parenthesis or import_as_names */
205 if (TYPE(ch) == STAR) 205 if (TYPE(ch) == PYTOK_STAR)
206 return; 206 return;
207 if (TYPE(ch) == LPAR) 207 if (TYPE(ch) == PYTOK_LPAR)
208 ch = CHILD(n, 4); 208 ch = CHILD(n, 4);
209 209
210 for (i = 0; i < NCH(ch); i += 2) { 210 for (i = 0; i < NCH(ch); i += 2) {
211 cch = CHILD(ch, i); 211 cch = CHILD(ch, i);
212 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { 212 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == PYTOK_NAME) {
213 char *str_ch = STR(CHILD(cch, 0)); 213 char *str_ch = STR(CHILD(cch, 0));
214 if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { 214 if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
215 ps->p_flags |= CO_FUTURE_WITH_STATEMENT; 215 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
216 } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { 216 } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
217 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; 217 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
218 } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { 218 } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
219 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; 219 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
220 } 220 }
221 } 221 }
222 } 222 }
(...skipping 23 matching lines...) Expand all
246 246
247 D(printf(" DFA '%s', state %d:", 247 D(printf(" DFA '%s', state %d:",
248 d->d_name, ps->p_stack.s_top->s_state)); 248 d->d_name, ps->p_stack.s_top->s_state));
249 249
250 /* Check accelerator */ 250 /* Check accelerator */
251 if (s->s_lower <= ilabel && ilabel < s->s_upper) { 251 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
252 int x = s->s_accel[ilabel - s->s_lower]; 252 int x = s->s_accel[ilabel - s->s_lower];
253 if (x != -1) { 253 if (x != -1) {
254 if (x & (1<<7)) { 254 if (x & (1<<7)) {
255 /* Push non-terminal */ 255 /* Push non-terminal */
256 int nt = (x >> 8) + NT_OFFSET; 256 int nt = (x >> 8) + PYTOK_NT_OFFSET;
257 int arrow = x & ((1<<7)-1); 257 int arrow = x & ((1<<7)-1);
258 dfa *d1 = PyGrammar_FindDFA( 258 dfa *d1 = PyGrammar_FindDFA(
259 ps->p_grammar, nt); 259 ps->p_grammar, nt);
260 if ((err = push(&ps->p_stack, nt, d1, 260 if ((err = push(&ps->p_stack, nt, d1,
261 arrow, lineno, col_offset)) > 0) { 261 arrow, lineno, col_offset)) > 0) {
262 D(printf(" MemError: push\n")); 262 D(printf(" MemError: push\n"));
263 return err; 263 return err;
264 } 264 }
265 D(printf(" Push ...\n")); 265 D(printf(" Push ...\n"));
266 continue; 266 continue;
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
343 { 343 {
344 int i; 344 int i;
345 345
346 if (n == NULL) 346 if (n == NULL)
347 printf("NIL"); 347 printf("NIL");
348 else { 348 else {
349 label l; 349 label l;
350 l.lb_type = TYPE(n); 350 l.lb_type = TYPE(n);
351 l.lb_str = STR(n); 351 l.lb_str = STR(n);
352 printf("%s", PyGrammar_LabelRepr(&l)); 352 printf("%s", PyGrammar_LabelRepr(&l));
353 if (ISNONTERMINAL(TYPE(n))) { 353 if (PYTOK_ISNONTERMINAL(TYPE(n))) {
354 printf("("); 354 printf("(");
355 for (i = 0; i < NCH(n); i++) { 355 for (i = 0; i < NCH(n); i++) {
356 if (i > 0) 356 if (i > 0)
357 printf(","); 357 printf(",");
358 dumptree(g, CHILD(n, i)); 358 dumptree(g, CHILD(n, i));
359 } 359 }
360 printf(")"); 360 printf(")");
361 } 361 }
362 } 362 }
363 } 363 }
364 364
365 void 365 void
366 showtree(grammar *g, node *n) 366 showtree(grammar *g, node *n)
367 { 367 {
368 int i; 368 int i;
369 369
370 if (n == NULL) 370 if (n == NULL)
371 return; 371 return;
372 if (ISNONTERMINAL(TYPE(n))) { 372 if (PYTOK_ISNONTERMINAL(TYPE(n))) {
373 for (i = 0; i < NCH(n); i++) 373 for (i = 0; i < NCH(n); i++)
374 showtree(g, CHILD(n, i)); 374 showtree(g, CHILD(n, i));
375 } 375 }
376 else if (ISTERMINAL(TYPE(n))) { 376 else if (PYTOK_ISTERMINAL(TYPE(n))) {
377 printf("%s", _PyParser_TokenNames[TYPE(n)]); 377 printf("%s", _PyParser_TokenNames[TYPE(n)]);
378 if (TYPE(n) == NUMBER || TYPE(n) == NAME) 378 if (TYPE(n) == PYTOK_NUMBER || TYPE(n) == PYTOK_NAME)
379 printf("(%s)", STR(n)); 379 printf("(%s)", STR(n));
380 printf(" "); 380 printf(" ");
381 } 381 }
382 else 382 else
383 printf("? "); 383 printf("? ");
384 } 384 }
385 385
386 void 386 void
387 printtree(parser_state *ps) 387 printtree(parser_state *ps)
388 { 388 {
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
424 language description. An extended LL(1) grammar (ELL(1)) is suitable. 424 language description. An extended LL(1) grammar (ELL(1)) is suitable.
425 Certain restrictions make the parser's life easier: rules that can produce 425 Certain restrictions make the parser's life easier: rules that can produce
426 the empty string should be outlawed (there are other ways to put loops 426 the empty string should be outlawed (there are other ways to put loops
427 or optional parts in the language). To avoid the need to construct 427 or optional parts in the language). To avoid the need to construct
428 FIRST sets, we can require that all but the last alternative of a rule 428 FIRST sets, we can require that all but the last alternative of a rule
429 (really: arc going out of a DFA's state) must begin with a terminal 429 (really: arc going out of a DFA's state) must begin with a terminal
430 symbol. 430 symbol.
431 431
432 As an example, consider this grammar: 432 As an example, consider this grammar:
433 433
434 expr: term (OP term)* 434 expr: term (PYTOK_OP term)*
435 term: CONSTANT | '(' expr ')' 435 term: CONSTANT | '(' expr ')'
436 436
437 The DFA corresponding to the rule for expr is: 437 The DFA corresponding to the rule for expr is:
438 438
439 ------->.---term-->.-------> 439 ------->.---------term-->.------->
440 ^ | 440 ^ |
441 | | 441 | |
442 \----OP----/ 442 \----PYTOK_OP----/
443 443
444 The parse tree generated for the input a+b is: 444 The parse tree generated for the input a+b is:
445 445
446 (expr: (term: (NAME: a)), (OP: +), (term: (NAME: b))) 446 (expr: (term: (PYTOK_NAME: a)), (PYTOK_OP: +), (term: (PYTOK_NAME: b)))
447 447
448 */ 448 */
OLD | NEW
« no previous file with comments | « Parser/listnode.c ('k') | Parser/parsetok.c » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+