Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(2)

Side by Side Diff: Parser/pgen.c

Issue 3353: make built-in tokenizer available via Python C API
Patch Set: Created 4 years, 10 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Parser/parsetok.c ('k') | Parser/tokenizer.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Parser generator */ 1 /* Parser generator */
2 2
3 /* For a description, see the comments at end of this file */ 3 /* For a description, see the comments at end of this file */
4 4
5 #include "Python.h" 5 #include "Python.h"
6 #include "pgenheaders.h" 6 #include "pgenheaders.h"
7 #include "token.h" 7 #include "token.h"
8 #include "node.h" 8 #include "node.h"
9 #include "grammar.h" 9 #include "grammar.h"
10 #include "metagrammar.h" 10 #include "metagrammar.h"
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
72 Py_FatalError("out of mem"); 72 Py_FatalError("out of mem");
73 ar = &st->st_arc[st->st_narcs++]; 73 ar = &st->st_arc[st->st_narcs++];
74 ar->ar_label = lbl; 74 ar->ar_label = lbl;
75 ar->ar_arrow = to; 75 ar->ar_arrow = to;
76 } 76 }
77 77
78 static nfa * 78 static nfa *
79 newnfa(char *name) 79 newnfa(char *name)
80 { 80 {
81 nfa *nf; 81 nfa *nf;
82 static int type = NT_OFFSET; /* All types will be disjunct */ 82 static int type = PYTOK_NT_OFFSET; /* All types will be disjunct */
83 83
84 nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); 84 nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
85 if (nf == NULL) 85 if (nf == NULL)
86 Py_FatalError("no mem for new nfa"); 86 Py_FatalError("no mem for new nfa");
87 nf->nf_type = type++; 87 nf->nf_type = type++;
88 nf->nf_name = name; /* XXX strdup(name) ??? */ 88 nf->nf_name = name; /* XXX strdup(name) ??? */
89 nf->nf_nstates = 0; 89 nf->nf_nstates = 0;
90 nf->nf_state = NULL; 90 nf->nf_state = NULL;
91 nf->nf_start = nf->nf_finish = -1; 91 nf->nf_start = nf->nf_finish = -1;
92 return nf; 92 return nf;
(...skipping 13 matching lines...) Expand all
106 { 106 {
107 nfagrammar *gr; 107 nfagrammar *gr;
108 108
109 gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); 109 gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
110 if (gr == NULL) 110 if (gr == NULL)
111 Py_FatalError("no mem for new nfa grammar"); 111 Py_FatalError("no mem for new nfa grammar");
112 gr->gr_nnfas = 0; 112 gr->gr_nnfas = 0;
113 gr->gr_nfa = NULL; 113 gr->gr_nfa = NULL;
114 gr->gr_ll.ll_nlabels = 0; 114 gr->gr_ll.ll_nlabels = 0;
115 gr->gr_ll.ll_label = NULL; 115 gr->gr_ll.ll_label = NULL;
116 addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); 116 addlabel(&gr->gr_ll, PYTOK_ENDMARKER, "EMPTY");
117 return gr; 117 return gr;
118 } 118 }
119 119
120 static nfa * 120 static nfa *
121 addnfa(nfagrammar *gr, char *name) 121 addnfa(nfagrammar *gr, char *name)
122 { 122 {
123 nfa *nf; 123 nfa *nf;
124 124
125 nf = newnfa(name); 125 nf = newnfa(name);
126 gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, 126 gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
127 sizeof(nfa*) * (gr->gr_nnfas + 1)); 127 sizeof(nfa*) * (gr->gr_nnfas + 1));
128 if (gr->gr_nfa == NULL) 128 if (gr->gr_nfa == NULL)
129 Py_FatalError("out of mem"); 129 Py_FatalError("out of mem");
130 gr->gr_nfa[gr->gr_nnfas++] = nf; 130 gr->gr_nfa[gr->gr_nnfas++] = nf;
131 addlabel(&gr->gr_ll, NAME, nf->nf_name); 131 addlabel(&gr->gr_ll, PYTOK_NAME, nf->nf_name);
132 return nf; 132 return nf;
133 } 133 }
134 134
135 #ifdef Py_DEBUG 135 #ifdef Py_DEBUG
136 136
137 static char REQNFMT[] = "metacompile: less than %d children\n"; 137 static char REQNFMT[] = "metacompile: less than %d children\n";
138 138
139 #define REQN(i, count) do { \ 139 #define REQN(i, count) do { \
140 if (i < count) { \ 140 if (i < count) { \
141 fprintf(stderr, REQNFMT, count); \ 141 fprintf(stderr, REQNFMT, count); \
142 Py_FatalError("REQN"); \ 142 Py_FatalError("REQN"); \
143 } \ 143 } \
144 } while (0) 144 } while (0)
145 145
146 #else 146 #else
147 #define REQN(i, count) /* empty */ 147 #define REQN(i, count) /* empty */
148 #endif 148 #endif
149 149
150 static nfagrammar * 150 static nfagrammar *
151 metacompile(node *n) 151 metacompile(node *n)
152 { 152 {
153 nfagrammar *gr; 153 nfagrammar *gr;
154 int i; 154 int i;
155 155
156 if (Py_DebugFlag) 156 if (Py_DebugFlag)
157 printf("Compiling (meta-) parse tree into NFA grammar\n"); 157 printf("Compiling (meta-) parse tree into NFA grammar\n");
158 gr = newnfagrammar(); 158 gr = newnfagrammar();
159 REQ(n, MSTART); 159 REQ(n, MSTART);
160 i = n->n_nchildren - 1; /* Last child is ENDMARKER */ 160 i = n->n_nchildren - 1; /* Last child is PYTOK_ENDMARKER */
161 n = n->n_child; 161 n = n->n_child;
162 for (; --i >= 0; n++) { 162 for (; --i >= 0; n++) {
163 if (n->n_type != NEWLINE) 163 if (n->n_type != PYTOK_NEWLINE)
164 compile_rule(gr, n); 164 compile_rule(gr, n);
165 } 165 }
166 return gr; 166 return gr;
167 } 167 }
168 168
169 static void 169 static void
170 compile_rule(nfagrammar *gr, node *n) 170 compile_rule(nfagrammar *gr, node *n)
171 { 171 {
172 nfa *nf; 172 nfa *nf;
173 173
174 REQ(n, RULE); 174 REQ(n, RULE);
175 REQN(n->n_nchildren, 4); 175 REQN(n->n_nchildren, 4);
176 n = n->n_child; 176 n = n->n_child;
177 REQ(n, NAME); 177 REQ(n, PYTOK_NAME);
178 nf = addnfa(gr, n->n_str); 178 nf = addnfa(gr, n->n_str);
179 n++; 179 n++;
180 REQ(n, COLON); 180 REQ(n, PYTOK_COLON);
181 n++; 181 n++;
182 REQ(n, RHS); 182 REQ(n, RHS);
183 compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); 183 compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
184 n++; 184 n++;
185 REQ(n, NEWLINE); 185 REQ(n, PYTOK_NEWLINE);
186 } 186 }
187 187
188 static void 188 static void
189 compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 189 compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
190 { 190 {
191 int i; 191 int i;
192 int a, b; 192 int a, b;
193 193
194 REQ(n, RHS); 194 REQ(n, RHS);
195 i = n->n_nchildren; 195 i = n->n_nchildren;
196 REQN(i, 1); 196 REQN(i, 1);
197 n = n->n_child; 197 n = n->n_child;
198 REQ(n, ALT); 198 REQ(n, ALT);
199 compile_alt(ll, nf, n, pa, pb); 199 compile_alt(ll, nf, n, pa, pb);
200 if (--i <= 0) 200 if (--i <= 0)
201 return; 201 return;
202 n++; 202 n++;
203 a = *pa; 203 a = *pa;
204 b = *pb; 204 b = *pb;
205 *pa = addnfastate(nf); 205 *pa = addnfastate(nf);
206 *pb = addnfastate(nf); 206 *pb = addnfastate(nf);
207 addnfaarc(nf, *pa, a, EMPTY); 207 addnfaarc(nf, *pa, a, EMPTY);
208 addnfaarc(nf, b, *pb, EMPTY); 208 addnfaarc(nf, b, *pb, EMPTY);
209 for (; --i >= 0; n++) { 209 for (; --i >= 0; n++) {
210 REQ(n, VBAR); 210 REQ(n, PYTOK_VBAR);
211 REQN(i, 1); 211 REQN(i, 1);
212 --i; 212 --i;
213 n++; 213 n++;
214 REQ(n, ALT); 214 REQ(n, ALT);
215 compile_alt(ll, nf, n, &a, &b); 215 compile_alt(ll, nf, n, &a, &b);
216 addnfaarc(nf, *pa, a, EMPTY); 216 addnfaarc(nf, *pa, a, EMPTY);
217 addnfaarc(nf, b, *pb, EMPTY); 217 addnfaarc(nf, b, *pb, EMPTY);
218 } 218 }
219 } 219 }
220 220
(...skipping 22 matching lines...) Expand all
243 static void 243 static void
244 compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 244 compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
245 { 245 {
246 int i; 246 int i;
247 int a, b; 247 int a, b;
248 248
249 REQ(n, ITEM); 249 REQ(n, ITEM);
250 i = n->n_nchildren; 250 i = n->n_nchildren;
251 REQN(i, 1); 251 REQN(i, 1);
252 n = n->n_child; 252 n = n->n_child;
253 if (n->n_type == LSQB) { 253 if (n->n_type == PYTOK_LSQB) {
254 REQN(i, 3); 254 REQN(i, 3);
255 n++; 255 n++;
256 REQ(n, RHS); 256 REQ(n, RHS);
257 *pa = addnfastate(nf); 257 *pa = addnfastate(nf);
258 *pb = addnfastate(nf); 258 *pb = addnfastate(nf);
259 addnfaarc(nf, *pa, *pb, EMPTY); 259 addnfaarc(nf, *pa, *pb, EMPTY);
260 compile_rhs(ll, nf, n, &a, &b); 260 compile_rhs(ll, nf, n, &a, &b);
261 addnfaarc(nf, *pa, a, EMPTY); 261 addnfaarc(nf, *pa, a, EMPTY);
262 addnfaarc(nf, b, *pb, EMPTY); 262 addnfaarc(nf, b, *pb, EMPTY);
263 REQN(i, 1); 263 REQN(i, 1);
264 n++; 264 n++;
265 REQ(n, RSQB); 265 REQ(n, PYTOK_RSQB);
266 } 266 }
267 else { 267 else {
268 compile_atom(ll, nf, n, pa, pb); 268 compile_atom(ll, nf, n, pa, pb);
269 if (--i <= 0) 269 if (--i <= 0)
270 return; 270 return;
271 n++; 271 n++;
272 addnfaarc(nf, *pb, *pa, EMPTY); 272 addnfaarc(nf, *pb, *pa, EMPTY);
273 if (n->n_type == STAR) 273 if (n->n_type == PYTOK_STAR)
274 *pb = *pa; 274 *pb = *pa;
275 else 275 else
276 REQ(n, PLUS); 276 REQ(n, PYTOK_PLUS);
277 } 277 }
278 } 278 }
279 279
280 static void 280 static void
281 compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 281 compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
282 { 282 {
283 int i; 283 int i;
284 284
285 REQ(n, ATOM); 285 REQ(n, ATOM);
286 i = n->n_nchildren; 286 i = n->n_nchildren;
287 (void)i; /* Don't warn about set but unused */ 287 (void)i; /* Don't warn about set but unused */
288 REQN(i, 1); 288 REQN(i, 1);
289 n = n->n_child; 289 n = n->n_child;
290 if (n->n_type == LPAR) { 290 if (n->n_type == PYTOK_LPAR) {
291 REQN(i, 3); 291 REQN(i, 3);
292 n++; 292 n++;
293 REQ(n, RHS); 293 REQ(n, RHS);
294 compile_rhs(ll, nf, n, pa, pb); 294 compile_rhs(ll, nf, n, pa, pb);
295 n++; 295 n++;
296 REQ(n, RPAR); 296 REQ(n, PYTOK_RPAR);
297 } 297 }
298 else if (n->n_type == NAME || n->n_type == STRING) { 298 else if (n->n_type == PYTOK_NAME || n->n_type == PYTOK_STRING) {
299 *pa = addnfastate(nf); 299 *pa = addnfastate(nf);
300 *pb = addnfastate(nf); 300 *pb = addnfastate(nf);
301 addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); 301 addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
302 } 302 }
303 else 303 else
304 REQ(n, NAME); 304 REQ(n, PYTOK_NAME);
305 } 305 }
306 306
307 static void 307 static void
308 dumpstate(labellist *ll, nfa *nf, int istate) 308 dumpstate(labellist *ll, nfa *nf, int istate)
309 { 309 {
310 nfastate *st; 310 nfastate *st;
311 int i; 311 int i;
312 nfaarc *ar; 312 nfaarc *ar;
313 313
314 printf("%c%2d%c", 314 printf("%c%2d%c",
(...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after
701 non-terminals are computed. 701 non-terminals are computed.
702 702
703 Reference 703 Reference
704 --------- 704 ---------
705 705
706 [Aho&Ullman 77] 706 [Aho&Ullman 77]
707 Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 707 Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
708 (first edition) 708 (first edition)
709 709
710 */ 710 */
OLDNEW
« no previous file with comments | « Parser/parsetok.c ('k') | Parser/tokenizer.c » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+