Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(15)

Side by Side Diff: Include/tokenizer.h

Issue 3353: make built-in tokenizer available via Python C API
Patch Set: Created 5 years, 11 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | Makefile.pre.in » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
(Empty)
1 #ifndef Py_TOKENIZER_H
2 #define Py_TOKENIZER_H
3 #ifdef __cplusplus
4 extern "C" {
5 #endif
6
7 #include "object.h"
8
9 /* Tokenizer interface */
10
11 #include "token.h" /* For token types */
12
13 #define MAXINDENT 100 /* Max indentation level; also the size of indstack[] and altindstack[] in struct tok_state */
14
15 enum decoding_state { /* Input decoding state; stored in the decoding_state field of struct tok_state */
16 STATE_INIT,
17 STATE_RAW,
18 STATE_NORMAL /* have a codec associated with input */
19 };
20
21 /* Tokenizer state: input buffer pointers, indentation tracking and source-encoding (PEP 0263) bookkeeping */
22 struct tok_state {
23 /* Input state; buf <= cur <= inp <= end */
24 /* NB an entire line is held in the buffer */
25 char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
26 char *cur; /* Next character in buffer */
27 char *inp; /* End of data in buffer */
28 char *end; /* End of input buffer if buf != NULL */
29 char *start; /* Start of current token if not NULL */
30 int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
31 /* NB If done != E_OK, cur must be == inp!!! */
32 FILE *fp; /* Rest of input; NULL if tokenizing a string */
33 int tabsize; /* Tab spacing */
34 int indent; /* Current indentation index */
35 int indstack[MAXINDENT]; /* Stack of indents */
36 int atbol; /* Nonzero if at begin of new line */
37 int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
38 const char *prompt, *nextprompt; /* For interactive prompting */
39 int lineno; /* Current line number */
40 int level; /* () [] {} Parentheses nesting level */
41 /* Used to allow free continuations inside them */
42 /* Stuff for checking on different tab sizes: the altwarning, alterror, alttabsize and altindstack fields that follow the filename field */
43 #ifndef PGEN
44 /* pgen doesn't have access to Python codecs, it cannot decode the input
45 filename. The bytes filename might be kept, but it is only used by
46 indenterror() and it is not really needed: pgen only compiles one file
47 (Grammar/Grammar). */
48 PyObject *filename; /* Input filename (see note above); field is absent when PGEN is defined */
49 #endif
50 int altwarning; /* Issue warning if alternate tabs don't match */
51 int alterror; /* Issue error if alternate tabs don't match */
52 int alttabsize; /* Alternate tab spacing */
53 int altindstack[MAXINDENT]; /* Stack of alternate indents */
54 /* Stuff for PEP 0263 */
55 enum decoding_state decoding_state;
56 int decoding_erred; /* whether erred in decoding */
57 int read_coding_spec; /* whether 'coding:...' has been read */
58 char *encoding; /* Source encoding. */
59 int cont_line; /* whether we are in a continuation line. */
60 const char* line_start; /* pointer to start of current line */
61 #ifndef PGEN
62 PyObject *decoding_readline; /* open(...).readline */
63 PyObject *decoding_buffer; /* NOTE(review): undocumented here; presumably buffered input for decoding -- confirm */
64 #endif
65 const char* enc; /* Encoding for the current str. */
66 const char* str; /* NOTE(review): undocumented here; presumably the string being tokenized -- confirm */
67 const char* input; /* Tokenizer's newline translated copy of the string. */
68 };
69
70 PyAPI_FUNC(struct tok_state *)PyTokenizer_FromString(const char *, int);
71 PyAPI_FUNC(struct tok_state *)PyTokenizer_FromUTF8(const char *, int);
72 PyAPI_FUNC(struct tok_state *)PyTokenizer_FromFile(FILE *, const char*,
73 const char *, const char *);
74 PyAPI_FUNC(void) PyTokenizer_Free(struct tok_state *);
75 PyAPI_FUNC(int) PyTokenizer_Get(struct tok_state *, char **, char **);
76 extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
77 int len, int *offset); /* NOTE(review): declared plain extern, unlike the PyAPI_FUNC declarations above -- confirm whether it should be exported as well */
78
79 #ifdef __cplusplus
80 }
81 #endif
82 #endif /* !Py_TOKENIZER_H */
OLD | NEW
« no previous file with comments | « no previous file | Makefile.pre.in » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+