Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(16)

Delta Between Two Patch Sets: Include/tokenizer.h

Issue 3353: make built-in tokenizer available via Python C API
Left Patch Set: Created 4 years, 10 months ago
Right Patch Set: Created 4 years, 10 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « Include/token.h ('k') | Lib/token.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 #ifndef Py_TOKENIZER_H 1 #ifndef Py_TOKENIZER_H
2 #define Py_TOKENIZER_H 2 #define Py_TOKENIZER_H
3 #ifdef __cplusplus 3 #ifdef __cplusplus
4 extern "C" { 4 extern "C" {
5 #endif 5 #endif
6 6
7 #include "object.h" 7 #include "object.h"
8 8
9 /* Tokenizer interface */ 9 /* Tokenizer interface */
10 10
11 #include "token.h" /* For token types */ 11 #include "token.h" /* For token types */
Nick Coghlan 2015/04/14 18:25:39 This suggests the token #defines will also need to
12 12
13 typedef struct PyTokenizer_State PyTokenizer_State; 13 typedef struct PyTokenizer_State PyTokenizer_State;
Nick Coghlan 2015/04/14 18:25:39 As a now public API, this should be included from
14 14
15 #ifndef Py_LIMITED_API 15 #ifndef Py_LIMITED_API
16 #define MAXINDENT 100 /* Max indentation level */ 16 #define PYTOKENIZER_MAXINDENT 100 /* Max indentation level */
Nick Coghlan 2015/04/14 18:25:39 This needs a PYTOKENIZER_* prefix.
17 17
18 enum decoding_state { 18 enum PyTokenizer_DecodingState {
19 STATE_INIT, 19 PYTOKENIZER_STATE_INIT,
20 STATE_RAW, 20 PYTOKENIZER_STATE_RAW,
21 STATE_NORMAL /* have a codec associated with input */ 21 PYTOKENIZER_STATE_NORMAL /* have a codec associated with input */
Nick Coghlan 2015/04/14 18:25:39 PYTOKENIZER_* prefix
22 }; 22 };
23 23
24 struct PyTokenizer_State { 24 struct PyTokenizer_State {
25 /* Input state; buf <= cur <= inp <= end */ 25 /* Input state; buf <= cur <= inp <= end */
26 /* NB an entire line is held in the buffer */ 26 /* NB an entire line is held in the buffer */
27 char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ 27 char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
28 char *cur; /* Next character in buffer */ 28 char *cur; /* Next character in buffer */
29 char *inp; /* End of data in buffer */ 29 char *inp; /* End of data in buffer */
30 char *end; /* End of input buffer if buf != NULL */ 30 char *end; /* End of input buffer if buf != NULL */
31 char *start; /* Start of current token if not NULL */ 31 char *start; /* Start of current token if not NULL */
32 int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ 32 int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
33 /* NB If done != E_OK, cur must be == inp!!! */ 33 /* NB If done != E_OK, cur must be == inp!!! */
34 FILE *fp; /* Rest of input; NULL if tokenizing a string */ 34 FILE *fp; /* Rest of input; NULL if tokenizing a string */
35 int tabsize; /* Tab spacing */ 35 int tabsize; /* Tab spacing */
36 int indent; /* Current indentation index */ 36 int indent; /* Current indentation index */
37 int indstack[MAXINDENT]; /* Stack of indents */ 37 int indstack[PYTOKENIZER_MAXINDENT]; /* Stack of indents */
38 int atbol; /* Nonzero if at begin of new line */ 38 int atbol; /* Nonzero if at begin of new line */
39 int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ 39 int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
40 const char *prompt, *nextprompt; /* For interactive prompting */ 40 const char *prompt, *nextprompt; /* For interactive prompting */
41 int lineno; /* Current line number */ 41 int lineno; /* Current line number */
42 int level; /* () [] {} Parentheses nesting level */ 42 int level; /* () [] {} Parentheses nesting level */
43 /* Used to allow free continuations inside them */ 43 /* Used to allow free continuations inside them */
44 /* Stuff for checking on different tab sizes */ 44 /* Stuff for checking on different tab sizes */
45 #ifndef PGEN 45 #ifndef PGEN
46 /* pgen doesn't have access to Python codecs, it cannot decode the input 46 /* pgen doesn't have access to Python codecs, it cannot decode the input
47 filename. The bytes filename might be kept, but it is only used by 47 filename. The bytes filename might be kept, but it is only used by
48 indenterror() and it is not really needed: pgen only compiles one file 48 indenterror() and it is not really needed: pgen only compiles one file
49 (Grammar/Grammar). */ 49 (Grammar/Grammar). */
50 PyObject *filename; 50 PyObject *filename;
51 #endif 51 #endif
52 int altwarning; /* Issue warning if alternate tabs don't match */ 52 int altwarning; /* Issue warning if alternate tabs don't match */
53 int alterror; /* Issue error if alternate tabs don't match */ 53 int alterror; /* Issue error if alternate tabs don't match */
54 int alttabsize; /* Alternate tab spacing */ 54 int alttabsize; /* Alternate tab spacing */
55 int altindstack[MAXINDENT]; /* Stack of alternate indents */ 55 int altindstack[PYTOKENIZER_MAXINDENT]; /* Stack of alternate indents */
56 /* Stuff for PEP 0263 */ 56 /* Stuff for PEP 0263 */
57 enum decoding_state decoding_state; 57 enum PyTokenizer_DecodingState decoding_state;
58 int decoding_erred; /* whether erred in decoding */ 58 int decoding_erred; /* whether erred in decoding */
59 int read_coding_spec; /* whether 'coding:...' has been read */ 59 int read_coding_spec; /* whether 'coding:...' has been read */
60 char *encoding; /* Source encoding. */ 60 char *encoding; /* Source encoding. */
61 int cont_line; /* whether we are in a continuation line. */ 61 int cont_line; /* whether we are in a continuation line. */
62 const char* line_start; /* pointer to start of current line */ 62 const char* line_start; /* pointer to start of current line */
63 #ifndef PGEN 63 #ifndef PGEN
64 PyObject *decoding_readline; /* open(...).readline */ 64 PyObject *decoding_readline; /* open(...).readline */
65 PyObject *decoding_buffer; 65 PyObject *decoding_buffer;
66 #endif 66 #endif
67 const char* enc; /* Encoding for the current str. */ 67 const char* enc; /* Encoding for the current str. */
68 const char* str; 68 const char* str;
69 const char* input; /* Tokenizer's newline translated copy of the string. */ 69 const char* input; /* Tokenizer's newline translated copy of the string. */
70 }; 70 };
71 #endif 71 #endif
72 72
73 PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromString(const char *, int); 73 PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromString(const char *, int);
74 PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromUTF8(const char *, int); 74 PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromUTF8(const char *, int);
75 PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromFile(FILE *, const char*, 75 PyAPI_FUNC(PyTokenizer_State *)PyTokenizer_FromFile(FILE *, const char*,
76 const char *, const char *); 76 const char *, const char *);
77 PyAPI_FUNC(void) PyTokenizer_Free(PyTokenizer_State *); 77 PyAPI_FUNC(void) PyTokenizer_Free(PyTokenizer_State *);
78 PyAPI_FUNC(int) PyTokenizer_Get(PyTokenizer_State *, char **, char **); 78 PyAPI_FUNC(int) PyTokenizer_Get(PyTokenizer_State *, char **, char **);
79 79
80 #ifdef __cplusplus 80 #ifdef __cplusplus
81 } 81 }
82 #endif 82 #endif
83 #endif /* !Py_TOKENIZER_H */ 83 #endif /* !Py_TOKENIZER_H */
LEFT | RIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+