|
1 |
|
2 /* Parser-tokenizer link implementation */ |
|
3 |
|
4 #include "pgenheaders.h" |
|
5 #include "tokenizer.h" |
|
6 #include "node.h" |
|
7 #include "grammar.h" |
|
8 #include "parser.h" |
|
9 #include "parsetok.h" |
|
10 #include "errcode.h" |
|
11 #include "graminit.h" |
|
12 |
|
/* Nonzero enables warnings about inconsistent tab/space indentation;
   checked alongside Py_VerboseFlag below, and a value >= 2 upgrades the
   warning to an error (tok->alterror).  NOTE(review): presumably set from
   the interpreter's tab-check command-line option — confirm at call sites. */
int Py_TabcheckFlag;


/* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static void initerr(perrdetail *err_ret, const char* filename);
|
19 |
|
20 /* Parse input coming from a string. Return error code, print some errors. */ |
|
21 node * |
|
22 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) |
|
23 { |
|
24 return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); |
|
25 } |
|
26 |
|
27 node * |
|
28 PyParser_ParseStringFlags(const char *s, grammar *g, int start, |
|
29 perrdetail *err_ret, int flags) |
|
30 { |
|
31 return PyParser_ParseStringFlagsFilename(s, NULL, |
|
32 g, start, err_ret, flags); |
|
33 } |
|
34 |
|
35 node * |
|
36 PyParser_ParseStringFlagsFilename(const char *s, const char *filename, |
|
37 grammar *g, int start, |
|
38 perrdetail *err_ret, int flags) |
|
39 { |
|
40 int iflags = flags; |
|
41 return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, |
|
42 err_ret, &iflags); |
|
43 } |
|
44 |
|
45 node * |
|
46 PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, |
|
47 grammar *g, int start, |
|
48 perrdetail *err_ret, int *flags) |
|
49 { |
|
50 struct tok_state *tok; |
|
51 |
|
52 initerr(err_ret, filename); |
|
53 |
|
54 if ((tok = PyTokenizer_FromString(s)) == NULL) { |
|
55 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; |
|
56 return NULL; |
|
57 } |
|
58 |
|
59 tok->filename = filename ? filename : "<string>"; |
|
60 if (Py_TabcheckFlag || Py_VerboseFlag) { |
|
61 tok->altwarning = (tok->filename != NULL); |
|
62 if (Py_TabcheckFlag >= 2) |
|
63 tok->alterror++; |
|
64 } |
|
65 |
|
66 return parsetok(tok, g, start, err_ret, flags); |
|
67 } |
|
68 |
|
69 /* Parse input coming from a file. Return error code, print some errors. */ |
|
70 |
|
71 node * |
|
72 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, |
|
73 char *ps1, char *ps2, perrdetail *err_ret) |
|
74 { |
|
75 return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2, |
|
76 err_ret, 0); |
|
77 } |
|
78 |
|
79 node * |
|
80 PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, |
|
81 char *ps1, char *ps2, perrdetail *err_ret, int flags) |
|
82 { |
|
83 int iflags = flags; |
|
84 return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags); |
|
85 } |
|
86 |
|
87 node * |
|
88 PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start, |
|
89 char *ps1, char *ps2, perrdetail *err_ret, int *flags) |
|
90 { |
|
91 struct tok_state *tok; |
|
92 |
|
93 initerr(err_ret, filename); |
|
94 |
|
95 if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) { |
|
96 err_ret->error = E_NOMEM; |
|
97 return NULL; |
|
98 } |
|
99 tok->filename = filename; |
|
100 if (Py_TabcheckFlag || Py_VerboseFlag) { |
|
101 tok->altwarning = (filename != NULL); |
|
102 if (Py_TabcheckFlag >= 2) |
|
103 tok->alterror++; |
|
104 } |
|
105 |
|
106 return parsetok(tok, g, start, err_ret, flags); |
|
107 } |
|
108 |
|
#if 0
/* NOTE(review): this whole warning apparatus is compiled out (#if 0).
   It dates from the transition period when 'with' and 'as' were being
   promoted to reserved keywords; kept here only as reference. */
static char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write one of the messages above to stderr, substituting the filename
   (or "<string>" when none is known) and the line number. */
static void
warn(const char *msg, const char *filename, int lineno)
{
    if (filename == NULL)
        filename = "<string>";
    PySys_WriteStderr(msg, filename, lineno);
}
#endif
|
124 |
|
/* Parse input coming from the given tokenizer structure.
   Return error code. */

/* Core driver: pump tokens from tok into a fresh parser until the parser
   reports E_DONE or an error occurs.  Always frees tok before returning.
   On success returns the parse tree (possibly wrapped in an encoding_decl
   node); on failure returns NULL with err_ret->error/lineno/offset/text
   describing the problem. */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         int *flags)
{
    parser_state *ps;
    node *n;
    /* handling_import / handling_with are only ever reset below, never
       read — vestigial from the keyword-transition code (see the #if 0
       warn() block above). */
    int started = 0, handling_import = 0, handling_with = 0;

    if ((ps = PyParser_New(g, start)) == NULL) {
        fprintf(stderr, "no mem for new parser\n");
        err_ret->error = E_NOMEM;
        PyTokenizer_Free(tok);
        return NULL;
    }
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    /* Propagate caller-requested __future__ behavior into the parser. */
    if (*flags & PyPARSE_PRINT_IS_FUNCTION) {
        ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
    }
    if (*flags & PyPARSE_UNICODE_LITERALS) {
        ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
    }

#endif

    /* Token pump: one iteration per token until error or E_DONE. */
    for (;;) {
        char *a, *b;        /* start/end of the token's text in tok's buffer */
        int type;
        size_t len;
        char *str;          /* heap copy of the token text, owned by parser */
        int col_offset;

        type = PyTokenizer_Get(tok, &a, &b);
        if (type == ERRORTOKEN) {
            /* The tokenizer records the specific failure in tok->done. */
            err_ret->error = tok->done;
            break;
        }
        if (type == ENDMARKER && started) {
            type = NEWLINE; /* Add an extra newline */
            handling_with = handling_import = 0;
            started = 0;
            /* Add the right number of dedent tokens,
               except if a certain flag is given --
               codeop.py uses this. */
            if (tok->indent &&
                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
            {
                tok->pendin = -tok->indent;
                tok->indent = 0;
            }
        }
        else
            started = 1;
        len = b - a; /* XXX this may compute NULL - NULL */
        str = (char *) PyObject_MALLOC(len + 1);
        if (str == NULL) {
            fprintf(stderr, "no mem for next token\n");
            err_ret->error = E_NOMEM;
            break;
        }
        /* strncpy + explicit NUL: len is exact, so no termination issue. */
        if (len > 0)
            strncpy(str, a, len);
        str[len] = '\0';

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#endif
        /* Column of the token within its line; -1 when a precedes the
           recorded line start (e.g. no position available). */
        if (a >= tok->line_start)
            col_offset = a - tok->line_start;
        else
            col_offset = -1;

        /* On success the parser takes ownership of str; on failure
           (other than E_DONE) we must free it ourselves. */
        if ((err_ret->error =
             PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
                               &(err_ret->expected))) != E_OK) {
            if (err_ret->error != E_DONE) {
                PyObject_FREE(str);
                err_ret->token = type;
            }
            break;
        }
    }

    /* E_DONE means a complete parse: steal the tree from the parser
       state so PyParser_Delete below does not free it. */
    if (err_ret->error == E_DONE) {
        n = ps->p_tree;
        ps->p_tree = NULL;
    }
    else
        n = NULL;

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    /* Report back any __future__ flags the parse itself turned on. */
    *flags = ps->p_flags;
#endif
    PyParser_Delete(ps);

    if (n == NULL) {
        /* Failure path: record position and a copy of the offending text. */
        if (tok->lineno <= 1 && tok->done == E_EOF)
            err_ret->error = E_EOF;
        err_ret->lineno = tok->lineno;
        if (tok->buf != NULL) {
            char *text = NULL;
            size_t len;
            assert(tok->cur - tok->buf < INT_MAX);
            err_ret->offset = (int)(tok->cur - tok->buf);
            len = tok->inp - tok->buf;
#ifdef Py_USING_UNICODE
            /* May re-encode the text in the original source encoding and
               adjust the offset accordingly; NULL means "use raw buffer". */
            text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);

#endif
            if (text == NULL) {
                text = (char *) PyObject_MALLOC(len + 1);
                if (text != NULL) {
                    if (len > 0)
                        strncpy(text, tok->buf, len);
                    text[len] = '\0';
                }
            }
            /* Ownership of text transfers to err_ret (caller frees). */
            err_ret->text = text;
        }
    } else if (tok->encoding != NULL) {
        /* Source declared an encoding: wrap the tree in an encoding_decl
           node that takes ownership of tok->encoding. */
        node* r = PyNode_New(encoding_decl);
        if (!r) {
            err_ret->error = E_NOMEM;
            n = NULL;
            goto done;
        }
        r->n_str = tok->encoding;
        r->n_nchildren = 1;
        r->n_child = n;
        tok->encoding = NULL;
        n = r;
    }

done:
    /* Single exit: the tokenizer is freed on every path through here. */
    PyTokenizer_Free(tok);

    return n;
}
|
264 |
|
265 static void |
|
266 initerr(perrdetail *err_ret, const char *filename) |
|
267 { |
|
268 err_ret->error = E_OK; |
|
269 err_ret->filename = filename; |
|
270 err_ret->lineno = 0; |
|
271 err_ret->offset = 0; |
|
272 err_ret->text = NULL; |
|
273 err_ret->token = -1; |
|
274 err_ret->expected = -1; |
|
275 } |