symbian-qemu-0.9.1-12/python-2.6.1/Parser/parser.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 
       
     2 /* Parser implementation */
       
     3 
       
     4 /* For a description, see the comments at end of this file */
       
     5 
       
     6 /* XXX To do: error recovery */
       
     7 
       
     8 #include "Python.h"
       
     9 #include "pgenheaders.h"
       
    10 #include "token.h"
       
    11 #include "grammar.h"
       
    12 #include "node.h"
       
    13 #include "parser.h"
       
    14 #include "errcode.h"
       
    15 
       
    16 
       
    17 #ifdef Py_DEBUG
       
    18 extern int Py_DebugFlag;
       
    19 #define D(x) if (!Py_DebugFlag); else x
       
    20 #else
       
    21 #define D(x)
       
    22 #endif
       
    23 
       
    24 
       
    25 /* STACK DATA TYPE */
       
    26 
       
    27 static void s_reset(stack *);
       
    28 
       
    29 static void
       
    30 s_reset(stack *s)
       
    31 {
       
    32 	s->s_top = &s->s_base[MAXSTACK];
       
    33 }
       
    34 
       
    35 #define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
       
    36 
       
    37 static int
       
    38 s_push(register stack *s, dfa *d, node *parent)
       
    39 {
       
    40 	register stackentry *top;
       
    41 	if (s->s_top == s->s_base) {
       
    42 		fprintf(stderr, "s_push: parser stack overflow\n");
       
    43 		return E_NOMEM;
       
    44 	}
       
    45 	top = --s->s_top;
       
    46 	top->s_dfa = d;
       
    47 	top->s_parent = parent;
       
    48 	top->s_state = 0;
       
    49 	return 0;
       
    50 }
       
    51 
       
    52 #ifdef Py_DEBUG
       
    53 
       
    54 static void
       
    55 s_pop(register stack *s)
       
    56 {
       
    57 	if (s_empty(s))
       
    58 		Py_FatalError("s_pop: parser stack underflow -- FATAL");
       
    59 	s->s_top++;
       
    60 }
       
    61 
       
    62 #else /* !Py_DEBUG */
       
    63 
       
    64 #define s_pop(s) (s)->s_top++
       
    65 
       
    66 #endif
       
    67 
       
    68 
       
    69 /* PARSER CREATION */
       
    70 
       
    71 parser_state *
       
    72 PyParser_New(grammar *g, int start)
       
    73 {
       
    74 	parser_state *ps;
       
    75 	
       
    76 	if (!g->g_accel)
       
    77 		PyGrammar_AddAccelerators(g);
       
    78 	ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
       
    79 	if (ps == NULL)
       
    80 		return NULL;
       
    81 	ps->p_grammar = g;
       
    82 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
       
    83 	ps->p_flags = 0;
       
    84 #endif
       
    85 	ps->p_tree = PyNode_New(start);
       
    86 	if (ps->p_tree == NULL) {
       
    87 		PyMem_FREE(ps);
       
    88 		return NULL;
       
    89 	}
       
    90 	s_reset(&ps->p_stack);
       
    91 	(void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
       
    92 	return ps;
       
    93 }
       
    94 
       
    95 void
       
    96 PyParser_Delete(parser_state *ps)
       
    97 {
       
    98 	/* NB If you want to save the parse tree,
       
    99 	   you must set p_tree to NULL before calling delparser! */
       
   100 	PyNode_Free(ps->p_tree);
       
   101 	PyMem_FREE(ps);
       
   102 }
       
   103 
       
   104 
       
   105 /* PARSER STACK OPERATIONS */
       
   106 
       
   107 static int
       
   108 shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
       
   109 {
       
   110 	int err;
       
   111 	assert(!s_empty(s));
       
   112 	err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
       
   113 	if (err)
       
   114 		return err;
       
   115 	s->s_top->s_state = newstate;
       
   116 	return 0;
       
   117 }
       
   118 
       
   119 static int
       
   120 push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
       
   121 {
       
   122 	int err;
       
   123 	register node *n;
       
   124 	n = s->s_top->s_parent;
       
   125 	assert(!s_empty(s));
       
   126 	err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
       
   127 	if (err)
       
   128 		return err;
       
   129 	s->s_top->s_state = newstate;
       
   130 	return s_push(s, d, CHILD(n, NCH(n)-1));
       
   131 }
       
   132 
       
   133 
       
   134 /* PARSER PROPER */
       
   135 
       
   136 static int
       
   137 classify(parser_state *ps, int type, char *str)
       
   138 {
       
   139 	grammar *g = ps->p_grammar;
       
   140 	register int n = g->g_ll.ll_nlabels;
       
   141 	
       
   142 	if (type == NAME) {
       
   143 		register char *s = str;
       
   144 		register label *l = g->g_ll.ll_label;
       
   145 		register int i;
       
   146 		for (i = n; i > 0; i--, l++) {
       
   147 			if (l->lb_type != NAME || l->lb_str == NULL ||
       
   148 			    l->lb_str[0] != s[0] ||
       
   149 			    strcmp(l->lb_str, s) != 0)
       
   150 				continue;
       
   151 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
       
   152                         if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
       
   153                             s[0] == 'p' && strcmp(s, "print") == 0) { 
       
   154                                 break; /* no longer a keyword */
       
   155                         }
       
   156 #endif
       
   157 			D(printf("It's a keyword\n"));
       
   158 			return n - i;
       
   159 		}
       
   160 	}
       
   161 	
       
   162 	{
       
   163 		register label *l = g->g_ll.ll_label;
       
   164 		register int i;
       
   165 		for (i = n; i > 0; i--, l++) {
       
   166 			if (l->lb_type == type && l->lb_str == NULL) {
       
   167 				D(printf("It's a token we know\n"));
       
   168 				return n - i;
       
   169 			}
       
   170 		}
       
   171 	}
       
   172 	
       
   173 	D(printf("Illegal token\n"));
       
   174 	return -1;
       
   175 }
       
   176 
       
   177 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
       
   178 static void
       
   179 future_hack(parser_state *ps)
       
   180 {
       
   181 	node *n = ps->p_stack.s_top->s_parent;
       
   182 	node *ch, *cch;
       
   183 	int i;
       
   184 
       
   185 	/* from __future__ import ..., must have at least 4 children */
       
   186 	n = CHILD(n, 0);
       
   187 	if (NCH(n) < 4)
       
   188 		return;
       
   189 	ch = CHILD(n, 0);
       
   190 	if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
       
   191 		return;
       
   192 	ch = CHILD(n, 1);
       
   193 	if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
       
   194 	    strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
       
   195 		return;
       
   196 	ch = CHILD(n, 3);
       
   197 	/* ch can be a star, a parenthesis or import_as_names */
       
   198 	if (TYPE(ch) == STAR)
       
   199 		return;
       
   200 	if (TYPE(ch) == LPAR)
       
   201 		ch = CHILD(n, 4);
       
   202 	
       
   203 	for (i = 0; i < NCH(ch); i += 2) {
       
   204 		cch = CHILD(ch, i);
       
   205 		if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
       
   206 			char *str_ch = STR(CHILD(cch, 0));
       
   207 			if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
       
   208 				ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
       
   209 			} else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
       
   210 				ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
       
   211 			} else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
       
   212 				ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
       
   213 			}
       
   214 		}
       
   215 	}
       
   216 }
       
   217 #endif /* future keyword */
       
   218 
       
   219 int
       
   220 PyParser_AddToken(register parser_state *ps, register int type, char *str,
       
   221 	          int lineno, int col_offset, int *expected_ret)
       
   222 {
       
   223 	register int ilabel;
       
   224 	int err;
       
   225 	
       
   226 	D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
       
   227 	
       
   228 	/* Find out which label this token is */
       
   229 	ilabel = classify(ps, type, str);
       
   230 	if (ilabel < 0)
       
   231 		return E_SYNTAX;
       
   232 	
       
   233 	/* Loop until the token is shifted or an error occurred */
       
   234 	for (;;) {
       
   235 		/* Fetch the current dfa and state */
       
   236 		register dfa *d = ps->p_stack.s_top->s_dfa;
       
   237 		register state *s = &d->d_state[ps->p_stack.s_top->s_state];
       
   238 		
       
   239 		D(printf(" DFA '%s', state %d:",
       
   240 			d->d_name, ps->p_stack.s_top->s_state));
       
   241 		
       
   242 		/* Check accelerator */
       
   243 		if (s->s_lower <= ilabel && ilabel < s->s_upper) {
       
   244 			register int x = s->s_accel[ilabel - s->s_lower];
       
   245 			if (x != -1) {
       
   246 				if (x & (1<<7)) {
       
   247 					/* Push non-terminal */
       
   248 					int nt = (x >> 8) + NT_OFFSET;
       
   249 					int arrow = x & ((1<<7)-1);
       
   250 					dfa *d1 = PyGrammar_FindDFA(
       
   251 						ps->p_grammar, nt);
       
   252 					if ((err = push(&ps->p_stack, nt, d1,
       
   253 						arrow, lineno, col_offset)) > 0) {
       
   254 						D(printf(" MemError: push\n"));
       
   255 						return err;
       
   256 					}
       
   257 					D(printf(" Push ...\n"));
       
   258 					continue;
       
   259 				}
       
   260 				
       
   261 				/* Shift the token */
       
   262 				if ((err = shift(&ps->p_stack, type, str,
       
   263 						x, lineno, col_offset)) > 0) {
       
   264 					D(printf(" MemError: shift.\n"));
       
   265 					return err;
       
   266 				}
       
   267 				D(printf(" Shift.\n"));
       
   268 				/* Pop while we are in an accept-only state */
       
   269 				while (s = &d->d_state
       
   270 						[ps->p_stack.s_top->s_state],
       
   271 					s->s_accept && s->s_narcs == 1) {
       
   272 					D(printf("  DFA '%s', state %d: "
       
   273 						 "Direct pop.\n",
       
   274 						 d->d_name,
       
   275 						 ps->p_stack.s_top->s_state));
       
   276 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
       
   277 					if (d->d_name[0] == 'i' &&
       
   278 					    strcmp(d->d_name,
       
   279 						   "import_stmt") == 0)
       
   280 						future_hack(ps);
       
   281 #endif
       
   282 					s_pop(&ps->p_stack);
       
   283 					if (s_empty(&ps->p_stack)) {
       
   284 						D(printf("  ACCEPT.\n"));
       
   285 						return E_DONE;
       
   286 					}
       
   287 					d = ps->p_stack.s_top->s_dfa;
       
   288 				}
       
   289 				return E_OK;
       
   290 			}
       
   291 		}
       
   292 		
       
   293 		if (s->s_accept) {
       
   294 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
       
   295 			if (d->d_name[0] == 'i' &&
       
   296 			    strcmp(d->d_name, "import_stmt") == 0)
       
   297 				future_hack(ps);
       
   298 #endif
       
   299 			/* Pop this dfa and try again */
       
   300 			s_pop(&ps->p_stack);
       
   301 			D(printf(" Pop ...\n"));
       
   302 			if (s_empty(&ps->p_stack)) {
       
   303 				D(printf(" Error: bottom of stack.\n"));
       
   304 				return E_SYNTAX;
       
   305 			}
       
   306 			continue;
       
   307 		}
       
   308 		
       
   309 		/* Stuck, report syntax error */
       
   310 		D(printf(" Error.\n"));
       
   311 		if (expected_ret) {
       
   312 			if (s->s_lower == s->s_upper - 1) {
       
   313 				/* Only one possible expected token */
       
   314 				*expected_ret = ps->p_grammar->
       
   315 				    g_ll.ll_label[s->s_lower].lb_type;
       
   316 			}
       
   317 			else 
       
   318 		        	*expected_ret = -1;
       
   319 		}
       
   320 		return E_SYNTAX;
       
   321 	}
       
   322 }
       
   323 
       
   324 
       
   325 #ifdef Py_DEBUG
       
   326 
       
   327 /* DEBUG OUTPUT */
       
   328 
       
   329 void
       
   330 dumptree(grammar *g, node *n)
       
   331 {
       
   332 	int i;
       
   333 	
       
   334 	if (n == NULL)
       
   335 		printf("NIL");
       
   336 	else {
       
   337 		label l;
       
   338 		l.lb_type = TYPE(n);
       
   339 		l.lb_str = STR(n);
       
   340 		printf("%s", PyGrammar_LabelRepr(&l));
       
   341 		if (ISNONTERMINAL(TYPE(n))) {
       
   342 			printf("(");
       
   343 			for (i = 0; i < NCH(n); i++) {
       
   344 				if (i > 0)
       
   345 					printf(",");
       
   346 				dumptree(g, CHILD(n, i));
       
   347 			}
       
   348 			printf(")");
       
   349 		}
       
   350 	}
       
   351 }
       
   352 
       
   353 void
       
   354 showtree(grammar *g, node *n)
       
   355 {
       
   356 	int i;
       
   357 	
       
   358 	if (n == NULL)
       
   359 		return;
       
   360 	if (ISNONTERMINAL(TYPE(n))) {
       
   361 		for (i = 0; i < NCH(n); i++)
       
   362 			showtree(g, CHILD(n, i));
       
   363 	}
       
   364 	else if (ISTERMINAL(TYPE(n))) {
       
   365 		printf("%s", _PyParser_TokenNames[TYPE(n)]);
       
   366 		if (TYPE(n) == NUMBER || TYPE(n) == NAME)
       
   367 			printf("(%s)", STR(n));
       
   368 		printf(" ");
       
   369 	}
       
   370 	else
       
   371 		printf("? ");
       
   372 }
       
   373 
       
   374 void
       
   375 printtree(parser_state *ps)
       
   376 {
       
   377 	if (Py_DebugFlag) {
       
   378 		printf("Parse tree:\n");
       
   379 		dumptree(ps->p_grammar, ps->p_tree);
       
   380 		printf("\n");
       
   381 		printf("Tokens:\n");
       
   382 		showtree(ps->p_grammar, ps->p_tree);
       
   383 		printf("\n");
       
   384 	}
       
   385 	printf("Listing:\n");
       
   386 	PyNode_ListTree(ps->p_tree);
       
   387 	printf("\n");
       
   388 }
       
   389 
       
   390 #endif /* Py_DEBUG */
       
   391 
       
   392 /*
       
   393 
       
   394 Description
       
   395 -----------
       
   396 
       
   397 The parser's interface is different than usual: the function addtoken()
       
   398 must be called for each token in the input.  This makes it possible to
       
   399 turn it into an incremental parsing system later.  The parsing system
       
   400 constructs a parse tree as it goes.
       
   401 
       
   402 A parsing rule is represented as a Deterministic Finite-state Automaton
       
   403 (DFA).  A node in a DFA represents a state of the parser; an arc represents
       
   404 a transition.  Transitions are either labeled with terminal symbols or
       
   405 with non-terminals.  When the parser decides to follow an arc labeled
       
   406 with a non-terminal, it is invoked recursively with the DFA representing
       
   407 the parsing rule for that as its initial state; when that DFA accepts,
       
   408 the parser that invoked it continues.  The parse tree constructed by the
       
   409 recursively called parser is inserted as a child in the current parse tree.
       
   410 
       
   411 The DFA's can be constructed automatically from a more conventional
       
   412 language description.  An extended LL(1) grammar (ELL(1)) is suitable.
       
   413 Certain restrictions make the parser's life easier: rules that can produce
       
   414 the empty string should be outlawed (there are other ways to put loops
       
   415 or optional parts in the language).  To avoid the need to construct
       
   416 FIRST sets, we can require that all but the last alternative of a rule
       
   417 (really: arc going out of a DFA's state) must begin with a terminal
       
   418 symbol.
       
   419 
       
   420 As an example, consider this grammar:
       
   421 
       
   422 expr:	term (OP term)*
       
   423 term:	CONSTANT | '(' expr ')'
       
   424 
       
   425 The DFA corresponding to the rule for expr is:
       
   426 
       
   427 ------->.---term-->.------->
       
   428 	^          |
       
   429 	|          |
       
   430 	\----OP----/
       
   431 
       
   432 The parse tree generated for the input a+b is:
       
   433 
       
   434 (expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
       
   435 
       
   436 */