diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/Parse/Yapp/Lalr.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/Parse/Yapp/Lalr.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,939 @@ +# +# Module Parse::Yapp::Lalr +# +# (c) Copyright 1998-2001 Francois Desarmenien, all rights reserved. +# (see the pod text in Parse::Yapp module for use and distribution rights) +# +package Parse::Yapp::Lalr; +@ISA=qw( Parse::Yapp::Grammar ); + +require 5.004; + +use Parse::Yapp::Grammar; + +=for nobody + +Parse::Yapp::Compile Object Structure: +-------------------------------------- +{ + GRAMMAR => Parse::Yapp::Grammar, + STATES => [ { CORE => [ items... ], + ACTIONS => { term => action } + GOTOS => { nterm => stateno } + }... ] + CONFLICTS=>{ SOLVED => { stateno => [ ruleno, token, solved ] }, + FORCED => { TOTAL => [ nbsr, nbrr ], + DETAIL => { stateno => { TOTAL => [ nbsr, nbrr ] } + LIST => [ ruleno, token ] + } + } + } +} + +'items' are of form: [ ruleno, dotpos ] +'term' in ACTIONS is '' means default action +'action' may be: + undef: explicit error (nonassociativity) + 0 : accept + >0 : shift and go to state 'action' + <0 : reduce using rule -'action' +'solved' may have values of: + 'shift' if solved as Shift + 'reduce' if solved as Reduce + 'error' if solved by discarding both Shift and Reduce (nonassoc) + +SOLVED is a set of states containing Solved conflicts +FORCED are forced conflict resolutions + +nbsr and nbrr are number of shift/reduce and reduce/reduce conflicts + +TOTAL is the total number of SR/RR conflicts for the parser + +DETAIL is the detail of conflicts for each state +TOTAL is the total number of SR/RR conflicts for a state +LIST is the list of discarded reductions (for display purpose only) + + +=cut + +use strict; + +use Carp; + +############### +# Constructor # +############### +sub new { + my($class)=shift; + + ref($class) + and $class=ref($class); + + my($self)=$class->SUPER::new(@_); + $self->_Compile(); + bless($self,$class); +} +########### +# Methods # +########### + +########################### +# Method To View Warnings # +########################### +sub Warnings { + my($self)=shift; + my($text); + my($nbsr,$nbrr)=@{$$self{CONFLICTS}{FORCED}{TOTAL}}; + + $text=$self->SUPER::Warnings(); + + $nbsr != $$self{GRAMMAR}{EXPECT} + and $text.="$nbsr shift/reduce conflict".($nbsr > 1 ? "s" : ""); + + $nbrr + and do { + $nbsr + and $text.=" and "; + $text.="$nbrr reduce/reduce conflict".($nbrr > 1 ? "s" : ""); + }; + + ( $nbsr != $$self{GRAMMAR}{EXPECT} + or $nbrr) + and $text.="\n"; + + $text; +} +############################# +# Method To View DFA States # +############################# +sub ShowDfa { + my($self)=shift; + my($text); + my($grammar,$states)=($$self{GRAMMAR}, $$self{STATES}); + + for my $stateno (0..$#$states) { + my(@shifts,@reduces,@errors,$default); + + $text.="State $stateno:\n\n"; + + #Dump Kernel Items + for (sort { $$a[0] <=> $$b[0] + or $$a[1] <=> $$b[1] } @{$$states[$stateno]{'CORE'}}) { + my($ruleno,$pos)=@$_; + my($lhs,$rhs)=@{$$grammar{RULES}[$ruleno]}[0,1]; + my(@rhscopy)=@$rhs; + + $ruleno + or $rhscopy[-1] = '$end'; + + splice(@rhscopy,$pos,0,'.'); + $text.= "\t$lhs -> ".join(' ',@rhscopy)."\t(Rule $ruleno)\n"; + } + + #Prepare Actions + for (keys(%{$$states[$stateno]{ACTIONS}})) { + my($term,$action)=($_,$$states[$stateno]{ACTIONS}{$_}); + + $term eq chr(0) + and $term = '$end'; + + not defined($action) + and do { + push(@errors,$term); + next; + }; + + $action > 0 + and do { + push(@shifts,[ $term, $action ]); + next; + }; + + $action = -$action; + + $term + or do { + $default= [ '$default', $action ]; + next; + }; + + push(@reduces,[ $term, $action ]); + } + + #Dump shifts + @shifts + and do { + $text.="\n"; + for (sort { $$a[0] cmp $$b[0] } @shifts) { + my($term,$shift)=@$_; + + $text.="\t$term\tshift, and go to state $shift\n"; + } + }; + + #Dump errors + @errors + and do { + $text.="\n"; + for my $term (sort { $a cmp $b } @errors) { + $text.="\t$term\terror (nonassociative)\n"; + } + }; + + #Prepare reduces + exists($$self{CONFLICTS}{FORCED}{DETAIL}{$stateno}) + and push(@reduces,@{$$self{CONFLICTS}{FORCED}{DETAIL}{$stateno}{LIST}}); + + @reduces=sort { $$a[0] cmp $$b[0] or $$a[1] <=> $$b[1] } @reduces; + + defined($default) + and push(@reduces,$default); + + #Dump reduces + @reduces + and do { + $text.="\n"; + for (@reduces) { + my($term,$ruleno)=@$_; + my($discard); + + $ruleno < 0 + and do { + ++$discard; + $ruleno = -$ruleno; + }; + + $text.= "\t$term\t".($discard ? "[" : ""); + if($ruleno) { + $text.= "reduce using rule $ruleno ". + "($$grammar{RULES}[$ruleno][0])"; + } + else { + $text.='accept'; + } + $text.=($discard ? "]" : "")."\n"; + } + }; + + #Dump gotos + exists($$states[$stateno]{GOTOS}) + and do { + $text.= "\n"; + for (keys(%{$$states[$stateno]{GOTOS}})) { + $text.= "\t$_\tgo to state $$states[$stateno]{GOTOS}{$_}\n"; + } + }; + + $text.="\n"; + } + $text; +} + +###################################### +# Method to get summary about parser # +###################################### +sub Summary { + my($self)=shift; + my($text); + + $text=$self->SUPER::Summary(); + $text.="Number of states : ". + scalar(@{$$self{STATES}})."\n"; + $text; +} + +####################################### +# Method To Get Infos about conflicts # +####################################### +sub Conflicts { + my($self)=shift; + my($states)=$$self{STATES}; + my($conflicts)=$$self{CONFLICTS}; + my($text); + + for my $stateno ( sort { $a <=> $b } keys(%{$$conflicts{SOLVED}})) { + + for (@{$$conflicts{SOLVED}{$stateno}}) { + my($ruleno,$token,$how)=@$_; + + $token eq chr(0) + and $token = '$end'; + + $text.="Conflict in state $stateno between rule ". + "$ruleno and token $token resolved as $how.\n"; + } + }; + + for my $stateno ( sort { $a <=> $b } keys(%{$$conflicts{FORCED}{DETAIL}})) { + my($nbsr,$nbrr)=@{$$conflicts{FORCED}{DETAIL}{$stateno}{TOTAL}}; + + $text.="State $stateno contains "; + + $nbsr + and $text.="$nbsr shift/reduce conflict". + ($nbsr > 1 ? "s" : ""); + + $nbrr + and do { + $nbsr + and $text.=" and "; + + $text.="$nbrr reduce/reduce conflict". + ($nbrr > 1 ? "s" : ""); + }; + $text.="\n"; + }; + + $text; +} + +################################# +# Method to dump parsing tables # +################################# +sub DfaTable { + my($self)=shift; + my($states)=$$self{STATES}; + my($stateno); + my($text); + + $text="[\n\t{"; + + $text.=join("\n\t},\n\t{", + map { + my($state)=$_; + my($text); + + $text="#State ".$stateno++."\n\t\t"; + + ( not exists($$state{ACTIONS}{''}) + or keys(%{$$state{ACTIONS}}) > 1) + and do { + + $text.="ACTIONS => {\n\t\t\t"; + + $text.=join(",\n\t\t\t", + map { + my($term,$action)=($_,$$state{ACTIONS}{$_}); + my($text); + + if(substr($term,0,1) eq "'") { + $term=~s/([\@\$\"])/\\$1/g; + $term=~s/^'|'$/"/g; + } + else { + $term= $term eq chr(0) + ? "''" + : "'$term'"; + } + + if(defined($action)) { + $action=int($action); + } + else { + $action='undef'; + } + + "$term => $action"; + + } grep { $_ } keys(%{$$state{ACTIONS}})); + + $text.="\n\t\t}"; + }; + + exists($$state{ACTIONS}{''}) + and do { + keys(%{$$state{ACTIONS}}) > 1 + and $text.=",\n\t\t"; + + $text.="DEFAULT => $$state{ACTIONS}{''}"; + }; + + exists($$state{GOTOS}) + and do { + $text.=",\n\t\tGOTOS => {\n\t\t\t"; + $text.=join(",\n\t\t\t", + map { + my($nterm,$stateno)=($_,$$state{GOTOS}{$_}); + my($text); + + "'$nterm' => $stateno"; + + } keys(%{$$state{GOTOS}})); + $text.="\n\t\t}"; + }; + + $text; + + }@$states); + + $text.="\n\t}\n]"; + + $text; + +} + + +#################################### +# Method to build Dfa from Grammar # +#################################### +sub _Compile { + my($self)=shift; + my($grammar,$states); + + $grammar=$self->{GRAMMAR}; + + $states = _LR0($grammar); + + $self->{CONFLICTS} = _LALR($grammar,$states); + + $self->{STATES}=$states; +} + +######################### +# LR0 States Generation # +######################### +# +########################### +# General digraph routine # +########################### +sub _Digraph { + my($rel,$F)=@_; + my(%N,@S); + my($infinity)=(~(1<<31)); + my($Traverse); + + $Traverse = sub { + my($x,$d)=@_; + my($y); + + push(@S,$x); + $N{$x}=$d; + + exists($$rel{$x}) + and do { + for $y (keys(%{$$rel{$x}})) { + exists($N{$y}) + or &$Traverse($y,$d+1); + + $N{$y} < $N{$x} + and $N{$x} = $N{$y}; + + $$F{$x}|=$$F{$y}; + } + }; + + $N{$x} == $d + and do { + for(;;) { + $y=pop(@S); + $N{$y}=$infinity; + $y eq $x + and last; + $$F{$y}=$$F{$x}; + } + }; + }; + + for (keys(%$rel)) { + exists($N{$_}) + or &$Traverse($_,1); + } +} +####################### +# Generate LR0 states # +####################### +=for nobody +Formula used for closures: + + CLOSE(A) = DCLOSE(A) u U (CLOSE(B) | A close B) + +where: + + DCLOSE(A) = { [ A -> alpha ] in P } + + A close B iff [ A -> B gamma ] in P + +=cut +sub _SetClosures { + my($grammar)=@_; + my($rel,$closures); + + for my $symbol (keys(%{$$grammar{NTERM}})) { + $closures->{$symbol}=pack('b'.@{$$grammar{RULES}}); + + for my $ruleno (@{$$grammar{NTERM}{$symbol}}) { + my($rhs)=$$grammar{RULES}[$ruleno][1]; + + vec($closures->{$symbol},$ruleno,1)=1; + + @$rhs > 0 + and exists($$grammar{NTERM}{$$rhs[0]}) + and ++$rel->{$symbol}{$$rhs[0]}; + } + } + _Digraph($rel,$closures); + + $closures +} + +sub _Closures { + my($grammar,$core,$closures)=@_; + my($ruleset)=pack('b'.@{$$grammar{RULES}}); + + for (@$core) { + my($ruleno,$pos)=@$_; + my($rhs)=$$grammar{RULES}[$ruleno][1]; + + $pos < @$rhs + and exists($closures->{$$rhs[$pos]}) + and $ruleset|=$closures->{$$rhs[$pos]}; + } + [ @$core, map { [ $_, 0 ] } + grep { vec($ruleset,$_,1) } + 0..$#{$$grammar{RULES}} ]; +} + +sub _Transitions { + my($grammar,$cores,$closures,$states,$stateno)=@_; + my($core)=$$states[$stateno]{'CORE'}; + my(%transitions); + + for (@{_Closures($grammar,$core,$closures)}) { + my($ruleno,$pos)=@$_; + my($rhs)=$$grammar{RULES}[$ruleno][1]; + + $pos == @$rhs + and do { + push(@{$$states[$stateno]{ACTIONS}{''}},$ruleno); + next; + }; + push(@{$transitions{$$rhs[$pos]}},[ $ruleno, $pos+1 ]); + } + + for (keys(%transitions)) { + my($symbol,$core)=($_,$transitions{$_}); + my($corekey)=join(',',map { join('.',@$_) } + sort { $$a[0] <=> $$b[0] + or $$a[1] <=> $$b[1] } + @$core); + my($tostateno); + + exists($cores->{$corekey}) + or do { + push(@$states,{ 'CORE' => $core }); + $cores->{$corekey}=$#$states; + }; + + $tostateno=$cores->{$corekey}; + push(@{$$states[$tostateno]{FROM}},$stateno); + + exists($$grammar{TERM}{$_}) + and do { + $$states[$stateno]{ACTIONS}{$_} = [ $tostateno ]; + next; + }; + $$states[$stateno]{GOTOS}{$_} = $tostateno; + } +} + +sub _LR0 { + my($grammar)=@_; + my($states) = []; + my($stateno); + my($closures); #$closures={ nterm => ruleset,... } + my($cores)={}; # { "itemlist" => stateno, ... } + # where "itemlist" has the form: + # "ruleno.pos,ruleno.pos" ordered by ruleno,pos + + $closures = _SetClosures($grammar); + push(@$states,{ 'CORE' => [ [ 0, 0 ] ] }); + for($stateno=0;$stateno<@$states;++$stateno) { + _Transitions($grammar,$cores,$closures,$states,$stateno); + } + + $states +} + +######################################################### +# Add Lookahead tokens where needed to make LALR states # +######################################################### +=for nobody + Compute First sets for non-terminal using the following formula: + + FIRST(A) = { a in T u { epsilon } | A l a } + u + U { FIRST(B) | B in V and A l B } + + where: + + A l x iff [ A -> X1 X2 .. Xn x alpha ] in P and Xi =>* epsilon, 1 <= i <= n +=cut +sub _SetFirst { + my($grammar,$termlst,$terminx)=@_; + my($rel,$first)=( {}, {} ); + + for my $symbol (keys(%{$$grammar{NTERM}})) { + $first->{$symbol}=pack('b'.@$termlst); + + RULE: + for my $ruleno (@{$$grammar{NTERM}{$symbol}}) { + my($rhs)=$$grammar{RULES}[$ruleno][1]; + + for (@$rhs) { + exists($terminx->{$_}) + and do { + vec($first->{$symbol},$terminx->{$_},1)=1; + next RULE; + }; + ++$rel->{$symbol}{$_}; + exists($$grammar{NULLABLE}{$_}) + or next RULE; + } + vec($first->{$symbol},0,1)=1; + } + } + _Digraph($rel,$first); + + $first +} + +sub _Preds { + my($states,$stateno,$len)=@_; + my($queue, $preds); + + $len + or return [ $stateno ]; + + $queue=[ [ $stateno, $len ] ]; + while(@$queue) { + my($pred) = shift(@$queue); + my($stateno, $len) = @$pred; + + $len == 1 + and do { + push(@$preds,@{$states->[$stateno]{FROM}}); + next; + }; + + push(@$queue, map { [ $_, $len - 1 ] } + @{$states->[$stateno]{FROM}}); + } + + # Pass @$preds through a hash to ensure unicity + [ keys( %{ +{ map { ($_,1) } @$preds } } ) ]; +} + +sub _FirstSfx { + my($grammar,$firstset,$termlst,$terminx,$ruleno,$pos,$key)=@_; + my($first)=pack('b'.@$termlst); + my($rhs)=$$grammar{RULES}[$ruleno][1]; + + for (;$pos < @$rhs;++$pos) { + exists($terminx->{$$rhs[$pos]}) + and do { + vec($first,$terminx->{$$rhs[$pos]},1)=1; + return($first); + }; + $first|=$firstset->{$$rhs[$pos]}; + + vec($first,0,1) + and vec($first,0,1)=0; + + exists($$grammar{NULLABLE}{$$rhs[$pos]}) + or return($first); + + } + vec($first,0,1)=1; + $first; +} + +=for noboby + Compute Follow sets using following formula: + + FOLLOW(p,A) = READ(p,A) + u + U { FOLLOW(q,B) | (p,A) include (q,B) + + where: + + READ(p,A) = U { FIRST(beta) | [ A -> alpha A . beta ] in KERNEL(GOTO(p,A)) + } - { epsilon } + + (p,a) include (q,B) iff [ B -> alpha A . beta ] in KERNEL(GOTO(p,A), + epsilon in FIRST(beta) and + q in PRED(p,alpha) +=cut +sub _ComputeFollows { + my($grammar,$states,$termlst)=@_; + my($firstset,$terminx); + my($inconsistent, $rel, $follows, $sfx)= ( {}, {}, {}, {} ); + + %$terminx= map { ($termlst->[$_],$_) } 0..$#$termlst; + + $firstset=_SetFirst($grammar,$termlst,$terminx); + + for my $stateno (0..$#$states) { + my($state)=$$states[$stateno]; + + exists($$state{ACTIONS}{''}) + and ( @{$$state{ACTIONS}{''}} > 1 + or keys(%{$$state{ACTIONS}}) > 1 ) + and do { + ++$inconsistent->{$stateno}; + + for my $ruleno (@{$$state{ACTIONS}{''}}) { + my($lhs,$rhs)=@{$$grammar{RULES}[$ruleno]}[0,1]; + + for my $predno (@{_Preds($states,$stateno,scalar(@$rhs))}) { + ++$rel->{"$stateno.$ruleno"}{"$predno.$lhs"}; + } + } + }; + + exists($$state{GOTOS}) + or next; + + for my $symbol (keys(%{$$state{GOTOS}})) { + my($tostate)=$$states[$$state{GOTOS}{$symbol}]; + my($goto)="$stateno.$symbol"; + + $follows->{$goto}=pack('b'.@$termlst); + + for my $item (@{$$tostate{'CORE'}}) { + my($ruleno,$pos)=@$item; + my($key)="$ruleno.$pos"; + + exists($sfx->{$key}) + or $sfx->{$key} = _FirstSfx($grammar,$firstset, + $termlst,$terminx, + $ruleno,$pos,$key); + + $follows->{$goto}|=$sfx->{$key}; + + vec($follows->{$goto},0,1) + and do { + my($lhs)=$$grammar{RULES}[$ruleno][0]; + + vec($follows->{$goto},0,1)=0; + + for my $predno (@{_Preds($states,$stateno,$pos-1)}) { + ++$rel->{$goto}{"$predno.$lhs"}; + } + }; + } + } + } + _Digraph($rel,$follows); + + ($follows,$inconsistent) +} + +sub _ComputeLA { + my($grammar,$states)=@_; + my($termlst)= [ '',keys(%{$$grammar{TERM}}) ]; + + my($follows,$inconsistent) = _ComputeFollows($grammar,$states,$termlst); + + for my $stateno ( keys(%$inconsistent ) ) { + my($state)=$$states[$stateno]; + my($conflict); + + #NB the sort is VERY important for conflicts resolution order + for my $ruleno (sort { $a <=> $b } + @{$$state{ACTIONS}{''}}) { + for my $term ( map { $termlst->[$_] } grep { + vec($follows->{"$stateno.$ruleno"},$_,1) } + 0..$#$termlst) { + exists($$state{ACTIONS}{$term}) + and ++$conflict; + push(@{$$state{ACTIONS}{$term}},-$ruleno); + } + } + delete($$state{ACTIONS}{''}); + $conflict + or delete($inconsistent->{$stateno}); + } + + $inconsistent +} + +############################# +# Solve remaining conflicts # +############################# + +sub _SolveConflicts { + my($grammar,$states,$inconsistent)=@_; + my(%rulesprec,$RulePrec); + my($conflicts)={ SOLVED => {}, + FORCED => { TOTAL => [ 0, 0 ], + DETAIL => {} + } + }; + + $RulePrec = sub { + my($ruleno)=@_; + my($rhs,$rprec)=@{$$grammar{RULES}[$ruleno]}[1,2]; + my($lastterm); + + defined($rprec) + and return($rprec); + + exists($rulesprec{$ruleno}) + and return($rulesprec{$ruleno}); + + $lastterm=(grep { exists($$grammar{TERM}{$_}) } @$rhs)[-1]; + + defined($lastterm) + and ref($$grammar{TERM}{$lastterm}) + and do { + $rulesprec{$ruleno}=$$grammar{TERM}{$lastterm}[1]; + return($rulesprec{$ruleno}); + }; + + undef; + }; + + for my $stateno (keys(%$inconsistent)) { + my($state)=$$states[$stateno]; + my($actions)=$$state{ACTIONS}; + my($nbsr,$nbrr); + + for my $term ( keys(%$actions) ) { + my($act)=$$actions{$term}; + + @$act > 1 + or next; + + $$act[0] > 0 + and ref($$grammar{TERM}{$term}) + and do { + my($assoc,$tprec)=@{$$grammar{TERM}{$term}}; + my($k,$error); + + for ($k=1;$k<@$act;++$k) { + my($ruleno)=-$$act[$k]; + my($rprec)=&$RulePrec($ruleno); + + defined($rprec) + or next; + + ( $tprec > $rprec + or ( $tprec == $rprec and $assoc eq 'RIGHT')) + and do { + push(@{$$conflicts{SOLVED}{$stateno}}, + [ $ruleno, $term, 'shift' ]); + splice(@$act,$k--,1); + next; + }; + ( $tprec < $rprec + or $assoc eq 'LEFT') + and do { + push(@{$$conflicts{SOLVED}{$stateno}}, + [ $ruleno, $term, 'reduce' ]); + $$act[0] > 0 + and do { + splice(@$act,0,1); + --$k; + }; + next; + }; + push(@{$$conflicts{SOLVED}{$stateno}}, + [ $ruleno, $term, 'error' ]); + splice(@$act,$k--,1); + $$act[0] > 0 + and do { + splice(@$act,0,1); + ++$error; + --$k; + }; + } + $error + and unshift(@$act,undef); + }; + + @$act > 1 + and do { + $nbrr += @$act - 2; + ($$act[0] > 0 ? $nbsr : $nbrr) += 1; + push(@{$$conflicts{FORCED}{DETAIL}{$stateno}{LIST}}, + map { [ $term, $_ ] } splice(@$act,1)); + }; + } + + $nbsr + and do { + $$conflicts{FORCED}{TOTAL}[0]+=$nbsr; + $$conflicts{FORCED}{DETAIL}{$stateno}{TOTAL}[0]+=$nbsr; + }; + + $nbrr + and do { + $$conflicts{FORCED}{TOTAL}[1]+=$nbrr; + $$conflicts{FORCED}{DETAIL}{$stateno}{TOTAL}[1]+=$nbrr; + }; + + } + + $conflicts +} + +############################### +# Make default reduce actions # +############################### +sub _SetDefaults { + my($states)=@_; + + for my $state (@$states) { + my($actions)=$$state{ACTIONS}; + my(%reduces,$default,$nodefault); + + exists($$actions{''}) + and do { + $$actions{''}[0] = -$$actions{''}[0]; + ++$nodefault; + }; + + #shift error token => no default + exists($$actions{error}) + and $$actions{error}[0] > 0 + and ++$nodefault; + + for my $term (keys(%$actions)) { + + $$actions{$term}=$$actions{$term}[0]; + + ( not defined($$actions{$term}) + or $$actions{$term} > 0 + or $nodefault) + and next; + + push(@{$reduces{$$actions{$term}}},$term); + } + + keys(%reduces) > 0 + or next; + + $default=( map { $$_[0] } + sort { $$b[1] <=> $$a[1] or $$b[0] <=> $$a[0] } + map { [ $_, scalar(@{$reduces{$_}}) ] } + keys(%reduces))[0]; + + delete(@$actions{ @{$reduces{$default}} }); + $$state{ACTIONS}{''}=$default; + } +} + +sub _LALR { + my($grammar,$states) = @_; + my($conflicts,$inconsistent); + + $inconsistent = _ComputeLA($grammar,$states); + + $conflicts = _SolveConflicts($grammar,$states,$inconsistent); + _SetDefaults($states); + + $conflicts +} + + +1;