diff -r 228efacd68af -r 989c70555820 common/tools/raptor/XML/SAX/PurePerl/DTDDecls.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/common/tools/raptor/XML/SAX/PurePerl/DTDDecls.pm Tue Aug 04 14:40:11 2009 +0100 @@ -0,0 +1,603 @@ +# $Id: DTDDecls.pm,v 1.7 2005/10/14 20:31:20 matt Exp $ + +package XML::SAX::PurePerl; + +use strict; +use XML::SAX::PurePerl::Productions qw($NameChar $SingleChar); + +sub elementdecl { + my ($self, $reader) = @_; + + my $data = $reader->data(9); + return 0 unless $data =~ /^move_along(9); + + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after ELEMENT declaration", $reader); + + my $name = $self->Name($reader); + + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after ELEMENT's name", $reader); + + $self->contentspec($reader, $name); + + $self->skip_whitespace($reader); + + $reader->match('>') or $self->parser_error("Closing angle bracket not found on ELEMENT declaration", $reader); + + return 1; +} + +sub contentspec { + my ($self, $reader, $name) = @_; + + my $data = $reader->data(5); + + my $model; + if ($data =~ /^EMPTY/) { + $reader->move_along(5); + $model = 'EMPTY'; + } + elsif ($data =~ /^ANY/) { + $reader->move_along(3); + $model = 'ANY'; + } + else { + $model = $self->Mixed_or_children($reader); + } + + if ($model) { + # call SAX callback now. + $self->element_decl({Name => $name, Model => $model}); + return 1; + } + + $self->parser_error("contentspec not found in ELEMENT declaration", $reader); +} + +sub Mixed_or_children { + my ($self, $reader) = @_; + + my $data = $reader->data(8); + $data =~ /^\(/ or return; # $self->parser_error("No opening bracket in Mixed or children", $reader); + + if ($data =~ /^\(\s*\#PCDATA/) { + $reader->match('('); + $self->skip_whitespace($reader); + $reader->move_along(7); + my $model = $self->Mixed($reader); + return $model; + } + + # not matched - must be Children + return $self->children($reader); +} + +# Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' '*' ) +# | ( '(' S* PCDATA S* ')' ) +sub Mixed { + my ($self, $reader) = @_; + + # Mixed_or_children already matched '(' S* '#PCDATA' + + my $model = '(#PCDATA'; + + $self->skip_whitespace($reader); + + my %seen; + + while (1) { + last unless $reader->match('|'); + $self->skip_whitespace($reader); + + my $name = $self->Name($reader) || + $self->parser_error("No 'Name' after Mixed content '|'", $reader); + + if ($seen{$name}) { + $self->parser_error("Element '$name' has already appeared in this group", $reader); + } + $seen{$name}++; + + $model .= "|$name"; + + $self->skip_whitespace($reader); + } + + $reader->match(')') || $self->parser_error("no closing bracket on mixed content", $reader); + + $model .= ")"; + + if ($reader->match('*')) { + $model .= "*"; + } + + return $model; +} + +# [[47]] Children ::= ChoiceOrSeq Cardinality? +# [[48]] Cp ::= ( QName | ChoiceOrSeq ) Cardinality? +# ChoiceOrSeq ::= '(' S* Cp ( Choice | Seq )? S* ')' +# [[49]] Choice ::= ( S* '|' S* Cp )+ +# [[50]] Seq ::= ( S* ',' S* Cp )+ +# // Children ::= (Choice | Seq) Cardinality? +# // Cp ::= ( QName | Choice | Seq) Cardinality? +# // Choice ::= '(' S* Cp ( S* '|' S* Cp )+ S* ')' +# // Seq ::= '(' S* Cp ( S* ',' S* Cp )* S* ')' +# [[51]] Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' MixedCardinality ) +# | ( '(' S* PCDATA S* ')' ) +# Cardinality ::= '?' | '+' | '*' +# MixedCardinality ::= '*' +sub children { + my ($self, $reader) = @_; + + return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader); +} + +sub ChoiceOrSeq { + my ($self, $reader) = @_; + + $reader->match('(') or $self->parser_error("choice/seq contains no opening bracket", $reader); + + my $model = '('; + + $self->skip_whitespace($reader); + + $model .= $self->Cp($reader); + + if (my $choice = $self->Choice($reader)) { + $model .= $choice; + } + else { + $model .= $self->Seq($reader); + } + + $self->skip_whitespace($reader); + + $reader->match(')') or $self->parser_error("choice/seq contains no closing bracket", $reader); + + $model .= ')'; + + return $model; +} + +sub Cardinality { + my ($self, $reader) = @_; + # cardinality is always optional + my $data = $reader->data; + if ($data =~ /^([\?\+\*])/) { + $reader->move_along(1); + return $1; + } + return ''; +} + +sub Cp { + my ($self, $reader) = @_; + + my $model; + my $name = eval + { + if (my $name = $self->Name($reader)) { + return $name . $self->Cardinality($reader); + } + }; + return $name if defined $name; + return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader); +} + +sub Choice { + my ($self, $reader) = @_; + + my $model = ''; + $self->skip_whitespace($reader); + + while ($reader->match('|')) { + $self->skip_whitespace($reader); + $model .= '|'; + $model .= $self->Cp($reader); + $self->skip_whitespace($reader); + } + + return $model; +} + +sub Seq { + my ($self, $reader) = @_; + + my $model = ''; + $self->skip_whitespace($reader); + + while ($reader->match(',')) { + $self->skip_whitespace($reader); + my $cp = $self->Cp($reader); + if ($cp) { + $model .= ','; + $model .= $cp; + } + $self->skip_whitespace($reader); + } + + return $model; +} + +sub AttlistDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(9); + if ($data =~ /^move_along(9); + + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after ATTLIST declaration", $reader); + my $name = $self->Name($reader); + + $self->AttDefList($reader, $name); + + $self->skip_whitespace($reader); + + $reader->match('>') or $self->parser_error("Closing angle bracket not found on ATTLIST declaration", $reader); + + return 1; + } + + return 0; +} + +sub AttDefList { + my ($self, $reader, $name) = @_; + + 1 while $self->AttDef($reader, $name); +} + +sub AttDef { + my ($self, $reader, $el_name) = @_; + + $self->skip_whitespace($reader) || return 0; + my $att_name = $self->Name($reader) || return 0; + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after Name in attribute definition", $reader); + my $att_type = $self->AttType($reader); + + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after AttType in attribute definition", $reader); + my ($mode, $value) = $self->DefaultDecl($reader); + + # fire SAX event here! + $self->attribute_decl({ + eName => $el_name, + aName => $att_name, + Type => $att_type, + Mode => $mode, + Value => $value, + }); + return 1; +} + +sub AttType { + my ($self, $reader) = @_; + + return $self->StringType($reader) || + $self->TokenizedType($reader) || + $self->EnumeratedType($reader) || + $self->parser_error("Can't match AttType", $reader); +} + +sub StringType { + my ($self, $reader) = @_; + + my $data = $reader->data(5); + return unless $data =~ /^CDATA/; + $reader->move_along(5); + return 'CDATA'; +} + +sub TokenizedType { + my ($self, $reader) = @_; + + my $data = $reader->data(8); + if ($data =~ /^(IDREFS?|ID|ENTITIES|ENTITY|NMTOKENS?)/) { + $reader->move_along(length($1)); + return $1; + } + return; +} + +sub EnumeratedType { + my ($self, $reader) = @_; + return $self->NotationType($reader) || $self->Enumeration($reader); +} + +sub NotationType { + my ($self, $reader) = @_; + + my $data = $reader->data(8); + return unless $data =~ /^NOTATION/; + $reader->move_along(8); + + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after NOTATION", $reader); + $reader->match('(') or $self->parser_error("No opening bracket in notation section", $reader); + + $self->skip_whitespace($reader); + my $model = 'NOTATION ('; + my $name = $self->Name($reader) || + $self->parser_error("No name in notation section", $reader); + $model .= $name; + $self->skip_whitespace($reader); + $data = $reader->data; + while ($data =~ /^\|/) { + $reader->move_along(1); + $model .= '|'; + $self->skip_whitespace($reader); + my $name = $self->Name($reader) || + $self->parser_error("No name in notation section", $reader); + $model .= $name; + $self->skip_whitespace($reader); + $data = $reader->data; + } + $data =~ /^\)/ or $self->parser_error("No closing bracket in notation section", $reader); + $reader->move_along(1); + + $model .= ')'; + + return $model; +} + +sub Enumeration { + my ($self, $reader) = @_; + + return unless $reader->match('('); + + $self->skip_whitespace($reader); + my $model = '('; + my $nmtoken = $self->Nmtoken($reader) || + $self->parser_error("No Nmtoken in enumerated declaration", $reader); + $model .= $nmtoken; + $self->skip_whitespace($reader); + my $data = $reader->data; + while ($data =~ /^\|/) { + $model .= '|'; + $reader->move_along(1); + $self->skip_whitespace($reader); + my $nmtoken = $self->Nmtoken($reader) || + $self->parser_error("No Nmtoken in enumerated declaration", $reader); + $model .= $nmtoken; + $self->skip_whitespace($reader); + $data = $reader->data; + } + $data =~ /^\)/ or $self->parser_error("No closing bracket in enumerated declaration", $reader); + $reader->move_along(1); + + $model .= ')'; + + return $model; +} + +sub Nmtoken { + my ($self, $reader) = @_; + return $self->Name($reader); +} + +sub DefaultDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(9); + if ($data =~ /^(\#REQUIRED|\#IMPLIED)/) { + $reader->move_along(length($1)); + return $1; + } + my $model = ''; + if ($data =~ /^\#FIXED/) { + $reader->move_along(6); + $self->skip_whitespace($reader) || $self->parser_error( + "no whitespace after FIXED specifier", $reader); + my $value = $self->AttValue($reader); + return "#FIXED", $value; + } + my $value = $self->AttValue($reader); + return undef, $value; +} + +sub EntityDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(8); + return 0 unless $data =~ /^move_along(8); + + $self->skip_whitespace($reader) || $self->parser_error( + "No whitespace after ENTITY declaration", $reader); + + $self->PEDecl($reader) || $self->GEDecl($reader); + + $self->skip_whitespace($reader); + + $reader->match('>') or $self->parser_error("No closing '>' in entity definition", $reader); + + return 1; +} + +sub GEDecl { + my ($self, $reader) = @_; + + my $name = $self->Name($reader) || $self->parser_error("No entity name given", $reader); + $self->skip_whitespace($reader) || $self->parser_error("No whitespace after entity name", $reader); + + # TODO: ExternalID calls lexhandler method. Wrong place for it. + my $value; + if ($value = $self->ExternalID($reader)) { + $value .= $self->NDataDecl($reader); + } + else { + $value = $self->EntityValue($reader); + } + + if ($self->{ParseOptions}{entities}{$name}) { + warn("entity $name already exists\n"); + } else { + $self->{ParseOptions}{entities}{$name} = 1; + $self->{ParseOptions}{expanded_entity}{$name} = $value; # ??? + } + # do callback? + return 1; +} + +sub PEDecl { + my ($self, $reader) = @_; + + return 0 unless $reader->match('%'); + + $self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity marker", $reader); + my $name = $self->Name($reader) || $self->parser_error("No parameter entity name given", $reader); + $self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity name", $reader); + my $value = $self->ExternalID($reader) || + $self->EntityValue($reader) || + $self->parser_error("PE is not a value or an external resource", $reader); + # do callback? + return 1; +} + +my $quotre = qr/[^%&\"]/; +my $aposre = qr/[^%&\']/; + +sub EntityValue { + my ($self, $reader) = @_; + + my $data = $reader->data; + my $quote = '"'; + my $re = $quotre; + if (!$data =~ /^"/) { + $data =~ /^'/ or $self->parser_error("Not a quote character", $reader); + $quote = "'"; + $re = $aposre; + } + $reader->move_along(1); + + my $value = ''; + + while (1) { + my $data = $reader->data; + + $self->parser_error("EOF found while reading entity value", $reader) + unless length($data); + + if ($data =~ /^($re+)/) { + my $match = $1; + $value .= $match; + $reader->move_along(length($match)); + } + elsif ($reader->match('&')) { + # if it's a char ref, expand now: + if ($reader->match('#')) { + my $char; + my $ref = ''; + if ($reader->match('x')) { + my $data = $reader->data; + while (1) { + $self->parser_error("EOF looking for reference end", $reader) + unless length($data); + if ($data !~ /^([0-9a-fA-F]*)/) { + last; + } + $ref .= $1; + $reader->move_along(length($1)); + if (length($1) == length($data)) { + $data = $reader->data; + } + else { + last; + } + } + $char = chr_ref(hex($ref)); + $ref = "x$ref"; + } + else { + my $data = $reader->data; + while (1) { + $self->parser_error("EOF looking for reference end", $reader) + unless length($data); + if ($data !~ /^([0-9]*)/) { + last; + } + $ref .= $1; + $reader->move_along(length($1)); + if (length($1) == length($data)) { + $data = $reader->data; + } + else { + last; + } + } + $char = chr($ref); + } + $reader->match(';') || + $self->parser_error("No semi-colon found after character reference", $reader); + if ($char !~ $SingleChar) { # match a single character + $self->parser_error("Character reference '&#$ref;' refers to an illegal XML character ($char)", $reader); + } + $value .= $char; + } + else { + # entity refs in entities get expanded later, so don't parse now. + $value .= '&'; + } + } + elsif ($reader->match('%')) { + $value .= $self->PEReference($reader); + } + elsif ($reader->match($quote)) { + # end of attrib + last; + } + else { + $self->parser_error("Invalid character in attribute value: " . substr($reader->data, 0, 1), $reader); + } + } + + return $value; +} + +sub NDataDecl { + my ($self, $reader) = @_; + $self->skip_whitespace($reader) || return ''; + my $data = $reader->data(5); + return '' unless $data =~ /^NDATA/; + $reader->move_along(5); + $self->skip_whitespace($reader) || $self->parser_error("No whitespace after NDATA declaration", $reader); + my $name = $self->Name($reader) || $self->parser_error("NDATA declaration lacks a proper Name", $reader); + return " NDATA $name"; +} + +sub NotationDecl { + my ($self, $reader) = @_; + + my $data = $reader->data(10); + return 0 unless $data =~ /^move_along(10); + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after NOTATION declaration", $reader); + $data = $reader->data; + my $value = ''; + while(1) { + $self->parser_error("EOF found while looking for end of NotationDecl", $reader) + unless length($data); + + if ($data =~ /^([^>]*)>/) { + $value .= $1; + $reader->move_along(length($1) + 1); + $self->notation_decl({Name => "FIXME", SystemId => "FIXME", PublicId => "FIXME" }); + last; + } + else { + $value .= $data; + $reader->move_along(length($data)); + $data = $reader->data; + } + } + return 1; +} + +1;