diff -r 228efacd68af -r 989c70555820 common/tools/raptor/XML/SAX/PurePerl/DocType.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/common/tools/raptor/XML/SAX/PurePerl/DocType.pm Tue Aug 04 14:40:11 2009 +0100 @@ -0,0 +1,180 @@ +# $Id: DocType.pm,v 1.3 2003/07/30 13:39:22 matt Exp $ + +package XML::SAX::PurePerl; + +use strict; +use XML::SAX::PurePerl::Productions qw($PubidChar); + +sub doctypedecl { + my ($self, $reader) = @_; + + my $data = $reader->data(9); + if ($data =~ /^move_along(9); + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after doctype declaration", $reader); + + my $root_name = $self->Name($reader) || + $self->parser_error("Doctype declaration has no root element name", $reader); + + if ($self->skip_whitespace($reader)) { + # might be externalid... + my %dtd = $self->ExternalID($reader); + # TODO: Call SAX event + } + + $self->skip_whitespace($reader); + + $self->InternalSubset($reader); + + $reader->match('>') or $self->parser_error("Doctype not closed", $reader); + + return 1; + } + + return 0; +} + +sub ExternalID { + my ($self, $reader) = @_; + + my $data = $reader->data(6); + + if ($data =~ /^SYSTEM/) { + $reader->move_along(6); + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after SYSTEM identifier", $reader); + return (SYSTEM => $self->SystemLiteral($reader)); + } + elsif ($data =~ /^PUBLIC/) { + $reader->move_along(6); + $self->skip_whitespace($reader) || + $self->parser_error("No whitespace after PUBLIC identifier", $reader); + + my $quote = $self->quote($reader) || + $self->parser_error("Not a quote character in PUBLIC identifier", $reader); + + my $data = $reader->data; + my $pubid = ''; + while(1) { + $self->parser_error("EOF while looking for end of PUBLIC identifiier", $reader) + unless length($data); + + if ($data =~ /^([^$quote]*)$quote/) { + $pubid .= $1; + $reader->move_along(length($1) + 1); + last; + } + else { + $pubid .= $data; + $reader->move_along(length($data)); + $data = $reader->data; + } + } + + if ($pubid !~ /^($PubidChar)+$/) { + $self->parser_error("Invalid characters in PUBLIC identifier", $reader); + } + + $self->skip_whitespace($reader) || + $self->parser_error("Not whitespace after PUBLIC ID in DOCTYPE", $reader); + + return (PUBLIC => $pubid, + SYSTEM => $self->SystemLiteral($reader)); + } + else { + return; + } + + return 1; +} + +sub SystemLiteral { + my ($self, $reader) = @_; + + my $quote = $self->quote($reader); + + my $data = $reader->data; + my $systemid = ''; + while (1) { + $self->parser_error("EOF found while looking for end of Sytem Literal", $reader) + unless length($data); + if ($data =~ /^([^$quote]*)$quote/) { + $systemid .= $1; + $reader->move_along(length($1) + 1); + return $systemid; + } + else { + $systemid .= $data; + $reader->move_along(length($data)); + $data = $reader->data; + } + } +} + +sub InternalSubset { + my ($self, $reader) = @_; + + return 0 unless $reader->match('['); + + 1 while $self->IntSubsetDecl($reader); + + $reader->match(']') or $self->parser_error("No close bracket on internal subset (found: " . $reader->data, $reader); + $self->skip_whitespace($reader); + return 1; +} + +sub IntSubsetDecl { + my ($self, $reader) = @_; + + return $self->DeclSep($reader) || $self->markupdecl($reader); +} + +sub DeclSep { + my ($self, $reader) = @_; + + if ($self->skip_whitespace($reader)) { + return 1; + } + + if ($self->PEReference($reader)) { + return 1; + } + +# if ($self->ParsedExtSubset($reader)) { +# return 1; +# } + + return 0; +} + +sub PEReference { + my ($self, $reader) = @_; + + return 0 unless $reader->match('%'); + + my $peref = $self->Name($reader) || + $self->parser_error("PEReference did not find a Name", $reader); + # TODO - load/parse the peref + + $reader->match(';') or $self->parser_error("Invalid token in PEReference", $reader); + return 1; +} + +sub markupdecl { + my ($self, $reader) = @_; + + if ($self->elementdecl($reader) || + $self->AttlistDecl($reader) || + $self->EntityDecl($reader) || + $self->NotationDecl($reader) || + $self->PI($reader) || + $self->Comment($reader)) + { + return 1; + } + + return 0; +} + +1;