common/tools/raptor/XML/SAX/PurePerl/DocType.pm
changeset 923 5ccf9d5ab663
parent 922 996297fad800
parent 907 bab81256b297
child 924 a5ed0e6ca679
equal deleted inserted replaced
922:996297fad800 923:5ccf9d5ab663
     1 # $Id: DocType.pm,v 1.3 2003/07/30 13:39:22 matt Exp $
       
     2 
       
     3 package XML::SAX::PurePerl;
       
     4 
       
     5 use strict;
       
     6 use XML::SAX::PurePerl::Productions qw($PubidChar);
       
     7 
       
     8 sub doctypedecl {
       
     9     my ($self, $reader) = @_;
       
    10     
       
    11     my $data = $reader->data(9);
       
    12     if ($data =~ /^<!DOCTYPE/) {
       
    13         $reader->move_along(9);
       
    14         $self->skip_whitespace($reader) ||
       
    15             $self->parser_error("No whitespace after doctype declaration", $reader);
       
    16         
       
    17         my $root_name = $self->Name($reader) ||
       
    18             $self->parser_error("Doctype declaration has no root element name", $reader);
       
    19         
       
    20         if ($self->skip_whitespace($reader)) {
       
    21             # might be externalid...
       
    22             my %dtd = $self->ExternalID($reader);
       
    23             # TODO: Call SAX event
       
    24         }
       
    25         
       
    26         $self->skip_whitespace($reader);
       
    27         
       
    28         $self->InternalSubset($reader);
       
    29         
       
    30         $reader->match('>') or $self->parser_error("Doctype not closed", $reader);
       
    31         
       
    32         return 1;
       
    33     }
       
    34     
       
    35     return 0;
       
    36 }
       
    37 
       
    38 sub ExternalID {
       
    39     my ($self, $reader) = @_;
       
    40     
       
    41     my $data = $reader->data(6);
       
    42     
       
    43     if ($data =~ /^SYSTEM/) {
       
    44         $reader->move_along(6);
       
    45         $self->skip_whitespace($reader) ||
       
    46             $self->parser_error("No whitespace after SYSTEM identifier", $reader);
       
    47         return (SYSTEM => $self->SystemLiteral($reader));
       
    48     }
       
    49     elsif ($data =~ /^PUBLIC/) {
       
    50         $reader->move_along(6);
       
    51         $self->skip_whitespace($reader) ||
       
    52             $self->parser_error("No whitespace after PUBLIC identifier", $reader);
       
    53         
       
    54         my $quote = $self->quote($reader) || 
       
    55             $self->parser_error("Not a quote character in PUBLIC identifier", $reader);
       
    56         
       
    57         my $data = $reader->data;
       
    58         my $pubid = '';
       
    59         while(1) {
       
    60             $self->parser_error("EOF while looking for end of PUBLIC identifiier", $reader)
       
    61                 unless length($data);
       
    62             
       
    63             if ($data =~ /^([^$quote]*)$quote/) {
       
    64                 $pubid .= $1;
       
    65                 $reader->move_along(length($1) + 1);
       
    66                 last;
       
    67             }
       
    68             else {
       
    69                 $pubid .= $data;
       
    70                 $reader->move_along(length($data));
       
    71                 $data = $reader->data;
       
    72             }
       
    73         }
       
    74         
       
    75         if ($pubid !~ /^($PubidChar)+$/) {
       
    76             $self->parser_error("Invalid characters in PUBLIC identifier", $reader);
       
    77         }
       
    78         
       
    79         $self->skip_whitespace($reader) ||
       
    80             $self->parser_error("Not whitespace after PUBLIC ID in DOCTYPE", $reader);
       
    81         
       
    82         return (PUBLIC => $pubid, 
       
    83                 SYSTEM => $self->SystemLiteral($reader));
       
    84     }
       
    85     else {
       
    86         return;
       
    87     }
       
    88     
       
    89     return 1;
       
    90 }
       
    91 
       
    92 sub SystemLiteral {
       
    93     my ($self, $reader) = @_;
       
    94     
       
    95     my $quote = $self->quote($reader);
       
    96     
       
    97     my $data = $reader->data;
       
    98     my $systemid = '';
       
    99     while (1) {
       
   100         $self->parser_error("EOF found while looking for end of Sytem Literal", $reader)
       
   101             unless length($data);
       
   102         if ($data =~ /^([^$quote]*)$quote/) {
       
   103             $systemid .= $1;
       
   104             $reader->move_along(length($1) + 1);
       
   105             return $systemid;
       
   106         }
       
   107         else {
       
   108             $systemid .= $data;
       
   109             $reader->move_along(length($data));
       
   110             $data = $reader->data;
       
   111         }
       
   112     }
       
   113 }
       
   114 
       
   115 sub InternalSubset {
       
   116     my ($self, $reader) = @_;
       
   117     
       
   118     return 0 unless $reader->match('[');
       
   119     
       
   120     1 while $self->IntSubsetDecl($reader);
       
   121     
       
   122     $reader->match(']') or $self->parser_error("No close bracket on internal subset (found: " . $reader->data, $reader);
       
   123     $self->skip_whitespace($reader);
       
   124     return 1;
       
   125 }
       
   126 
       
   127 sub IntSubsetDecl {
       
   128     my ($self, $reader) = @_;
       
   129 
       
   130     return $self->DeclSep($reader) || $self->markupdecl($reader);
       
   131 }
       
   132 
       
   133 sub DeclSep {
       
   134     my ($self, $reader) = @_;
       
   135 
       
   136     if ($self->skip_whitespace($reader)) {
       
   137         return 1;
       
   138     }
       
   139 
       
   140     if ($self->PEReference($reader)) {
       
   141         return 1;
       
   142     }
       
   143     
       
   144 #    if ($self->ParsedExtSubset($reader)) {
       
   145 #        return 1;
       
   146 #    }
       
   147     
       
   148     return 0;
       
   149 }
       
   150 
       
   151 sub PEReference {
       
   152     my ($self, $reader) = @_;
       
   153     
       
   154     return 0 unless $reader->match('%');
       
   155     
       
   156     my $peref = $self->Name($reader) ||
       
   157         $self->parser_error("PEReference did not find a Name", $reader);
       
   158     # TODO - load/parse the peref
       
   159     
       
   160     $reader->match(';') or $self->parser_error("Invalid token in PEReference", $reader);
       
   161     return 1;
       
   162 }
       
   163 
       
   164 sub markupdecl {
       
   165     my ($self, $reader) = @_;
       
   166     
       
   167     if ($self->elementdecl($reader) ||
       
   168         $self->AttlistDecl($reader) ||
       
   169         $self->EntityDecl($reader) ||
       
   170         $self->NotationDecl($reader) ||
       
   171         $self->PI($reader) ||
       
   172         $self->Comment($reader))
       
   173     {
       
   174         return 1;
       
   175     }
       
   176     
       
   177     return 0;
       
   178 }
       
   179 
       
   180 1;