177
|
1 |
# $Id: DocType.pm,v 1.3 2003/07/30 13:39:22 matt Exp $
|
|
2 |
|
|
3 |
package XML::SAX::PurePerl;
|
|
4 |
|
|
5 |
use strict;
|
|
6 |
use XML::SAX::PurePerl::Productions qw($PubidChar);
|
|
7 |
|
|
8 |
# Parse a document type declaration ("<!DOCTYPE ...>") starting at the
# reader's current position.
#
# Arguments:
#   $self   - the parser object; its skip_whitespace, Name, ExternalID,
#             InternalSubset and parser_error methods are used.
#   $reader - the input reader; its data, move_along and match methods
#             are used.
#
# Returns 1 once a complete declaration has been processed, or 0 (without
# calling move_along) when the next nine characters are not "<!DOCTYPE".
# Malformed declarations are reported through $self->parser_error.
sub doctypedecl {
    my ($self, $reader) = @_;

    # Nine characters is exactly the length of "<!DOCTYPE".
    my $lookahead = $reader->data(9);
    return 0 unless $lookahead =~ /^<!DOCTYPE/;

    $reader->move_along(9);

    $self->parser_error("No whitespace after doctype declaration", $reader)
        unless $self->skip_whitespace($reader);

    # The root element name is parsed but not reported anywhere yet.
    my $root_name = $self->Name($reader)
        || $self->parser_error("Doctype declaration has no root element name", $reader);

    if ($self->skip_whitespace($reader)) {
        # An external ID may follow the whitespace; the parsed key/value
        # pairs are collected here but not used yet.
        my %external_id = $self->ExternalID($reader);
        # TODO: Call SAX event
    }

    $self->skip_whitespace($reader);
    $self->InternalSubset($reader);

    unless ($reader->match('>')) {
        $self->parser_error("Doctype not closed", $reader);
    }

    return 1;
}
|
|
37 |
|
|
38 |
# Parse an optional external identifier at the reader's current position:
#
#     SYSTEM <whitespace> SystemLiteral
#     PUBLIC <whitespace> PubidLiteral <whitespace> SystemLiteral
#
# Arguments:
#   $self   - the parser object; its skip_whitespace, quote, SystemLiteral
#             and parser_error methods are used.
#   $reader - the input reader; its data and move_along methods are used.
#
# Returns a flat key/value list:
#   (SYSTEM => $system_id)                        after a SYSTEM keyword,
#   (PUBLIC => $public_id, SYSTEM => $system_id)  after a PUBLIC keyword,
#   an empty list when neither keyword comes next (move_along is not
#   called in that case).
#
# Syntax problems are reported through $self->parser_error.
# NOTE(review): the scanning loop relies on parser_error not returning
# (presumably it throws) - confirm against its definition.
sub ExternalID {
    my ($self, $reader) = @_;

    # Both keywords are six characters long, so one peek serves both tests.
    my $keyword = $reader->data(6);

    if ($keyword =~ /^SYSTEM/) {
        $reader->move_along(6);
        $self->skip_whitespace($reader) ||
            $self->parser_error("No whitespace after SYSTEM identifier", $reader);
        return (SYSTEM => $self->SystemLiteral($reader));
    }

    # Not an external identifier at all.
    return unless $keyword =~ /^PUBLIC/;

    $reader->move_along(6);
    $self->skip_whitespace($reader) ||
        $self->parser_error("No whitespace after PUBLIC identifier", $reader);

    my $quote = $self->quote($reader) ||
        $self->parser_error("Not a quote character in PUBLIC identifier", $reader);

    # Accumulate the literal chunk by chunk until the closing quote shows
    # up; the reader may hand the input over in pieces.
    my $chunk = $reader->data;
    my $pubid = '';
    while (1) {
        $self->parser_error("EOF while looking for end of PUBLIC identifier", $reader)
            unless length($chunk);

        if ($chunk =~ /^([^\Q$quote\E]*)\Q$quote\E/) {
            my $head = $1;
            $pubid .= $head;
            # +1 also steps over the closing quote itself.
            $reader->move_along(length($head) + 1);
            last;
        }

        $pubid .= $chunk;
        $reader->move_along(length($chunk));
        $chunk = $reader->data;
    }

    # PubidLiteral is PubidChar* in XML 1.0, so an empty public identifier
    # is legal and must not be rejected.
    if ($pubid !~ /^(?:$PubidChar)*$/) {
        $self->parser_error("Invalid characters in PUBLIC identifier", $reader);
    }

    $self->skip_whitespace($reader) ||
        $self->parser_error("Not whitespace after PUBLIC ID in DOCTYPE", $reader);

    return (PUBLIC => $pubid,
            SYSTEM => $self->SystemLiteral($reader));
}
|
|
91 |
|
|
92 |
# Parse a quoted system literal (the system identifier of an external ID).
#
# Arguments:
#   $self   - the parser object; its quote and parser_error methods are used.
#   $reader - the input reader; its data and move_along methods are used.
#
# Returns the text between the opening and closing quote (quotes excluded),
# after calling move_along for the literal and its closing quote.
#
# A missing opening quote or a missing closing quote is reported through
# $self->parser_error.
# NOTE(review): the scanning loop relies on parser_error not returning
# (presumably it throws) - confirm against its definition.
sub SystemLiteral {
    my ($self, $reader) = @_;

    # Without this check a false/empty quote would be interpolated into the
    # pattern below and die with a regex compile error instead of a proper
    # parser error.
    my $quote = $self->quote($reader) ||
        $self->parser_error("Not a quote character in SYSTEM identifier", $reader);

    # Accumulate the literal chunk by chunk until the closing quote shows
    # up; the reader may hand the input over in pieces.
    my $chunk = $reader->data;
    my $systemid = '';
    while (1) {
        $self->parser_error("EOF found while looking for end of System Literal", $reader)
            unless length($chunk);

        if ($chunk =~ /^([^\Q$quote\E]*)\Q$quote\E/) {
            my $head = $1;
            # +1 also steps over the closing quote itself.
            $reader->move_along(length($head) + 1);
            return $systemid . $head;
        }

        $systemid .= $chunk;
        $reader->move_along(length($chunk));
        $chunk = $reader->data;
    }
}
|
|
114 |
|
|
115 |
# Parse an optional internal DTD subset: "[" declarations "]".
#
# Arguments:
#   $self   - the parser object; its IntSubsetDecl, skip_whitespace and
#             parser_error methods are used.
#   $reader - the input reader; its match and data methods are used.
#
# Returns 0 when the input does not start with "[", otherwise 1 after the
# closing "]" and any whitespace following it have been handled.  A missing
# "]" is reported through $self->parser_error.
sub InternalSubset {
    my ($self, $reader) = @_;

    if (!$reader->match('[')) {
        return 0;
    }

    # Keep going until IntSubsetDecl reports that nothing more matched.
    while ($self->IntSubsetDecl($reader)) {
    }

    unless ($reader->match(']')) {
        $self->parser_error("No close bracket on internal subset (found: " . $reader->data, $reader);
    }

    $self->skip_whitespace($reader);
    return 1;
}
|
|
126 |
|
|
127 |
# Try to parse one item of the internal subset: a declaration separator
# first, and a markup declaration only if no separator matched.
#
# Arguments:
#   $self   - the parser object; its DeclSep and markupdecl methods are used.
#   $reader - the input reader, passed through to those methods.
#
# Returns DeclSep's result when it is true, otherwise markupdecl's result.
sub IntSubsetDecl {
    my ($self, $reader) = @_;

    my $separator = $self->DeclSep($reader);
    return $separator if $separator;

    return $self->markupdecl($reader);
}
|
|
132 |
|
|
133 |
# Try to parse a declaration separator: either whitespace or a
# parameter-entity reference.
#
# Arguments:
#   $self   - the parser object; its skip_whitespace and PEReference
#             methods are used.
#   $reader - the input reader, passed through to those methods.
#
# Returns 1 when either alternative matched, 0 otherwise.  PEReference is
# only attempted when skip_whitespace reported nothing.
sub DeclSep {
    my ($self, $reader) = @_;

    return 1 if $self->skip_whitespace($reader);
    return 1 if $self->PEReference($reader);

    # Parsed external subsets are not handled yet:
    # return 1 if $self->ParsedExtSubset($reader);

    return 0;
}
|
|
150 |
|
|
151 |
# Try to parse a parameter-entity reference of the form "%" Name ";".
#
# Arguments:
#   $self   - the parser object; its Name and parser_error methods are used.
#   $reader - the input reader; its match method is used.
#
# Returns 0 when the input does not start with "%", otherwise 1 once the
# terminating ";" has been matched.  A missing name or a missing ";" is
# reported through $self->parser_error.  The referenced entity is not
# loaded or expanded yet.
sub PEReference {
    my ($self, $reader) = @_;

    if (!$reader->match('%')) {
        return 0;
    }

    my $entity_name = $self->Name($reader)
        || $self->parser_error("PEReference did not find a Name", $reader);
    # TODO - load/parse the peref

    unless ($reader->match(';')) {
        $self->parser_error("Invalid token in PEReference", $reader);
    }

    return 1;
}
|
|
163 |
|
|
164 |
# Try to parse one markup declaration by attempting each candidate
# production in a fixed order and stopping at the first that succeeds.
#
# Arguments:
#   $self   - the parser object; its elementdecl, AttlistDecl, EntityDecl,
#             NotationDecl, PI and Comment methods are used.
#   $reader - the input reader, passed through to those methods.
#
# Returns 1 when one of the productions returned true, 0 otherwise.
sub markupdecl {
    my ($self, $reader) = @_;

    # Order matters: later productions run only when earlier ones failed.
    my @productions = qw(
        elementdecl
        AttlistDecl
        EntityDecl
        NotationDecl
        PI
        Comment
    );

    for my $production (@productions) {
        return 1 if $self->$production($reader);
    }

    return 0;
}
|
|
179 |
|
|
180 |
1;
|