1 # $Id: DocType.pm,v 1.3 2003/07/30 13:39:22 matt Exp $ |
|
2 |
|
3 package XML::SAX::PurePerl; |
|
4 |
|
5 use strict; |
|
6 use XML::SAX::PurePerl::Productions qw($PubidChar); |
|
7 |
|
8 sub doctypedecl { |
|
9 my ($self, $reader) = @_; |
|
10 |
|
11 my $data = $reader->data(9); |
|
12 if ($data =~ /^<!DOCTYPE/) { |
|
13 $reader->move_along(9); |
|
14 $self->skip_whitespace($reader) || |
|
15 $self->parser_error("No whitespace after doctype declaration", $reader); |
|
16 |
|
17 my $root_name = $self->Name($reader) || |
|
18 $self->parser_error("Doctype declaration has no root element name", $reader); |
|
19 |
|
20 if ($self->skip_whitespace($reader)) { |
|
21 # might be externalid... |
|
22 my %dtd = $self->ExternalID($reader); |
|
23 # TODO: Call SAX event |
|
24 } |
|
25 |
|
26 $self->skip_whitespace($reader); |
|
27 |
|
28 $self->InternalSubset($reader); |
|
29 |
|
30 $reader->match('>') or $self->parser_error("Doctype not closed", $reader); |
|
31 |
|
32 return 1; |
|
33 } |
|
34 |
|
35 return 0; |
|
36 } |
|
37 |
|
38 sub ExternalID { |
|
39 my ($self, $reader) = @_; |
|
40 |
|
41 my $data = $reader->data(6); |
|
42 |
|
43 if ($data =~ /^SYSTEM/) { |
|
44 $reader->move_along(6); |
|
45 $self->skip_whitespace($reader) || |
|
46 $self->parser_error("No whitespace after SYSTEM identifier", $reader); |
|
47 return (SYSTEM => $self->SystemLiteral($reader)); |
|
48 } |
|
49 elsif ($data =~ /^PUBLIC/) { |
|
50 $reader->move_along(6); |
|
51 $self->skip_whitespace($reader) || |
|
52 $self->parser_error("No whitespace after PUBLIC identifier", $reader); |
|
53 |
|
54 my $quote = $self->quote($reader) || |
|
55 $self->parser_error("Not a quote character in PUBLIC identifier", $reader); |
|
56 |
|
57 my $data = $reader->data; |
|
58 my $pubid = ''; |
|
59 while(1) { |
|
60 $self->parser_error("EOF while looking for end of PUBLIC identifiier", $reader) |
|
61 unless length($data); |
|
62 |
|
63 if ($data =~ /^([^$quote]*)$quote/) { |
|
64 $pubid .= $1; |
|
65 $reader->move_along(length($1) + 1); |
|
66 last; |
|
67 } |
|
68 else { |
|
69 $pubid .= $data; |
|
70 $reader->move_along(length($data)); |
|
71 $data = $reader->data; |
|
72 } |
|
73 } |
|
74 |
|
75 if ($pubid !~ /^($PubidChar)+$/) { |
|
76 $self->parser_error("Invalid characters in PUBLIC identifier", $reader); |
|
77 } |
|
78 |
|
79 $self->skip_whitespace($reader) || |
|
80 $self->parser_error("Not whitespace after PUBLIC ID in DOCTYPE", $reader); |
|
81 |
|
82 return (PUBLIC => $pubid, |
|
83 SYSTEM => $self->SystemLiteral($reader)); |
|
84 } |
|
85 else { |
|
86 return; |
|
87 } |
|
88 |
|
89 return 1; |
|
90 } |
|
91 |
|
92 sub SystemLiteral { |
|
93 my ($self, $reader) = @_; |
|
94 |
|
95 my $quote = $self->quote($reader); |
|
96 |
|
97 my $data = $reader->data; |
|
98 my $systemid = ''; |
|
99 while (1) { |
|
100 $self->parser_error("EOF found while looking for end of Sytem Literal", $reader) |
|
101 unless length($data); |
|
102 if ($data =~ /^([^$quote]*)$quote/) { |
|
103 $systemid .= $1; |
|
104 $reader->move_along(length($1) + 1); |
|
105 return $systemid; |
|
106 } |
|
107 else { |
|
108 $systemid .= $data; |
|
109 $reader->move_along(length($data)); |
|
110 $data = $reader->data; |
|
111 } |
|
112 } |
|
113 } |
|
114 |
|
115 sub InternalSubset { |
|
116 my ($self, $reader) = @_; |
|
117 |
|
118 return 0 unless $reader->match('['); |
|
119 |
|
120 1 while $self->IntSubsetDecl($reader); |
|
121 |
|
122 $reader->match(']') or $self->parser_error("No close bracket on internal subset (found: " . $reader->data, $reader); |
|
123 $self->skip_whitespace($reader); |
|
124 return 1; |
|
125 } |
|
126 |
|
127 sub IntSubsetDecl { |
|
128 my ($self, $reader) = @_; |
|
129 |
|
130 return $self->DeclSep($reader) || $self->markupdecl($reader); |
|
131 } |
|
132 |
|
133 sub DeclSep { |
|
134 my ($self, $reader) = @_; |
|
135 |
|
136 if ($self->skip_whitespace($reader)) { |
|
137 return 1; |
|
138 } |
|
139 |
|
140 if ($self->PEReference($reader)) { |
|
141 return 1; |
|
142 } |
|
143 |
|
144 # if ($self->ParsedExtSubset($reader)) { |
|
145 # return 1; |
|
146 # } |
|
147 |
|
148 return 0; |
|
149 } |
|
150 |
|
151 sub PEReference { |
|
152 my ($self, $reader) = @_; |
|
153 |
|
154 return 0 unless $reader->match('%'); |
|
155 |
|
156 my $peref = $self->Name($reader) || |
|
157 $self->parser_error("PEReference did not find a Name", $reader); |
|
158 # TODO - load/parse the peref |
|
159 |
|
160 $reader->match(';') or $self->parser_error("Invalid token in PEReference", $reader); |
|
161 return 1; |
|
162 } |
|
163 |
|
164 sub markupdecl { |
|
165 my ($self, $reader) = @_; |
|
166 |
|
167 if ($self->elementdecl($reader) || |
|
168 $self->AttlistDecl($reader) || |
|
169 $self->EntityDecl($reader) || |
|
170 $self->NotationDecl($reader) || |
|
171 $self->PI($reader) || |
|
172 $self->Comment($reader)) |
|
173 { |
|
174 return 1; |
|
175 } |
|
176 |
|
177 return 0; |
|
178 } |
|
179 |
|
180 1; |
|