655
|
1 |
package XML::Handler::BuildDOM;
|
|
2 |
use strict;
|
|
3 |
use XML::DOM;
|
|
4 |
|
|
5 |
#
|
|
6 |
# TODO:
|
|
7 |
# - add support for parameter entity references
|
|
8 |
# - expand API: insert Elements in the tree or stuff into DocType etc.
|
|
9 |
|
|
10 |
sub new
|
|
11 |
{
|
|
12 |
my ($class, %args) = @_;
|
|
13 |
bless \%args, $class;
|
|
14 |
}
|
|
15 |
|
|
16 |
#-------- PerlSAX Handler methods ------------------------------
|
|
17 |
|
|
18 |
sub start_document # was Init
|
|
19 |
{
|
|
20 |
my $self = shift;
|
|
21 |
|
|
22 |
# Define Document if it's not set & not obtainable from Element or DocType
|
|
23 |
$self->{Document} ||=
|
|
24 |
(defined $self->{Element} ? $self->{Element}->getOwnerDocument : undef)
|
|
25 |
|| (defined $self->{DocType} ? $self->{DocType}->getOwnerDocument : undef)
|
|
26 |
|| new XML::DOM::Document();
|
|
27 |
|
|
28 |
$self->{Element} ||= $self->{Document};
|
|
29 |
|
|
30 |
unless (defined $self->{DocType})
|
|
31 |
{
|
|
32 |
$self->{DocType} = $self->{Document}->getDoctype
|
|
33 |
if defined $self->{Document};
|
|
34 |
|
|
35 |
unless (defined $self->{Doctype})
|
|
36 |
{
|
|
37 |
#?? should be $doc->createDocType for extensibility!
|
|
38 |
$self->{DocType} = new XML::DOM::DocumentType ($self->{Document});
|
|
39 |
$self->{Document}->setDoctype ($self->{DocType});
|
|
40 |
}
|
|
41 |
}
|
|
42 |
|
|
43 |
# Prepare for document prolog
|
|
44 |
$self->{InProlog} = 1;
|
|
45 |
|
|
46 |
# We haven't passed the root element yet
|
|
47 |
$self->{EndDoc} = 0;
|
|
48 |
|
|
49 |
undef $self->{LastText};
|
|
50 |
}
|
|
51 |
|
|
52 |
sub end_document # was Final
|
|
53 |
{
|
|
54 |
my $self = shift;
|
|
55 |
unless ($self->{SawDocType})
|
|
56 |
{
|
|
57 |
my $doctype = $self->{Document}->removeDoctype;
|
|
58 |
$doctype->dispose;
|
|
59 |
#?? do we always want to destroy the Doctype?
|
|
60 |
}
|
|
61 |
$self->{Document};
|
|
62 |
}
|
|
63 |
|
|
64 |
sub characters # was Char
|
|
65 |
{
|
|
66 |
my $self = $_[0];
|
|
67 |
my $str = $_[1]->{Data};
|
|
68 |
|
|
69 |
if ($self->{InCDATA} && $self->{KeepCDATA})
|
|
70 |
{
|
|
71 |
undef $self->{LastText};
|
|
72 |
# Merge text with previous node if possible
|
|
73 |
$self->{Element}->addCDATA ($str);
|
|
74 |
}
|
|
75 |
else
|
|
76 |
{
|
|
77 |
# Merge text with previous node if possible
|
|
78 |
# Used to be: $expat->{DOM_Element}->addText ($str);
|
|
79 |
if ($self->{LastText})
|
|
80 |
{
|
|
81 |
$self->{LastText}->appendData ($str);
|
|
82 |
}
|
|
83 |
else
|
|
84 |
{
|
|
85 |
$self->{LastText} = $self->{Document}->createTextNode ($str);
|
|
86 |
$self->{Element}->appendChild ($self->{LastText});
|
|
87 |
}
|
|
88 |
}
|
|
89 |
}
|
|
90 |
|
|
91 |
sub start_element # was Start
|
|
92 |
{
|
|
93 |
my ($self, $hash) = @_;
|
|
94 |
my $elem = $hash->{Name};
|
|
95 |
my $attr = $hash->{Attributes};
|
|
96 |
|
|
97 |
my $parent = $self->{Element};
|
|
98 |
my $doc = $self->{Document};
|
|
99 |
|
|
100 |
if ($parent == $doc)
|
|
101 |
{
|
|
102 |
# End of document prolog, i.e. start of first Element
|
|
103 |
$self->{InProlog} = 0;
|
|
104 |
}
|
|
105 |
|
|
106 |
undef $self->{LastText};
|
|
107 |
my $node = $doc->createElement ($elem);
|
|
108 |
$self->{Element} = $node;
|
|
109 |
$parent->appendChild ($node);
|
|
110 |
|
|
111 |
my $i = 0;
|
|
112 |
my $n = scalar keys %$attr;
|
|
113 |
return unless $n;
|
|
114 |
|
|
115 |
if (exists $hash->{AttributeOrder})
|
|
116 |
{
|
|
117 |
my $defaulted = $hash->{Defaulted};
|
|
118 |
my @order = @{ $hash->{AttributeOrder} };
|
|
119 |
|
|
120 |
# Specified attributes
|
|
121 |
for (my $i = 0; $i < $defaulted; $i++)
|
|
122 |
{
|
|
123 |
my $a = $order[$i];
|
|
124 |
my $att = $doc->createAttribute ($a, $attr->{$a}, 1);
|
|
125 |
$node->setAttributeNode ($att);
|
|
126 |
}
|
|
127 |
|
|
128 |
# Defaulted attributes
|
|
129 |
for (my $i = $defaulted; $i < @order; $i++)
|
|
130 |
{
|
|
131 |
my $a = $order[$i];
|
|
132 |
my $att = $doc->createAttribute ($elem, $attr->{$a}, 0);
|
|
133 |
$node->setAttributeNode ($att);
|
|
134 |
}
|
|
135 |
}
|
|
136 |
else
|
|
137 |
{
|
|
138 |
# We're assuming that all attributes were specified (1)
|
|
139 |
for my $a (keys %$attr)
|
|
140 |
{
|
|
141 |
my $att = $doc->createAttribute ($a, $attr->{$a}, 1);
|
|
142 |
$node->setAttributeNode ($att);
|
|
143 |
}
|
|
144 |
}
|
|
145 |
}
|
|
146 |
|
|
147 |
sub end_element
|
|
148 |
{
|
|
149 |
my $self = shift;
|
|
150 |
$self->{Element} = $self->{Element}->getParentNode;
|
|
151 |
undef $self->{LastText};
|
|
152 |
|
|
153 |
# Check for end of root element
|
|
154 |
$self->{EndDoc} = 1 if ($self->{Element} == $self->{Document});
|
|
155 |
}
|
|
156 |
|
|
157 |
sub entity_reference # was Default
|
|
158 |
{
|
|
159 |
my $self = $_[0];
|
|
160 |
my $name = $_[1]->{Name};
|
|
161 |
|
|
162 |
$self->{Element}->appendChild (
|
|
163 |
$self->{Document}->createEntityReference ($name));
|
|
164 |
undef $self->{LastText};
|
|
165 |
}
|
|
166 |
|
|
167 |
sub start_cdata
|
|
168 |
{
|
|
169 |
my $self = shift;
|
|
170 |
$self->{InCDATA} = 1;
|
|
171 |
}
|
|
172 |
|
|
173 |
sub end_cdata
|
|
174 |
{
|
|
175 |
my $self = shift;
|
|
176 |
$self->{InCDATA} = 0;
|
|
177 |
}
|
|
178 |
|
|
179 |
sub comment
|
|
180 |
{
|
|
181 |
my $self = $_[0];
|
|
182 |
|
|
183 |
local $XML::DOM::IgnoreReadOnly = 1;
|
|
184 |
|
|
185 |
undef $self->{LastText};
|
|
186 |
my $comment = $self->{Document}->createComment ($_[1]->{Data});
|
|
187 |
$self->{Element}->appendChild ($comment);
|
|
188 |
}
|
|
189 |
|
|
190 |
sub doctype_decl
|
|
191 |
{
|
|
192 |
my ($self, $hash) = @_;
|
|
193 |
|
|
194 |
$self->{DocType}->setParams ($hash->{Name}, $hash->{SystemId},
|
|
195 |
$hash->{PublicId}, $hash->{Internal});
|
|
196 |
$self->{SawDocType} = 1;
|
|
197 |
}
|
|
198 |
|
|
199 |
sub attlist_decl
|
|
200 |
{
|
|
201 |
my ($self, $hash) = @_;
|
|
202 |
|
|
203 |
local $XML::DOM::IgnoreReadOnly = 1;
|
|
204 |
|
|
205 |
$self->{DocType}->addAttDef ($hash->{ElementName},
|
|
206 |
$hash->{AttributeName},
|
|
207 |
$hash->{Type},
|
|
208 |
$hash->{Default},
|
|
209 |
$hash->{Fixed});
|
|
210 |
}
|
|
211 |
|
|
212 |
sub xml_decl
|
|
213 |
{
|
|
214 |
my ($self, $hash) = @_;
|
|
215 |
|
|
216 |
local $XML::DOM::IgnoreReadOnly = 1;
|
|
217 |
|
|
218 |
undef $self->{LastText};
|
|
219 |
$self->{Document}->setXMLDecl (new XML::DOM::XMLDecl ($self->{Document},
|
|
220 |
$hash->{Version},
|
|
221 |
$hash->{Encoding},
|
|
222 |
$hash->{Standalone}));
|
|
223 |
}
|
|
224 |
|
|
225 |
sub entity_decl
|
|
226 |
{
|
|
227 |
my ($self, $hash) = @_;
|
|
228 |
|
|
229 |
local $XML::DOM::IgnoreReadOnly = 1;
|
|
230 |
|
|
231 |
# Parameter Entities names are passed starting with '%'
|
|
232 |
my $parameter = 0;
|
|
233 |
|
|
234 |
#?? parameter entities currently not supported by PerlSAX!
|
|
235 |
|
|
236 |
undef $self->{LastText};
|
|
237 |
$self->{DocType}->addEntity ($parameter, $hash->{Name}, $hash->{Value},
|
|
238 |
$hash->{SystemId}, $hash->{PublicId},
|
|
239 |
$hash->{Notation});
|
|
240 |
}
|
|
241 |
|
|
242 |
# Unparsed is called when it encounters e.g:
|
|
243 |
#
|
|
244 |
# <!ENTITY logo SYSTEM "http://server/logo.gif" NDATA gif>
|
|
245 |
#
|
|
246 |
sub unparsed_decl
|
|
247 |
{
|
|
248 |
my ($self, $hash) = @_;
|
|
249 |
|
|
250 |
local $XML::DOM::IgnoreReadOnly = 1;
|
|
251 |
|
|
252 |
# same as regular ENTITY, as far as DOM is concerned
|
|
253 |
$self->entity_decl ($hash);
|
|
254 |
}
|
|
255 |
|
|
256 |
sub element_decl
|
|
257 |
{
|
|
258 |
my ($self, $hash) = @_;
|
|
259 |
|
|
260 |
local $XML::DOM::IgnoreReadOnly = 1;
|
|
261 |
|
|
262 |
undef $self->{LastText};
|
|
263 |
$self->{DocType}->addElementDecl ($hash->{Name}, $hash->{Model});
|
|
264 |
}
|
|
265 |
|
|
266 |
sub notation_decl
|
|
267 |
{
|
|
268 |
my ($self, $hash) = @_;
|
|
269 |
|
|
270 |
local $XML::DOM::IgnoreReadOnly = 1;
|
|
271 |
|
|
272 |
undef $self->{LastText};
|
|
273 |
$self->{DocType}->addNotation ($hash->{Name}, $hash->{Base},
|
|
274 |
$hash->{SystemId}, $hash->{PublicId});
|
|
275 |
}
|
|
276 |
|
|
277 |
sub processing_instruction
|
|
278 |
{
|
|
279 |
my ($self, $hash) = @_;
|
|
280 |
|
|
281 |
local $XML::DOM::IgnoreReadOnly = 1;
|
|
282 |
|
|
283 |
undef $self->{LastText};
|
|
284 |
$self->{Element}->appendChild (new XML::DOM::ProcessingInstruction
|
|
285 |
($self->{Document}, $hash->{Target}, $hash->{Data}));
|
|
286 |
}
|
|
287 |
|
|
288 |
return 1;
|
|
289 |
|
|
290 |
__END__
|
|
291 |
|
|
292 |
=head1 NAME
|
|
293 |
|
|
294 |
XML::Handler::BuildDOM - PerlSAX handler that creates XML::DOM document structures
|
|
295 |
|
|
296 |
=head1 SYNOPSIS
|
|
297 |
|
|
298 |
use XML::Handler::BuildDOM;
|
|
299 |
use XML::Parser::PerlSAX;
|
|
300 |
|
|
301 |
my $handler = new XML::Handler::BuildDOM (KeepCDATA => 1);
|
|
302 |
my $parser = new XML::Parser::PerlSAX (Handler => $handler);
|
|
303 |
|
|
304 |
my $doc = $parser->parsefile ("file.xml");
|
|
305 |
|
|
306 |
=head1 DESCRIPTION
|
|
307 |
|
|
308 |
XML::Handler::BuildDOM creates L<XML::DOM> document structures
|
|
309 |
(i.e. L<XML::DOM::Document>) from PerlSAX events.
|
|
310 |
|
|
311 |
This class used to be called L<XML::PerlSAX::DOM> prior to libxml-enno 1.0.1.
|
|
312 |
|
|
313 |
=head2 CONSTRUCTOR OPTIONS
|
|
314 |
|
|
315 |
The XML::Handler::BuildDOM constructor supports the following options:
|
|
316 |
|
|
317 |
=over 4
|
|
318 |
|
|
319 |
=item * KeepCDATA => 1
|
|
320 |
|
|
321 |
If set to 0 (default), CDATASections will be converted to regular text.
|
|
322 |
|
|
323 |
=item * Document => $doc
|
|
324 |
|
|
325 |
If undefined, start_document will extract it from Element or DocType (if set),
|
|
326 |
otherwise it will create a new XML::DOM::Document.
|
|
327 |
|
|
328 |
=item * Element => $elem
|
|
329 |
|
|
330 |
If undefined, it is set to Document. This will be the insertion point (or parent)
|
|
331 |
for the nodes defined by the following callbacks.
|
|
332 |
|
|
333 |
=item * DocType => $doctype
|
|
334 |
|
|
335 |
If undefined, start_document will extract it from Document (if possible).
|
|
336 |
Otherwise it adds a new XML::DOM::DocumentType to the Document.
|
|
337 |
|
|
338 |
=back
|