|
1 package XML::Handler::BuildDOM; |
|
2 use strict; |
|
3 use XML::DOM; |
|
4 |
|
5 # |
|
6 # TODO: |
|
7 # - add support for parameter entity references |
|
8 # - expand API: insert Elements in the tree or stuff into DocType etc. |
|
9 |
|
10 sub new |
|
11 { |
|
12 my ($class, %args) = @_; |
|
13 bless \%args, $class; |
|
14 } |
|
15 |
|
16 #-------- PerlSAX Handler methods ------------------------------ |
|
17 |
|
18 sub start_document # was Init |
|
19 { |
|
20 my $self = shift; |
|
21 |
|
22 # Define Document if it's not set & not obtainable from Element or DocType |
|
23 $self->{Document} ||= |
|
24 (defined $self->{Element} ? $self->{Element}->getOwnerDocument : undef) |
|
25 || (defined $self->{DocType} ? $self->{DocType}->getOwnerDocument : undef) |
|
26 || new XML::DOM::Document(); |
|
27 |
|
28 $self->{Element} ||= $self->{Document}; |
|
29 |
|
30 unless (defined $self->{DocType}) |
|
31 { |
|
32 $self->{DocType} = $self->{Document}->getDoctype |
|
33 if defined $self->{Document}; |
|
34 |
|
35 unless (defined $self->{Doctype}) |
|
36 { |
|
37 #?? should be $doc->createDocType for extensibility! |
|
38 $self->{DocType} = new XML::DOM::DocumentType ($self->{Document}); |
|
39 $self->{Document}->setDoctype ($self->{DocType}); |
|
40 } |
|
41 } |
|
42 |
|
43 # Prepare for document prolog |
|
44 $self->{InProlog} = 1; |
|
45 |
|
46 # We haven't passed the root element yet |
|
47 $self->{EndDoc} = 0; |
|
48 |
|
49 undef $self->{LastText}; |
|
50 } |
|
51 |
|
52 sub end_document # was Final |
|
53 { |
|
54 my $self = shift; |
|
55 unless ($self->{SawDocType}) |
|
56 { |
|
57 my $doctype = $self->{Document}->removeDoctype; |
|
58 $doctype->dispose; |
|
59 #?? do we always want to destroy the Doctype? |
|
60 } |
|
61 $self->{Document}; |
|
62 } |
|
63 |
|
64 sub characters # was Char |
|
65 { |
|
66 my $self = $_[0]; |
|
67 my $str = $_[1]->{Data}; |
|
68 |
|
69 if ($self->{InCDATA} && $self->{KeepCDATA}) |
|
70 { |
|
71 undef $self->{LastText}; |
|
72 # Merge text with previous node if possible |
|
73 $self->{Element}->addCDATA ($str); |
|
74 } |
|
75 else |
|
76 { |
|
77 # Merge text with previous node if possible |
|
78 # Used to be: $expat->{DOM_Element}->addText ($str); |
|
79 if ($self->{LastText}) |
|
80 { |
|
81 $self->{LastText}->appendData ($str); |
|
82 } |
|
83 else |
|
84 { |
|
85 $self->{LastText} = $self->{Document}->createTextNode ($str); |
|
86 $self->{Element}->appendChild ($self->{LastText}); |
|
87 } |
|
88 } |
|
89 } |
|
90 |
|
91 sub start_element # was Start |
|
92 { |
|
93 my ($self, $hash) = @_; |
|
94 my $elem = $hash->{Name}; |
|
95 my $attr = $hash->{Attributes}; |
|
96 |
|
97 my $parent = $self->{Element}; |
|
98 my $doc = $self->{Document}; |
|
99 |
|
100 if ($parent == $doc) |
|
101 { |
|
102 # End of document prolog, i.e. start of first Element |
|
103 $self->{InProlog} = 0; |
|
104 } |
|
105 |
|
106 undef $self->{LastText}; |
|
107 my $node = $doc->createElement ($elem); |
|
108 $self->{Element} = $node; |
|
109 $parent->appendChild ($node); |
|
110 |
|
111 my $i = 0; |
|
112 my $n = scalar keys %$attr; |
|
113 return unless $n; |
|
114 |
|
115 if (exists $hash->{AttributeOrder}) |
|
116 { |
|
117 my $defaulted = $hash->{Defaulted}; |
|
118 my @order = @{ $hash->{AttributeOrder} }; |
|
119 |
|
120 # Specified attributes |
|
121 for (my $i = 0; $i < $defaulted; $i++) |
|
122 { |
|
123 my $a = $order[$i]; |
|
124 my $att = $doc->createAttribute ($a, $attr->{$a}, 1); |
|
125 $node->setAttributeNode ($att); |
|
126 } |
|
127 |
|
128 # Defaulted attributes |
|
129 for (my $i = $defaulted; $i < @order; $i++) |
|
130 { |
|
131 my $a = $order[$i]; |
|
132 my $att = $doc->createAttribute ($elem, $attr->{$a}, 0); |
|
133 $node->setAttributeNode ($att); |
|
134 } |
|
135 } |
|
136 else |
|
137 { |
|
138 # We're assuming that all attributes were specified (1) |
|
139 for my $a (keys %$attr) |
|
140 { |
|
141 my $att = $doc->createAttribute ($a, $attr->{$a}, 1); |
|
142 $node->setAttributeNode ($att); |
|
143 } |
|
144 } |
|
145 } |
|
146 |
|
147 sub end_element |
|
148 { |
|
149 my $self = shift; |
|
150 $self->{Element} = $self->{Element}->getParentNode; |
|
151 undef $self->{LastText}; |
|
152 |
|
153 # Check for end of root element |
|
154 $self->{EndDoc} = 1 if ($self->{Element} == $self->{Document}); |
|
155 } |
|
156 |
|
157 sub entity_reference # was Default |
|
158 { |
|
159 my $self = $_[0]; |
|
160 my $name = $_[1]->{Name}; |
|
161 |
|
162 $self->{Element}->appendChild ( |
|
163 $self->{Document}->createEntityReference ($name)); |
|
164 undef $self->{LastText}; |
|
165 } |
|
166 |
|
167 sub start_cdata |
|
168 { |
|
169 my $self = shift; |
|
170 $self->{InCDATA} = 1; |
|
171 } |
|
172 |
|
173 sub end_cdata |
|
174 { |
|
175 my $self = shift; |
|
176 $self->{InCDATA} = 0; |
|
177 } |
|
178 |
|
179 sub comment |
|
180 { |
|
181 my $self = $_[0]; |
|
182 |
|
183 local $XML::DOM::IgnoreReadOnly = 1; |
|
184 |
|
185 undef $self->{LastText}; |
|
186 my $comment = $self->{Document}->createComment ($_[1]->{Data}); |
|
187 $self->{Element}->appendChild ($comment); |
|
188 } |
|
189 |
|
190 sub doctype_decl |
|
191 { |
|
192 my ($self, $hash) = @_; |
|
193 |
|
194 $self->{DocType}->setParams ($hash->{Name}, $hash->{SystemId}, |
|
195 $hash->{PublicId}, $hash->{Internal}); |
|
196 $self->{SawDocType} = 1; |
|
197 } |
|
198 |
|
199 sub attlist_decl |
|
200 { |
|
201 my ($self, $hash) = @_; |
|
202 |
|
203 local $XML::DOM::IgnoreReadOnly = 1; |
|
204 |
|
205 $self->{DocType}->addAttDef ($hash->{ElementName}, |
|
206 $hash->{AttributeName}, |
|
207 $hash->{Type}, |
|
208 $hash->{Default}, |
|
209 $hash->{Fixed}); |
|
210 } |
|
211 |
|
212 sub xml_decl |
|
213 { |
|
214 my ($self, $hash) = @_; |
|
215 |
|
216 local $XML::DOM::IgnoreReadOnly = 1; |
|
217 |
|
218 undef $self->{LastText}; |
|
219 $self->{Document}->setXMLDecl (new XML::DOM::XMLDecl ($self->{Document}, |
|
220 $hash->{Version}, |
|
221 $hash->{Encoding}, |
|
222 $hash->{Standalone})); |
|
223 } |
|
224 |
|
225 sub entity_decl |
|
226 { |
|
227 my ($self, $hash) = @_; |
|
228 |
|
229 local $XML::DOM::IgnoreReadOnly = 1; |
|
230 |
|
231 # Parameter Entities names are passed starting with '%' |
|
232 my $parameter = 0; |
|
233 |
|
234 #?? parameter entities currently not supported by PerlSAX! |
|
235 |
|
236 undef $self->{LastText}; |
|
237 $self->{DocType}->addEntity ($parameter, $hash->{Name}, $hash->{Value}, |
|
238 $hash->{SystemId}, $hash->{PublicId}, |
|
239 $hash->{Notation}); |
|
240 } |
|
241 |
|
242 # Unparsed is called when it encounters e.g: |
|
243 # |
|
244 # <!ENTITY logo SYSTEM "http://server/logo.gif" NDATA gif> |
|
245 # |
|
246 sub unparsed_decl |
|
247 { |
|
248 my ($self, $hash) = @_; |
|
249 |
|
250 local $XML::DOM::IgnoreReadOnly = 1; |
|
251 |
|
252 # same as regular ENTITY, as far as DOM is concerned |
|
253 $self->entity_decl ($hash); |
|
254 } |
|
255 |
|
256 sub element_decl |
|
257 { |
|
258 my ($self, $hash) = @_; |
|
259 |
|
260 local $XML::DOM::IgnoreReadOnly = 1; |
|
261 |
|
262 undef $self->{LastText}; |
|
263 $self->{DocType}->addElementDecl ($hash->{Name}, $hash->{Model}); |
|
264 } |
|
265 |
|
266 sub notation_decl |
|
267 { |
|
268 my ($self, $hash) = @_; |
|
269 |
|
270 local $XML::DOM::IgnoreReadOnly = 1; |
|
271 |
|
272 undef $self->{LastText}; |
|
273 $self->{DocType}->addNotation ($hash->{Name}, $hash->{Base}, |
|
274 $hash->{SystemId}, $hash->{PublicId}); |
|
275 } |
|
276 |
|
277 sub processing_instruction |
|
278 { |
|
279 my ($self, $hash) = @_; |
|
280 |
|
281 local $XML::DOM::IgnoreReadOnly = 1; |
|
282 |
|
283 undef $self->{LastText}; |
|
284 $self->{Element}->appendChild (new XML::DOM::ProcessingInstruction |
|
285 ($self->{Document}, $hash->{Target}, $hash->{Data})); |
|
286 } |
|
287 |
|
288 return 1; |
|
289 |
|
290 __END__ |
|
291 |
|
292 =head1 NAME |
|
293 |
|
294 XML::Handler::BuildDOM - PerlSAX handler that creates XML::DOM document structures |
|
295 |
|
296 =head1 SYNOPSIS |
|
297 |
|
298 use XML::Handler::BuildDOM; |
|
299 use XML::Parser::PerlSAX; |
|
300 |
|
301 my $handler = new XML::Handler::BuildDOM (KeepCDATA => 1); |
|
302 my $parser = new XML::Parser::PerlSAX (Handler => $handler); |
|
303 |
|
304 my $doc = $parser->parsefile ("file.xml"); |
|
305 |
|
306 =head1 DESCRIPTION |
|
307 |
|
308 XML::Handler::BuildDOM creates L<XML::DOM> document structures |
|
309 (i.e. L<XML::DOM::Document>) from PerlSAX events. |
|
310 |
|
311 This class used to be called L<XML::PerlSAX::DOM> prior to libxml-enno 1.0.1. |
|
312 |
|
313 =head2 CONSTRUCTOR OPTIONS |
|
314 |
|
315 The XML::Handler::BuildDOM constructor supports the following options: |
|
316 |
|
317 =over 4 |
|
318 |
|
319 =item * KeepCDATA => 1 |
|
320 |
|
321 If set to 0 (default), CDATASections will be converted to regular text. |
|
322 |
|
323 =item * Document => $doc |
|
324 |
|
325 If undefined, start_document will extract it from Element or DocType (if set), |
|
326 otherwise it will create a new XML::DOM::Document. |
|
327 |
|
328 =item * Element => $elem |
|
329 |
|
330 If undefined, it is set to Document. This will be the insertion point (or parent) |
|
331 for the nodes defined by the following callbacks. |
|
332 |
|
333 =item * DocType => $doctype |
|
334 |
|
335 If undefined, start_document will extract it from Document (if possible). |
|
336 Otherwise it adds a new XML::DOM::DocumentType to the Document. |
|
337 |
|
338 =back |