|
1 ############################################################################ |
|
2 # Copyright (c) 1998 Enno Derksen |
|
3 # All rights reserved. |
|
4 # This program is free software; you can redistribute it and/or modify it |
|
5 # under the same terms as Perl itself. |
|
6 ############################################################################ |
|
7 # |
|
8 # Functions added to the XML::DOM implementation for XQL support |
|
9 # |
|
10 # NOTE: This code is a bad example of how to use XML::DOM. |
|
11 # I'm accessing internal (private) data members for a little gain in performance. |
|
12 # When the internal DOM implementation changes, this code will no longer work. |
|
13 # But since I maintain XML::DOM, it's easy for me to keep them in sync. |
|
14 # Regular users are adviced to use the XML::DOM API as described in the |
|
15 # documentation. |
|
16 # |
|
17 |
|
18 use strict; |
|
19 package XML::XQL::DOM; |
|
20 |
|
21 BEGIN |
|
22 { |
|
23 require XML::DOM; |
|
24 |
|
25 # import constant field definitions, e.g. _Doc |
|
26 import XML::DOM::Node qw{ :Fields }; |
|
27 } |
|
28 |
|
29 package XML::DOM::Node; |
|
30 |
|
31 sub xql |
|
32 { |
|
33 my $self = shift; |
|
34 |
|
35 # Odd number of args, assume first is XQL expression without 'Expr' key |
|
36 unshift @_, 'Expr' if (@_ % 2 == 1); |
|
37 my $query = new XML::XQL::Query (@_); |
|
38 my @result = $query->solve ($self); |
|
39 $query->dispose; |
|
40 |
|
41 @result; |
|
42 } |
|
43 |
|
44 sub xql_sortKey |
|
45 { |
|
46 my $key = $_[0]->[_SortKey]; |
|
47 return $key if defined $key; |
|
48 |
|
49 $key = XML::XQL::createSortKey ($_[0]->[_Parent]->xql_sortKey, |
|
50 $_[0]->xql_childIndex, 1); |
|
51 #print "xql_sortKey $_[0] ind=" . $_[0]->xql_childIndex . " key=$key str=" . XML::XQL::keyStr($key) . "\n"; |
|
52 $_[0]->[_SortKey] = $key; |
|
53 } |
|
54 |
|
55 # Find previous sibling that is not a text node with ignorable whitespace |
|
56 sub xql_prevNonWS |
|
57 { |
|
58 my $self = shift; |
|
59 my $parent = $self->[_Parent]; |
|
60 return unless $parent; |
|
61 |
|
62 for (my $i = $parent->getChildIndex ($self) - 1; $i >= 0; $i--) |
|
63 { |
|
64 my $node = $parent->getChildAtIndex ($i); |
|
65 return $node unless $node->xql_isIgnorableWS; # skip whitespace |
|
66 } |
|
67 undef; |
|
68 } |
|
69 |
|
70 # True if it's a Text node with just whitespace and xml::space != "preserve" |
|
71 sub xql_isIgnorableWS |
|
72 { |
|
73 0; |
|
74 } |
|
75 |
|
76 # Whether the node should preserve whitespace |
|
77 # It should if it has attribute xml:space="preserve" |
|
78 sub xql_preserveSpace |
|
79 { |
|
80 $_[0]->[_Parent]->xql_preserveSpace; |
|
81 } |
|
82 |
|
83 sub xql_element |
|
84 { |
|
85 #?? I wonder which implemention is used for e.g. DOM::Text, since XML::XQL::Node also has an implementation |
|
86 []; |
|
87 } |
|
88 |
|
89 sub xql_document |
|
90 { |
|
91 $_[0]->[_Doc]; |
|
92 } |
|
93 |
|
94 sub xql_node |
|
95 { |
|
96 my $kids = $_[0]->[_C]; |
|
97 if (defined $kids) |
|
98 { |
|
99 # Must copy the list or else we return a blessed reference |
|
100 # (which causes trouble later on) |
|
101 my @list = @$kids; |
|
102 return \@list; |
|
103 } |
|
104 |
|
105 []; |
|
106 } |
|
107 |
|
108 #?? implement something to support NamedNodeMaps in DocumentType |
|
109 sub xql_childIndex |
|
110 { |
|
111 $_[0]->[_Parent]->getChildIndex ($_[0]); |
|
112 } |
|
113 |
|
114 #?? implement something to support NamedNodeMaps in DocumentType |
|
115 sub xql_childCount |
|
116 { |
|
117 my $ch = $_[0]->[_C]; |
|
118 defined $ch ? scalar(@$ch) : 0; |
|
119 } |
|
120 |
|
121 sub xql_parent |
|
122 { |
|
123 $_[0]->[_Parent]; |
|
124 } |
|
125 |
|
126 sub xql_DOM_nodeType |
|
127 { |
|
128 $_[0]->getNodeType; |
|
129 } |
|
130 |
|
131 sub xql_nodeType |
|
132 { |
|
133 $_[0]->getNodeType; |
|
134 } |
|
135 |
|
136 # As it appears in the XML document |
|
137 sub xql_xmlString |
|
138 { |
|
139 $_[0]->toString; |
|
140 } |
|
141 |
|
142 package XML::DOM::Element; |
|
143 |
|
144 sub xql_attribute |
|
145 { |
|
146 my ($node, $attrName) = @_; |
|
147 |
|
148 if (defined $attrName) |
|
149 { |
|
150 my $attr = $node->getAttributeNode ($attrName); |
|
151 defined ($attr) ? [ $attr ] : []; |
|
152 } |
|
153 else |
|
154 { |
|
155 defined $node->[_A] ? $node->[_A]->getValues : []; |
|
156 } |
|
157 } |
|
158 |
|
159 # Used by XML::XQL::Union::genSortKey to generate sort keys |
|
160 # Returns the maximum of the number of children and the number of Attr nodes. |
|
161 sub xql_childCount |
|
162 { |
|
163 my $n = scalar @{$_[0]->[_C]}; |
|
164 my $m = defined $_[0]->[_A] ? $_[0]->[_A]->getLength : 0; |
|
165 return $n > $m ? $n : $m; |
|
166 } |
|
167 |
|
168 sub xql_element |
|
169 { |
|
170 my ($node, $elem) = @_; |
|
171 |
|
172 my @list; |
|
173 if (defined $elem) |
|
174 { |
|
175 for my $kid (@{$node->[_C]}) |
|
176 { |
|
177 push @list, $kid if $kid->isElementNode && $kid->[_TagName] eq $elem; |
|
178 } |
|
179 } |
|
180 else |
|
181 { |
|
182 for my $kid (@{$node->[_C]}) |
|
183 { |
|
184 push @list, $kid if $kid->isElementNode; |
|
185 } |
|
186 } |
|
187 \@list; |
|
188 } |
|
189 |
|
190 sub xql_nodeName |
|
191 { |
|
192 $_[0]->[_TagName]; |
|
193 } |
|
194 |
|
195 sub xql_baseName |
|
196 { |
|
197 my $name = $_[0]->[_TagName]; |
|
198 $name =~ s/^\w*://; |
|
199 $name; |
|
200 } |
|
201 |
|
202 sub xql_prefix |
|
203 { |
|
204 my $name = $_[0]->[_TagName]; |
|
205 $name =~ /([^:]+):/; |
|
206 $1; |
|
207 } |
|
208 |
|
209 sub xql_rawText |
|
210 { |
|
211 my ($self, $recurse) = @_; |
|
212 $recurse = 1 unless defined $recurse; |
|
213 |
|
214 my $text = ""; |
|
215 |
|
216 for my $kid (@{$self->xql_node}) |
|
217 { |
|
218 my $type = $kid->xql_nodeType; |
|
219 |
|
220 # type=1: element |
|
221 # type=3: text (Text, CDATASection, EntityReference) |
|
222 if (($type == 1 && $recurse) || $type == 3) |
|
223 { |
|
224 $text .= $kid->xql_rawText ($recurse); |
|
225 } |
|
226 } |
|
227 $text; |
|
228 } |
|
229 |
|
230 sub xql_text |
|
231 { |
|
232 my ($self, $recurse) = @_; |
|
233 $recurse = 1 unless defined $recurse; |
|
234 |
|
235 my $j = -1; |
|
236 my @text; |
|
237 my $last_was_text = 0; |
|
238 |
|
239 # Collect text blocks. Consecutive blocks of Text, CDataSection and |
|
240 # EntityReference nodes should be merged without stripping and without |
|
241 # putting spaces in between. |
|
242 for my $kid (@{$self->xql_node}) |
|
243 { |
|
244 my $type = $kid->xql_nodeType; |
|
245 |
|
246 if ($type == 1) # 1: element |
|
247 { |
|
248 if ($recurse) |
|
249 { |
|
250 $text[++$j] = $kid->xql_text ($recurse); |
|
251 } |
|
252 $last_was_text = 0; |
|
253 } |
|
254 elsif ($type == 3) # 3: text (Text, CDATASection, EntityReference) |
|
255 { |
|
256 ++$j unless $last_was_text; # next text block |
|
257 $text[$j] .= $kid->getData; |
|
258 $last_was_text = 1; |
|
259 } |
|
260 else # e.g. Comment |
|
261 { |
|
262 $last_was_text = 0; |
|
263 } |
|
264 } |
|
265 |
|
266 # trim whitespace and remove empty blocks |
|
267 my $i = 0; |
|
268 my $n = @text; |
|
269 while ($i < $n) |
|
270 { |
|
271 # similar to XML::XQL::trimSpace |
|
272 $text[$i] =~ s/^\s+//; |
|
273 $text[$i] =~ s/\s+$//; |
|
274 |
|
275 if ($text[$i] eq "") |
|
276 { |
|
277 splice (@text, $i, 1); # remove empty block |
|
278 $n--; |
|
279 } |
|
280 else |
|
281 { |
|
282 $i++; |
|
283 } |
|
284 } |
|
285 join (" ", @text); |
|
286 } |
|
287 |
|
288 # |
|
289 # Returns a list of text blocks for this Element. |
|
290 # A text block is a concatenation of consecutive text-containing nodes (i.e. |
|
291 # Text, CDATASection or EntityReference nodes.) |
|
292 # For each text block a reference to an array is returned with the following |
|
293 # 3 items: |
|
294 # [0] index of first node of the text block |
|
295 # [1] index of last node of the text block |
|
296 # [2] concatenation of the raw text (of the nodes in this text block) |
|
297 # |
|
298 # The text blocks are returned in reverse order for the convenience of |
|
299 # the routines that want to modify the text blocks. |
|
300 # |
|
301 sub xql_rawTextBlocks |
|
302 { |
|
303 my ($self) = @_; |
|
304 |
|
305 my @result; |
|
306 my $curr; |
|
307 my $prevWasText = 0; |
|
308 my $kids = $self->[_C]; |
|
309 my $n = @$kids; |
|
310 for (my $i = 0; $i < $n; $i++) |
|
311 { |
|
312 my $node = $kids->[$i]; |
|
313 # 3: text (Text, CDATASection, EntityReference) |
|
314 if ($node->xql_nodeType == 3) |
|
315 { |
|
316 if ($prevWasText) |
|
317 { |
|
318 $curr->[1] = $i; |
|
319 $curr->[2] .= $node->getData; |
|
320 } |
|
321 else |
|
322 { |
|
323 $curr = [$i, $i, $node->getData]; |
|
324 unshift @result, $curr; |
|
325 $prevWasText = 1; |
|
326 } |
|
327 } |
|
328 else |
|
329 { |
|
330 $prevWasText = 0; |
|
331 } |
|
332 } |
|
333 @result; |
|
334 } |
|
335 |
|
336 sub xql_replaceBlockWithText |
|
337 { |
|
338 my ($self, $start, $end, $text) = @_; |
|
339 for (my $i = $end; $i > $start; $i--) |
|
340 { |
|
341 # dispose of the old nodes |
|
342 $self->removeChild ($self->[_C]->[$i])->dispose; |
|
343 } |
|
344 my $node = $self->[_C]->[$start]; |
|
345 my $newNode = $self->[_Doc]->createTextNode ($text); |
|
346 $self->replaceChild ($newNode, $node)->dispose; |
|
347 } |
|
348 |
|
349 sub xql_setValue |
|
350 { |
|
351 my ($self, $str) = @_; |
|
352 # Remove all children |
|
353 for my $kid (@{$self->[_C]}) |
|
354 { |
|
355 $self->removeChild ($kid); |
|
356 } |
|
357 # Add a (single) text node |
|
358 $self->appendChild ($self->[_Doc]->createTextNode ($str)); |
|
359 } |
|
360 |
|
361 sub xql_value |
|
362 { |
|
363 XML::XQL::elementValue ($_[0]); |
|
364 } |
|
365 |
|
366 sub xql_preserveSpace |
|
367 { |
|
368 # attribute value should be "preserve" (1), "default" (0) or "" (ask parent) |
|
369 my $space = $_[0]->getAttribute ("xml:space"); |
|
370 $space eq "" ? $_[0]->[_Parent]->xql_preserveSpace : ($space eq "preserve"); |
|
371 } |
|
372 |
|
373 package XML::DOM::Attr; |
|
374 |
|
375 sub xql_sortKey |
|
376 { |
|
377 my $key = $_[0]->[_SortKey]; |
|
378 return $key if defined $key; |
|
379 |
|
380 $_[0]->[_SortKey] = XML::XQL::createSortKey ($_[0]->xql_parent->xql_sortKey, |
|
381 $_[0]->xql_childIndex, 0); |
|
382 } |
|
383 |
|
384 sub xql_nodeName |
|
385 { |
|
386 $_[0]->getNodeName; |
|
387 } |
|
388 |
|
389 sub xql_text |
|
390 { |
|
391 XML::XQL::trimSpace ($_[0]->getValue); |
|
392 } |
|
393 |
|
394 sub xql_rawText |
|
395 { |
|
396 $_[0]->getValue; |
|
397 } |
|
398 |
|
399 sub xql_value |
|
400 { |
|
401 XML::XQL::attrValue ($_[0]); |
|
402 } |
|
403 |
|
404 sub xql_setValue |
|
405 { |
|
406 $_[0]->setValue ($_[1]); |
|
407 } |
|
408 |
|
409 sub xql_baseName |
|
410 { |
|
411 my $name = $_[0]->getNodeName; |
|
412 $name =~ s/^\w*://; |
|
413 $name; |
|
414 } |
|
415 |
|
416 sub xql_prefix |
|
417 { |
|
418 my $name = $_[0]->getNodeName; |
|
419 $name =~ s/:\w*$//; |
|
420 $name; |
|
421 } |
|
422 |
|
423 sub xql_parent |
|
424 { |
|
425 $_[0]->[_UsedIn]->{''}->{Parent}; |
|
426 } |
|
427 |
|
428 sub xql_childIndex |
|
429 { |
|
430 my $map = $_[0]->[_UsedIn]; |
|
431 $map ? $map->getChildIndex ($_[0]) : 0; |
|
432 } |
|
433 |
|
434 package XML::DOM::Text; |
|
435 |
|
436 sub xql_rawText |
|
437 { |
|
438 $_[0]->[_Data]; |
|
439 } |
|
440 |
|
441 sub xql_text |
|
442 { |
|
443 XML::XQL::trimSpace ($_[0]->[_Data]); |
|
444 } |
|
445 |
|
446 sub xql_setValue |
|
447 { |
|
448 $_[0]->setData ($_[1]); |
|
449 } |
|
450 |
|
451 sub xql_isIgnorableWS |
|
452 { |
|
453 $_[0]->[_Data] =~ /^\s*$/ && |
|
454 !$_[0]->xql_preserveSpace; |
|
455 } |
|
456 |
|
457 package XML::DOM::CDATASection; |
|
458 |
|
459 sub xql_rawText |
|
460 { |
|
461 $_[0]->[_Data]; |
|
462 } |
|
463 |
|
464 sub xql_text |
|
465 { |
|
466 XML::XQL::trimSpace ($_[0]->[_Data]); |
|
467 } |
|
468 |
|
469 sub xql_setValue |
|
470 { |
|
471 $_[0]->setData ($_[1]); |
|
472 } |
|
473 |
|
474 sub xql_nodeType |
|
475 { |
|
476 3; # it contains text, so XQL spec states it's a text node |
|
477 } |
|
478 |
|
479 package XML::DOM::EntityReference; |
|
480 |
|
481 BEGIN |
|
482 { |
|
483 # import constant field definitions, e.g. _Data |
|
484 import XML::DOM::CharacterData qw{ :Fields }; |
|
485 } |
|
486 |
|
487 sub xql_text |
|
488 { |
|
489 $_[0]->getData; |
|
490 } |
|
491 |
|
492 sub xql_rawText |
|
493 { |
|
494 XML::XQL::trimSpace ($_[0]->[_Data]); |
|
495 } |
|
496 |
|
497 sub xql_setValue |
|
498 { |
|
499 $_[0]->setData ($_[1]); |
|
500 } |
|
501 |
|
502 sub xql_nodeType |
|
503 { |
|
504 3; # it contains text, so XQL spec states it's a text node |
|
505 } |
|
506 |
|
507 package XML::DOM::Document; |
|
508 |
|
509 BEGIN |
|
510 { |
|
511 # import constant field definitions, e.g. _TagName |
|
512 import XML::DOM::Element qw{ :Fields }; |
|
513 } |
|
514 |
|
515 sub xql_sortKey |
|
516 { |
|
517 ""; |
|
518 } |
|
519 |
|
520 sub xql_element |
|
521 { |
|
522 my ($node, $elem) = @_; |
|
523 |
|
524 my @list; |
|
525 if (defined $elem) |
|
526 { |
|
527 for my $kid (@{$node->[_C]}) |
|
528 { |
|
529 push @list, $kid if $kid->isElementNode && $kid->[_TagName] eq $elem; |
|
530 } |
|
531 } |
|
532 else |
|
533 { |
|
534 for my $kid (@{$node->[_C]}) |
|
535 { |
|
536 push @list, $kid if $kid->isElementNode; |
|
537 } |
|
538 } |
|
539 \@list; |
|
540 } |
|
541 |
|
542 sub xql_parent |
|
543 { |
|
544 undef; |
|
545 } |
|
546 |
|
547 # By default the elements in a document don't preserve whitespace |
|
548 sub xql_preserveSpace |
|
549 { |
|
550 0; |
|
551 } |
|
552 |
|
553 package XML::DOM::DocumentFragment; |
|
554 |
|
555 BEGIN |
|
556 { |
|
557 # import constant field definitions, e.g. _TagName |
|
558 import XML::DOM::Element qw{ :Fields }; |
|
559 } |
|
560 |
|
561 sub xql_element |
|
562 { |
|
563 my ($node, $elemName) = @_; |
|
564 |
|
565 my @list; |
|
566 if (defined $elemName) |
|
567 { |
|
568 for my $kid (@{$node->[_C]}) |
|
569 { |
|
570 push @list, $kid if $kid->isElementNode && $kid->[_TagName] eq $elemName; |
|
571 } |
|
572 } |
|
573 else |
|
574 { |
|
575 for my $kid (@{$node->[_C]}) |
|
576 { |
|
577 push @list, $kid if $kid->isElementNode; |
|
578 } |
|
579 } |
|
580 \@list; |
|
581 } |
|
582 |
|
583 sub xql_parent |
|
584 { |
|
585 undef; |
|
586 } |
|
587 |
|
588 1; # module loaded successfuly |
|
589 |
|
590 __END__ |
|
591 |
|
592 =head1 NAME |
|
593 |
|
594 XML::XQL::DOM - Adds XQL support to XML::DOM nodes |
|
595 |
|
596 =head1 SYNOPSIS |
|
597 |
|
598 use XML::XQL; |
|
599 use XML::XQL::DOM; |
|
600 |
|
601 $parser = new XML::DOM::Parser; |
|
602 $doc = $parser->parsefile ("file.xml"); |
|
603 |
|
604 # Return all elements with tagName='title' under the root element 'book' |
|
605 $query = new XML::XQL::Query (Expr => "book/title"); |
|
606 @result = $query->solve ($doc); |
|
607 |
|
608 # Or (to save some typing) |
|
609 @result = XML::XQL::solve ("book/title", $doc); |
|
610 |
|
611 # Or (see XML::DOM::Node) |
|
612 @result = $doc->xql ("book/title"); |
|
613 |
|
614 =head1 DESCRIPTION |
|
615 |
|
616 XML::XQL::DOM adds methods to L<XML::DOM> nodes to support XQL queries |
|
617 on XML::DOM document structures. |
|
618 |
|
619 See L<XML::XQL> and L<XML::XQL::Query> for more details. |
|
620 L<XML::DOM::Node> describes the B<xql()> method. |
|
621 |
|
622 |