dummy_foundation/lib/XML/XQL/DOM.pm
changeset 4 60053dab7e2a
parent 3 8b87ea768cb8
child 5 842a773e65f2
child 6 c34a018f3291
equal deleted inserted replaced
3:8b87ea768cb8 4:60053dab7e2a
     1 ############################################################################
       
     2 # Copyright (c) 1998 Enno Derksen
       
     3 # All rights reserved.
       
     4 # This program is free software; you can redistribute it and/or modify it
       
     5 # under the same terms as Perl itself.
       
     6 ############################################################################
       
     7 #
       
     8 # Functions added to the XML::DOM implementation for XQL support
       
     9 #
       
    10 # NOTE: This code is a bad example of how to use XML::DOM.
       
    11 # I'm accessing internal (private) data members for a little gain in performance.
       
    12 # When the internal DOM implementation changes, this code will no longer work.
       
    13 # But since I maintain XML::DOM, it's easy for me to keep them in sync.
       
    14 # Regular users are adviced to use the XML::DOM API as described in the 
       
    15 # documentation.
       
    16 #
       
    17 
       
    18 use strict;
       
    19 package XML::XQL::DOM;
       
    20 
       
    21 BEGIN
       
    22 {
       
    23     require XML::DOM;
       
    24 
       
    25     # import constant field definitions, e.g. _Doc
       
    26     import XML::DOM::Node qw{ :Fields };
       
    27 }
       
    28 
       
    29 package XML::DOM::Node;
       
    30 
       
    31 sub xql
       
    32 {
       
    33     my $self = shift;
       
    34 
       
    35     # Odd number of args, assume first is XQL expression without 'Expr' key
       
    36     unshift @_, 'Expr' if (@_ % 2 == 1);
       
    37     my $query = new XML::XQL::Query (@_);
       
    38     my @result = $query->solve ($self);
       
    39     $query->dispose;
       
    40 
       
    41     @result;
       
    42 }
       
    43 
       
    44 sub xql_sortKey
       
    45 {
       
    46     my $key = $_[0]->[_SortKey];
       
    47     return $key if defined $key;
       
    48 
       
    49     $key = XML::XQL::createSortKey ($_[0]->[_Parent]->xql_sortKey, 
       
    50 				    $_[0]->xql_childIndex, 1);
       
    51 #print "xql_sortKey $_[0] ind=" . $_[0]->xql_childIndex . " key=$key str=" . XML::XQL::keyStr($key) . "\n";
       
    52     $_[0]->[_SortKey] = $key;
       
    53 }
       
    54 
       
    55 # Find previous sibling that is not a text node with ignorable whitespace
       
    56 sub xql_prevNonWS
       
    57 {
       
    58     my $self = shift;
       
    59     my $parent = $self->[_Parent];
       
    60     return unless $parent;
       
    61 
       
    62     for (my $i = $parent->getChildIndex ($self) - 1; $i >= 0; $i--)
       
    63     {
       
    64 	my $node = $parent->getChildAtIndex ($i);
       
    65 	return $node unless $node->xql_isIgnorableWS;	# skip whitespace
       
    66     }
       
    67     undef;
       
    68 }
       
    69 
       
    70 # True if it's a Text node with just whitespace and xml::space != "preserve"
       
    71 sub xql_isIgnorableWS
       
    72 {
       
    73     0;
       
    74 }
       
    75 
       
    76 # Whether the node should preserve whitespace
       
    77 # It should if it has attribute xml:space="preserve"
       
    78 sub xql_preserveSpace
       
    79 {
       
    80     $_[0]->[_Parent]->xql_preserveSpace;
       
    81 }
       
    82 
       
    83 sub xql_element
       
    84 {
       
    85 #?? I wonder which implemention is used for e.g. DOM::Text, since XML::XQL::Node also has an implementation
       
    86     [];
       
    87 }
       
    88 
       
    89 sub xql_document
       
    90 {
       
    91     $_[0]->[_Doc];
       
    92 }
       
    93 
       
    94 sub xql_node
       
    95 {
       
    96     my $kids = $_[0]->[_C];
       
    97     if (defined $kids)
       
    98     {
       
    99 	# Must copy the list or else we return a blessed reference
       
   100 	# (which causes trouble later on)
       
   101 	my @list = @$kids;
       
   102 	return \@list;
       
   103     }
       
   104 
       
   105     [];
       
   106 }
       
   107 
       
   108 #?? implement something to support NamedNodeMaps in DocumentType
       
   109 sub xql_childIndex
       
   110 {
       
   111     $_[0]->[_Parent]->getChildIndex ($_[0]);
       
   112 }
       
   113 
       
   114 #?? implement something to support NamedNodeMaps in DocumentType
       
   115 sub xql_childCount
       
   116 {
       
   117     my $ch = $_[0]->[_C];
       
   118     defined $ch ? scalar(@$ch) : 0;
       
   119 }
       
   120 
       
   121 sub xql_parent
       
   122 {
       
   123     $_[0]->[_Parent];
       
   124 }
       
   125 
       
   126 sub xql_DOM_nodeType
       
   127 {
       
   128     $_[0]->getNodeType;
       
   129 }
       
   130 
       
   131 sub xql_nodeType
       
   132 {
       
   133     $_[0]->getNodeType;
       
   134 }
       
   135 
       
   136 # As it appears in the XML document
       
   137 sub xql_xmlString
       
   138 {
       
   139     $_[0]->toString;
       
   140 }
       
   141 
       
   142 package XML::DOM::Element;
       
   143 
       
   144 sub xql_attribute
       
   145 {
       
   146     my ($node, $attrName) = @_;
       
   147 
       
   148     if (defined $attrName)
       
   149     {
       
   150 	my $attr = $node->getAttributeNode ($attrName);
       
   151 	defined ($attr) ? [ $attr ] : [];
       
   152     }
       
   153     else
       
   154     {
       
   155 	defined $node->[_A] ? $node->[_A]->getValues : [];
       
   156     }
       
   157 }
       
   158 
       
   159 # Used by XML::XQL::Union::genSortKey to generate sort keys
       
   160 # Returns the maximum of the number of children and the number of Attr nodes.
       
   161 sub xql_childCount
       
   162 {
       
   163     my $n = scalar @{$_[0]->[_C]};
       
   164     my $m = defined $_[0]->[_A] ? $_[0]->[_A]->getLength : 0;
       
   165     return $n > $m ? $n : $m;
       
   166 }
       
   167 
       
   168 sub xql_element
       
   169 {
       
   170     my ($node, $elem) = @_;
       
   171 
       
   172     my @list;
       
   173     if (defined $elem)
       
   174     {
       
   175 	for my $kid (@{$node->[_C]})
       
   176 	{
       
   177 	    push @list, $kid if $kid->isElementNode && $kid->[_TagName] eq $elem;
       
   178 	}
       
   179     }
       
   180     else
       
   181     {
       
   182 	for my $kid (@{$node->[_C]})
       
   183 	{
       
   184 	    push @list, $kid if $kid->isElementNode;
       
   185 	}
       
   186     }
       
   187     \@list;
       
   188 }
       
   189 
       
   190 sub xql_nodeName
       
   191 {
       
   192     $_[0]->[_TagName];
       
   193 }
       
   194 
       
   195 sub xql_baseName
       
   196 {
       
   197     my $name = $_[0]->[_TagName];
       
   198     $name =~ s/^\w*://;
       
   199     $name;
       
   200 }
       
   201 
       
   202 sub xql_prefix
       
   203 {
       
   204     my $name = $_[0]->[_TagName];
       
   205     $name =~ /([^:]+):/;
       
   206     $1;
       
   207 }
       
   208 
       
   209 sub xql_rawText
       
   210 {
       
   211     my ($self, $recurse) = @_;
       
   212     $recurse = 1 unless defined $recurse;
       
   213 
       
   214     my $text = "";
       
   215 
       
   216     for my $kid (@{$self->xql_node})
       
   217     {
       
   218 	my $type = $kid->xql_nodeType;
       
   219 
       
   220 	# type=1: element
       
   221 	# type=3: text (Text, CDATASection, EntityReference)
       
   222 	if (($type == 1 && $recurse) || $type == 3)
       
   223 	{
       
   224 	    $text .= $kid->xql_rawText ($recurse);
       
   225 	}
       
   226     }
       
   227     $text;
       
   228 }
       
   229 
       
   230 sub xql_text
       
   231 {
       
   232     my ($self, $recurse) = @_;
       
   233     $recurse = 1 unless defined $recurse;
       
   234 
       
   235     my $j = -1;
       
   236     my @text;
       
   237     my $last_was_text = 0;
       
   238 
       
   239     # Collect text blocks. Consecutive blocks of Text, CDataSection and 
       
   240     # EntityReference nodes should be merged without stripping and without
       
   241     # putting spaces in between.
       
   242     for my $kid (@{$self->xql_node})
       
   243     {
       
   244 	my $type = $kid->xql_nodeType;
       
   245 
       
   246 	if ($type == 1)	    # 1: element
       
   247 	{
       
   248 	    if ($recurse)
       
   249 	    {
       
   250 		$text[++$j] = $kid->xql_text ($recurse);
       
   251 	    }
       
   252 	    $last_was_text = 0;
       
   253 	}
       
   254 	elsif ($type == 3)  # 3: text (Text, CDATASection, EntityReference)
       
   255 	{
       
   256 	    ++$j unless $last_was_text;		# next text block
       
   257 	    $text[$j] .= $kid->getData;
       
   258 	    $last_was_text = 1;
       
   259 	}
       
   260 	else	# e.g. Comment
       
   261 	{
       
   262 	    $last_was_text = 0;
       
   263 	}
       
   264     }
       
   265 
       
   266     # trim whitespace and remove empty blocks
       
   267     my $i = 0;
       
   268     my $n = @text;
       
   269     while ($i < $n)
       
   270     {
       
   271 	# similar to XML::XQL::trimSpace
       
   272 	$text[$i] =~ s/^\s+//;
       
   273 	$text[$i] =~ s/\s+$//;
       
   274 
       
   275 	if ($text[$i] eq "")
       
   276 	{
       
   277 	    splice (@text, $i, 1);	# remove empty block
       
   278 	    $n--;
       
   279 	}
       
   280 	else
       
   281 	{
       
   282 	    $i++;
       
   283 	}
       
   284     }
       
   285     join (" ", @text);
       
   286 }
       
   287 
       
   288 #
       
   289 # Returns a list of text blocks for this Element.
       
   290 # A text block is a concatenation of consecutive text-containing nodes (i.e.
       
   291 # Text, CDATASection or EntityReference nodes.)
       
   292 # For each text block a reference to an array is returned with the following
       
   293 # 3 items:
       
   294 #  [0] index of first node of the text block
       
   295 #  [1] index of last node of the text block
       
   296 #  [2] concatenation of the raw text (of the nodes in this text block)
       
   297 #
       
   298 # The text blocks are returned in reverse order for the convenience of
       
   299 # the routines that want to modify the text blocks.
       
   300 #
       
   301 sub xql_rawTextBlocks
       
   302 {
       
   303     my ($self) = @_;
       
   304 
       
   305     my @result;
       
   306     my $curr;
       
   307     my $prevWasText = 0;
       
   308     my $kids = $self->[_C];
       
   309     my $n = @$kids;
       
   310     for (my $i = 0; $i < $n; $i++)
       
   311     {
       
   312 	my $node = $kids->[$i];
       
   313 	# 3: text (Text, CDATASection, EntityReference)
       
   314 	if ($node->xql_nodeType == 3)
       
   315 	{
       
   316 	    if ($prevWasText)
       
   317 	    {
       
   318 		$curr->[1] = $i;
       
   319 		$curr->[2] .= $node->getData;
       
   320 	    }
       
   321 	    else
       
   322 	    {
       
   323 		$curr = [$i, $i, $node->getData];
       
   324 		unshift @result, $curr;
       
   325 		$prevWasText = 1;
       
   326 	    }
       
   327 	}
       
   328 	else
       
   329 	{
       
   330 	    $prevWasText = 0;
       
   331 	}
       
   332     }
       
   333     @result;
       
   334 }
       
   335 
       
   336 sub xql_replaceBlockWithText
       
   337 {
       
   338     my ($self, $start, $end, $text) = @_;
       
   339     for (my $i = $end; $i > $start; $i--)
       
   340     {
       
   341 	# dispose of the old nodes
       
   342 	$self->removeChild ($self->[_C]->[$i])->dispose;
       
   343     }
       
   344     my $node = $self->[_C]->[$start];
       
   345     my $newNode = $self->[_Doc]->createTextNode ($text);
       
   346     $self->replaceChild ($newNode, $node)->dispose;
       
   347 }
       
   348 
       
   349 sub xql_setValue
       
   350 {
       
   351     my ($self, $str) = @_;
       
   352     # Remove all children
       
   353     for my $kid (@{$self->[_C]})
       
   354     {
       
   355 	$self->removeChild ($kid);
       
   356     }
       
   357     # Add a (single) text node
       
   358     $self->appendChild ($self->[_Doc]->createTextNode ($str));
       
   359 }
       
   360 
       
   361 sub xql_value
       
   362 {
       
   363     XML::XQL::elementValue ($_[0]);
       
   364 }
       
   365 
       
   366 sub xql_preserveSpace
       
   367 {
       
   368     # attribute value should be "preserve" (1), "default" (0) or "" (ask parent)
       
   369     my $space = $_[0]->getAttribute ("xml:space");
       
   370     $space eq "" ? $_[0]->[_Parent]->xql_preserveSpace : ($space eq "preserve");
       
   371 }
       
   372 
       
   373 package XML::DOM::Attr;
       
   374 
       
   375 sub xql_sortKey
       
   376 {
       
   377     my $key = $_[0]->[_SortKey];
       
   378     return $key if defined $key;
       
   379 
       
   380     $_[0]->[_SortKey] = XML::XQL::createSortKey ($_[0]->xql_parent->xql_sortKey, 
       
   381 						$_[0]->xql_childIndex, 0);
       
   382 }
       
   383 
       
   384 sub xql_nodeName
       
   385 {
       
   386     $_[0]->getNodeName;
       
   387 }
       
   388 
       
   389 sub xql_text
       
   390 {
       
   391     XML::XQL::trimSpace ($_[0]->getValue);
       
   392 }
       
   393 
       
   394 sub xql_rawText
       
   395 {
       
   396     $_[0]->getValue;
       
   397 }
       
   398 
       
   399 sub xql_value
       
   400 {
       
   401     XML::XQL::attrValue ($_[0]);
       
   402 }
       
   403 
       
   404 sub xql_setValue
       
   405 {
       
   406     $_[0]->setValue ($_[1]);
       
   407 }
       
   408 
       
   409 sub xql_baseName
       
   410 {
       
   411     my $name = $_[0]->getNodeName;
       
   412     $name =~ s/^\w*://;
       
   413     $name;
       
   414 }
       
   415 
       
   416 sub xql_prefix
       
   417 {
       
   418     my $name = $_[0]->getNodeName;
       
   419     $name =~ s/:\w*$//;
       
   420     $name;
       
   421 }
       
   422 
       
   423 sub xql_parent
       
   424 {
       
   425     $_[0]->[_UsedIn]->{''}->{Parent};
       
   426 }
       
   427 
       
   428 sub xql_childIndex
       
   429 {
       
   430     my $map = $_[0]->[_UsedIn];
       
   431     $map ? $map->getChildIndex ($_[0]) : 0;
       
   432 }
       
   433 
       
   434 package XML::DOM::Text;
       
   435 
       
   436 sub xql_rawText
       
   437 {
       
   438     $_[0]->[_Data];
       
   439 }
       
   440 
       
   441 sub xql_text
       
   442 {
       
   443     XML::XQL::trimSpace ($_[0]->[_Data]);
       
   444 }
       
   445 
       
   446 sub xql_setValue
       
   447 {
       
   448     $_[0]->setData ($_[1]);
       
   449 }
       
   450 
       
   451 sub xql_isIgnorableWS
       
   452 {
       
   453     $_[0]->[_Data] =~ /^\s*$/ &&
       
   454     !$_[0]->xql_preserveSpace;
       
   455 }
       
   456 
       
   457 package XML::DOM::CDATASection;
       
   458 
       
   459 sub xql_rawText
       
   460 {
       
   461     $_[0]->[_Data];
       
   462 }
       
   463 
       
   464 sub xql_text
       
   465 {
       
   466     XML::XQL::trimSpace ($_[0]->[_Data]);
       
   467 }
       
   468 
       
   469 sub xql_setValue
       
   470 {
       
   471     $_[0]->setData ($_[1]);
       
   472 }
       
   473 
       
   474 sub xql_nodeType
       
   475 {
       
   476     3;	# it contains text, so XQL spec states it's a text node
       
   477 }
       
   478 
       
   479 package XML::DOM::EntityReference;
       
   480 
       
   481 BEGIN
       
   482 {
       
   483     # import constant field definitions, e.g. _Data
       
   484     import XML::DOM::CharacterData qw{ :Fields };
       
   485 }
       
   486 
       
   487 sub xql_text
       
   488 {
       
   489     $_[0]->getData;
       
   490 }
       
   491 
       
   492 sub xql_rawText
       
   493 {
       
   494     XML::XQL::trimSpace ($_[0]->[_Data]);
       
   495 }
       
   496 
       
   497 sub xql_setValue
       
   498 {
       
   499     $_[0]->setData ($_[1]);
       
   500 }
       
   501 
       
   502 sub xql_nodeType
       
   503 {
       
   504     3;	# it contains text, so XQL spec states it's a text node
       
   505 }
       
   506 
       
   507 package XML::DOM::Document;
       
   508 
       
   509 BEGIN
       
   510 {
       
   511     # import constant field definitions, e.g. _TagName
       
   512     import XML::DOM::Element qw{ :Fields };
       
   513 }
       
   514 
       
   515 sub xql_sortKey
       
   516 {
       
   517     "";
       
   518 }
       
   519 
       
   520 sub xql_element
       
   521 {
       
   522     my ($node, $elem) = @_;
       
   523 
       
   524     my @list;
       
   525     if (defined $elem)
       
   526     {
       
   527 	for my $kid (@{$node->[_C]})
       
   528 	{
       
   529 	    push @list, $kid if $kid->isElementNode && $kid->[_TagName] eq $elem;
       
   530 	}
       
   531     }
       
   532     else
       
   533     {
       
   534 	for my $kid (@{$node->[_C]})
       
   535 	{
       
   536 	    push @list, $kid if $kid->isElementNode;
       
   537 	}
       
   538     }
       
   539     \@list;
       
   540 }
       
   541 
       
   542 sub xql_parent
       
   543 {
       
   544     undef;
       
   545 }
       
   546 
       
   547 # By default the elements in a document don't preserve whitespace
       
   548 sub xql_preserveSpace
       
   549 {
       
   550     0;
       
   551 }
       
   552 
       
   553 package XML::DOM::DocumentFragment;
       
   554 
       
   555 BEGIN
       
   556 {
       
   557     # import constant field definitions, e.g. _TagName
       
   558     import XML::DOM::Element qw{ :Fields };
       
   559 }
       
   560 
       
   561 sub xql_element
       
   562 {
       
   563     my ($node, $elemName) = @_;
       
   564 
       
   565     my @list;
       
   566     if (defined $elemName)
       
   567     {
       
   568 	for my $kid (@{$node->[_C]})
       
   569 	{
       
   570 	    push @list, $kid if $kid->isElementNode && $kid->[_TagName] eq $elemName;
       
   571 	}
       
   572     }
       
   573     else
       
   574     {
       
   575 	for my $kid (@{$node->[_C]})
       
   576 	{
       
   577 	    push @list, $kid if $kid->isElementNode;
       
   578 	}
       
   579     }
       
   580     \@list;
       
   581 }
       
   582 
       
   583 sub xql_parent
       
   584 {
       
   585     undef;
       
   586 }
       
   587 
       
   588 1; # module loaded successfuly
       
   589 
       
   590 __END__
       
   591 
       
   592 =head1 NAME
       
   593 
       
   594 XML::XQL::DOM - Adds XQL support to XML::DOM nodes
       
   595 
       
   596 =head1 SYNOPSIS
       
   597 
       
   598  use XML::XQL;
       
   599  use XML::XQL::DOM;
       
   600 
       
   601  $parser = new XML::DOM::Parser;
       
   602  $doc = $parser->parsefile ("file.xml");
       
   603 
       
   604  # Return all elements with tagName='title' under the root element 'book'
       
   605  $query = new XML::XQL::Query (Expr => "book/title");
       
   606  @result = $query->solve ($doc);
       
   607 
       
   608  # Or (to save some typing)
       
   609  @result = XML::XQL::solve ("book/title", $doc);
       
   610 
       
   611  # Or (see XML::DOM::Node)
       
   612  @result = $doc->xql ("book/title");
       
   613 
       
   614 =head1 DESCRIPTION
       
   615 
       
   616 XML::XQL::DOM adds methods to L<XML::DOM> nodes to support XQL queries
       
   617 on XML::DOM document structures.
       
   618 
       
   619 See L<XML::XQL> and L<XML::XQL::Query> for more details.
       
   620 L<XML::DOM::Node> describes the B<xql()> method.
       
   621 
       
   622