deprecated/buildtools/buildsystemtools/lib/XML/XQL/DOM.pm
author kelvzhu
Wed, 27 Oct 2010 16:03:51 +0800
changeset 662 60be34e1b006
parent 655 3f65fd25dfd4
permissions -rw-r--r--
Merge

############################################################################
# Copyright (c) 1998 Enno Derksen
# All rights reserved.
# This program is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.
############################################################################
#
# Functions added to the XML::DOM implementation for XQL support
#
# NOTE: This code is a bad example of how to use XML::DOM.
# I'm accessing internal (private) data members for a little gain in performance.
# When the internal DOM implementation changes, this code will no longer work.
# But since I maintain XML::DOM, it's easy for me to keep them in sync.
# Regular users are adviced to use the XML::DOM API as described in the 
# documentation.
#

use strict;
package XML::XQL::DOM;

BEGIN
{
    require XML::DOM;

    # import constant field definitions, e.g. _Doc
    import XML::DOM::Node qw{ :Fields };
}

package XML::DOM::Node;

sub xql
{
    my $self = shift;

    # Odd number of args, assume first is XQL expression without 'Expr' key
    unshift @_, 'Expr' if (@_ % 2 == 1);
    my $query = new XML::XQL::Query (@_);
    my @result = $query->solve ($self);
    $query->dispose;

    @result;
}

sub xql_sortKey
{
    my $key = $_[0]->[_SortKey];
    return $key if defined $key;

    $key = XML::XQL::createSortKey ($_[0]->[_Parent]->xql_sortKey, 
				    $_[0]->xql_childIndex, 1);
#print "xql_sortKey $_[0] ind=" . $_[0]->xql_childIndex . " key=$key str=" . XML::XQL::keyStr($key) . "\n";
    $_[0]->[_SortKey] = $key;
}

# Find previous sibling that is not a text node with ignorable whitespace
sub xql_prevNonWS
{
    my $self = shift;
    my $parent = $self->[_Parent];
    return unless $parent;

    for (my $i = $parent->getChildIndex ($self) - 1; $i >= 0; $i--)
    {
	my $node = $parent->getChildAtIndex ($i);
	return $node unless $node->xql_isIgnorableWS;	# skip whitespace
    }
    undef;
}

# True if it's a Text node with just whitespace and xml::space != "preserve"
sub xql_isIgnorableWS
{
    0;
}

# Whether the node should preserve whitespace
# It should if it has attribute xml:space="preserve"
sub xql_preserveSpace
{
    $_[0]->[_Parent]->xql_preserveSpace;
}

sub xql_element
{
#?? I wonder which implemention is used for e.g. DOM::Text, since XML::XQL::Node also has an implementation
    [];
}

sub xql_document
{
    $_[0]->[_Doc];
}

sub xql_node
{
    my $kids = $_[0]->[_C];
    if (defined $kids)
    {
	# Must copy the list or else we return a blessed reference
	# (which causes trouble later on)
	my @list = @$kids;
	return \@list;
    }

    [];
}

#?? implement something to support NamedNodeMaps in DocumentType
sub xql_childIndex
{
    $_[0]->[_Parent]->getChildIndex ($_[0]);
}

#?? implement something to support NamedNodeMaps in DocumentType
sub xql_childCount
{
    my $ch = $_[0]->[_C];
    defined $ch ? scalar(@$ch) : 0;
}

sub xql_parent
{
    $_[0]->[_Parent];
}

sub xql_DOM_nodeType
{
    $_[0]->getNodeType;
}

sub xql_nodeType
{
    $_[0]->getNodeType;
}

# As it appears in the XML document
sub xql_xmlString
{
    $_[0]->toString;
}

package XML::DOM::Element;

sub xql_attribute
{
    my ($node, $attrName) = @_;

    if (defined $attrName)
    {
	my $attr = $node->getAttributeNode ($attrName);
	defined ($attr) ? [ $attr ] : [];
    }
    else
    {
	defined $node->[_A] ? $node->[_A]->getValues : [];
    }
}

# Used by XML::XQL::Union::genSortKey to generate sort keys
# Returns the maximum of the number of children and the number of Attr nodes.
sub xql_childCount
{
    my $n = scalar @{$_[0]->[_C]};
    my $m = defined $_[0]->[_A] ? $_[0]->[_A]->getLength : 0;
    return $n > $m ? $n : $m;
}

sub xql_element
{
    my ($node, $elem) = @_;

    my @list;
    if (defined $elem)
    {
	for my $kid (@{$node->[_C]})
	{
	    push @list, $kid if $kid->isElementNode && $kid->[_TagName] eq $elem;
	}
    }
    else
    {
	for my $kid (@{$node->[_C]})
	{
	    push @list, $kid if $kid->isElementNode;
	}
    }
    \@list;
}

sub xql_nodeName
{
    $_[0]->[_TagName];
}

sub xql_baseName
{
    my $name = $_[0]->[_TagName];
    $name =~ s/^\w*://;
    $name;
}

sub xql_prefix
{
    my $name = $_[0]->[_TagName];
    $name =~ /([^:]+):/;
    $1;
}

sub xql_rawText
{
    my ($self, $recurse) = @_;
    $recurse = 1 unless defined $recurse;

    my $text = "";

    for my $kid (@{$self->xql_node})
    {
	my $type = $kid->xql_nodeType;

	# type=1: element
	# type=3: text (Text, CDATASection, EntityReference)
	if (($type == 1 && $recurse) || $type == 3)
	{
	    $text .= $kid->xql_rawText ($recurse);
	}
    }
    $text;
}

sub xql_text
{
    my ($self, $recurse) = @_;
    $recurse = 1 unless defined $recurse;

    my $j = -1;
    my @text;
    my $last_was_text = 0;

    # Collect text blocks. Consecutive blocks of Text, CDataSection and 
    # EntityReference nodes should be merged without stripping and without
    # putting spaces in between.
    for my $kid (@{$self->xql_node})
    {
	my $type = $kid->xql_nodeType;

	if ($type == 1)	    # 1: element
	{
	    if ($recurse)
	    {
		$text[++$j] = $kid->xql_text ($recurse);
	    }
	    $last_was_text = 0;
	}
	elsif ($type == 3)  # 3: text (Text, CDATASection, EntityReference)
	{
	    ++$j unless $last_was_text;		# next text block
	    $text[$j] .= $kid->getData;
	    $last_was_text = 1;
	}
	else	# e.g. Comment
	{
	    $last_was_text = 0;
	}
    }

    # trim whitespace and remove empty blocks
    my $i = 0;
    my $n = @text;
    while ($i < $n)
    {
	# similar to XML::XQL::trimSpace
	$text[$i] =~ s/^\s+//;
	$text[$i] =~ s/\s+$//;

	if ($text[$i] eq "")
	{
	    splice (@text, $i, 1);	# remove empty block
	    $n--;
	}
	else
	{
	    $i++;
	}
    }
    join (" ", @text);
}

#
# Returns a list of text blocks for this Element.
# A text block is a concatenation of consecutive text-containing nodes (i.e.
# Text, CDATASection or EntityReference nodes.)
# For each text block a reference to an array is returned with the following
# 3 items:
#  [0] index of first node of the text block
#  [1] index of last node of the text block
#  [2] concatenation of the raw text (of the nodes in this text block)
#
# The text blocks are returned in reverse order for the convenience of
# the routines that want to modify the text blocks.
#
sub xql_rawTextBlocks
{
    my ($self) = @_;

    my @result;
    my $curr;
    my $prevWasText = 0;
    my $kids = $self->[_C];
    my $n = @$kids;
    for (my $i = 0; $i < $n; $i++)
    {
	my $node = $kids->[$i];
	# 3: text (Text, CDATASection, EntityReference)
	if ($node->xql_nodeType == 3)
	{
	    if ($prevWasText)
	    {
		$curr->[1] = $i;
		$curr->[2] .= $node->getData;
	    }
	    else
	    {
		$curr = [$i, $i, $node->getData];
		unshift @result, $curr;
		$prevWasText = 1;
	    }
	}
	else
	{
	    $prevWasText = 0;
	}
    }
    @result;
}

sub xql_replaceBlockWithText
{
    my ($self, $start, $end, $text) = @_;
    for (my $i = $end; $i > $start; $i--)
    {
	# dispose of the old nodes
	$self->removeChild ($self->[_C]->[$i])->dispose;
    }
    my $node = $self->[_C]->[$start];
    my $newNode = $self->[_Doc]->createTextNode ($text);
    $self->replaceChild ($newNode, $node)->dispose;
}

sub xql_setValue
{
    my ($self, $str) = @_;
    # Remove all children
    for my $kid (@{$self->[_C]})
    {
	$self->removeChild ($kid);
    }
    # Add a (single) text node
    $self->appendChild ($self->[_Doc]->createTextNode ($str));
}

sub xql_value
{
    XML::XQL::elementValue ($_[0]);
}

sub xql_preserveSpace
{
    # attribute value should be "preserve" (1), "default" (0) or "" (ask parent)
    my $space = $_[0]->getAttribute ("xml:space");
    $space eq "" ? $_[0]->[_Parent]->xql_preserveSpace : ($space eq "preserve");
}

package XML::DOM::Attr;

sub xql_sortKey
{
    my $key = $_[0]->[_SortKey];
    return $key if defined $key;

    $_[0]->[_SortKey] = XML::XQL::createSortKey ($_[0]->xql_parent->xql_sortKey, 
						$_[0]->xql_childIndex, 0);
}

sub xql_nodeName
{
    $_[0]->getNodeName;
}

sub xql_text
{
    XML::XQL::trimSpace ($_[0]->getValue);
}

sub xql_rawText
{
    $_[0]->getValue;
}

sub xql_value
{
    XML::XQL::attrValue ($_[0]);
}

sub xql_setValue
{
    $_[0]->setValue ($_[1]);
}

sub xql_baseName
{
    my $name = $_[0]->getNodeName;
    $name =~ s/^\w*://;
    $name;
}

sub xql_prefix
{
    my $name = $_[0]->getNodeName;
    $name =~ s/:\w*$//;
    $name;
}

sub xql_parent
{
    $_[0]->[_UsedIn]->{''}->{Parent};
}

sub xql_childIndex
{
    my $map = $_[0]->[_UsedIn];
    $map ? $map->getChildIndex ($_[0]) : 0;
}

package XML::DOM::Text;

sub xql_rawText
{
    $_[0]->[_Data];
}

sub xql_text
{
    XML::XQL::trimSpace ($_[0]->[_Data]);
}

sub xql_setValue
{
    $_[0]->setData ($_[1]);
}

sub xql_isIgnorableWS
{
    $_[0]->[_Data] =~ /^\s*$/ &&
    !$_[0]->xql_preserveSpace;
}

package XML::DOM::CDATASection;

sub xql_rawText
{
    $_[0]->[_Data];
}

sub xql_text
{
    XML::XQL::trimSpace ($_[0]->[_Data]);
}

sub xql_setValue
{
    $_[0]->setData ($_[1]);
}

sub xql_nodeType
{
    3;	# it contains text, so XQL spec states it's a text node
}

package XML::DOM::EntityReference;

BEGIN
{
    # import constant field definitions, e.g. _Data
    import XML::DOM::CharacterData qw{ :Fields };
}

sub xql_text
{
    $_[0]->getData;
}

sub xql_rawText
{
    XML::XQL::trimSpace ($_[0]->[_Data]);
}

sub xql_setValue
{
    $_[0]->setData ($_[1]);
}

sub xql_nodeType
{
    3;	# it contains text, so XQL spec states it's a text node
}

package XML::DOM::Document;

BEGIN
{
    # import constant field definitions, e.g. _TagName
    import XML::DOM::Element qw{ :Fields };
}

sub xql_sortKey
{
    "";
}

sub xql_element
{
    my ($node, $elem) = @_;

    my @list;
    if (defined $elem)
    {
	for my $kid (@{$node->[_C]})
	{
	    push @list, $kid if $kid->isElementNode && $kid->[_TagName] eq $elem;
	}
    }
    else
    {
	for my $kid (@{$node->[_C]})
	{
	    push @list, $kid if $kid->isElementNode;
	}
    }
    \@list;
}

sub xql_parent
{
    undef;
}

# By default the elements in a document don't preserve whitespace
sub xql_preserveSpace
{
    0;
}

package XML::DOM::DocumentFragment;

BEGIN
{
    # import constant field definitions, e.g. _TagName
    import XML::DOM::Element qw{ :Fields };
}

sub xql_element
{
    my ($node, $elemName) = @_;

    my @list;
    if (defined $elemName)
    {
	for my $kid (@{$node->[_C]})
	{
	    push @list, $kid if $kid->isElementNode && $kid->[_TagName] eq $elemName;
	}
    }
    else
    {
	for my $kid (@{$node->[_C]})
	{
	    push @list, $kid if $kid->isElementNode;
	}
    }
    \@list;
}

sub xql_parent
{
    undef;
}

1; # module loaded successfuly

__END__

=head1 NAME

XML::XQL::DOM - Adds XQL support to XML::DOM nodes

=head1 SYNOPSIS

 use XML::XQL;
 use XML::XQL::DOM;

 $parser = new XML::DOM::Parser;
 $doc = $parser->parsefile ("file.xml");

 # Return all elements with tagName='title' under the root element 'book'
 $query = new XML::XQL::Query (Expr => "book/title");
 @result = $query->solve ($doc);

 # Or (to save some typing)
 @result = XML::XQL::solve ("book/title", $doc);

 # Or (see XML::DOM::Node)
 @result = $doc->xql ("book/title");

=head1 DESCRIPTION

XML::XQL::DOM adds methods to L<XML::DOM> nodes to support XQL queries
on XML::DOM document structures.

See L<XML::XQL> and L<XML::XQL::Query> for more details.
L<XML::DOM::Node> describes the B<xql()> method.