deprecated/buildtools/buildsystemtools/lib/XML/Handler/BuildDOM.pm
author lorewang
Wed, 17 Nov 2010 11:24:29 +0800
changeset 685 39f7ecf8fbc7
parent 655 3f65fd25dfd4
permissions -rw-r--r--
pull from trunk

package XML::Handler::BuildDOM;
use strict;
use XML::DOM;

#
# TODO:
# - add support for parameter entity references
# - expand API: insert Elements in the tree or stuff into DocType etc.

sub new
{
    my ($class, %args) = @_;
    bless \%args, $class;
}

#-------- PerlSAX Handler methods ------------------------------

sub start_document # was Init
{
    my $self = shift;

    # Define Document if it's not set & not obtainable from Element or DocType
    $self->{Document} ||= 
	(defined $self->{Element} ? $self->{Element}->getOwnerDocument : undef)
     || (defined $self->{DocType} ? $self->{DocType}->getOwnerDocument : undef)
     || new XML::DOM::Document();

    $self->{Element} ||= $self->{Document};

    unless (defined $self->{DocType})
    {
	$self->{DocType} = $self->{Document}->getDoctype
	    if defined $self->{Document};

	unless (defined $self->{Doctype})
	{
#?? should be $doc->createDocType for extensibility!
	    $self->{DocType} = new XML::DOM::DocumentType ($self->{Document});
	    $self->{Document}->setDoctype ($self->{DocType});
	}
    }
  
    # Prepare for document prolog
    $self->{InProlog} = 1;

    # We haven't passed the root element yet
    $self->{EndDoc} = 0;

    undef $self->{LastText};
}

sub end_document # was Final
{
    my $self = shift;
    unless ($self->{SawDocType})
    {
	my $doctype = $self->{Document}->removeDoctype;
	$doctype->dispose;
#?? do we always want to destroy the Doctype?
    }
    $self->{Document};
}

sub characters # was Char
{
    my $self = $_[0];
    my $str = $_[1]->{Data};

    if ($self->{InCDATA} && $self->{KeepCDATA})
    {
	undef $self->{LastText};
	# Merge text with previous node if possible
	$self->{Element}->addCDATA ($str);
    }
    else
    {
	# Merge text with previous node if possible
	# Used to be:	$expat->{DOM_Element}->addText ($str);
	if ($self->{LastText})
	{
	    $self->{LastText}->appendData ($str);
	}
	else
	{
	    $self->{LastText} = $self->{Document}->createTextNode ($str);
	    $self->{Element}->appendChild ($self->{LastText});
	}
    }
}

sub start_element # was Start
{
    my ($self, $hash) = @_;
    my $elem = $hash->{Name};
    my $attr = $hash->{Attributes};

    my $parent = $self->{Element};
    my $doc = $self->{Document};
    
    if ($parent == $doc)
    {
	# End of document prolog, i.e. start of first Element
	$self->{InProlog} = 0;
    }
    
    undef $self->{LastText};
    my $node = $doc->createElement ($elem);
    $self->{Element} = $node;
    $parent->appendChild ($node);
    
    my $i = 0;
    my $n = scalar keys %$attr;
    return unless $n;

    if (exists $hash->{AttributeOrder})
    {
	my $defaulted = $hash->{Defaulted};
	my @order = @{ $hash->{AttributeOrder} };
	
	# Specified attributes
	for (my $i = 0; $i < $defaulted; $i++)
	{
	    my $a = $order[$i];
	    my $att = $doc->createAttribute ($a, $attr->{$a}, 1);
	    $node->setAttributeNode ($att);
	}

	# Defaulted attributes
	for (my $i = $defaulted; $i < @order; $i++)
	{
	    my $a = $order[$i];
	    my $att = $doc->createAttribute ($elem, $attr->{$a}, 0);
	    $node->setAttributeNode ($att);
	}
    }
    else
    {
	# We're assuming that all attributes were specified (1)
	for my $a (keys %$attr)
	{
	    my $att = $doc->createAttribute ($a, $attr->{$a}, 1);
	    $node->setAttributeNode ($att);
	}
    }
}

sub end_element
{
    my $self = shift;
    $self->{Element} = $self->{Element}->getParentNode;
    undef $self->{LastText};

    # Check for end of root element
    $self->{EndDoc} = 1 if ($self->{Element} == $self->{Document});
}

sub entity_reference # was Default
{
    my $self = $_[0];
    my $name = $_[1]->{Name};
    
    $self->{Element}->appendChild (
			    $self->{Document}->createEntityReference ($name));
    undef $self->{LastText};
}

sub start_cdata
{
    my $self = shift;
    $self->{InCDATA} = 1;
}

sub end_cdata
{
    my $self = shift;
    $self->{InCDATA} = 0;
}

sub comment
{
    my $self = $_[0];

    local $XML::DOM::IgnoreReadOnly = 1;

    undef $self->{LastText};
    my $comment = $self->{Document}->createComment ($_[1]->{Data});
    $self->{Element}->appendChild ($comment);
}

sub doctype_decl
{
    my ($self, $hash) = @_;

    $self->{DocType}->setParams ($hash->{Name}, $hash->{SystemId}, 
				 $hash->{PublicId}, $hash->{Internal});
    $self->{SawDocType} = 1;
}

sub attlist_decl
{
    my ($self, $hash) = @_;

    local $XML::DOM::IgnoreReadOnly = 1;

    $self->{DocType}->addAttDef ($hash->{ElementName},
				 $hash->{AttributeName},
				 $hash->{Type},
				 $hash->{Default},
				 $hash->{Fixed});
}

sub xml_decl
{
    my ($self, $hash) = @_;

    local $XML::DOM::IgnoreReadOnly = 1;

    undef $self->{LastText};
    $self->{Document}->setXMLDecl (new XML::DOM::XMLDecl ($self->{Document}, 
							  $hash->{Version},
							  $hash->{Encoding},
							  $hash->{Standalone}));
}

sub entity_decl
{
    my ($self, $hash) = @_;
    
    local $XML::DOM::IgnoreReadOnly = 1;

    # Parameter Entities names are passed starting with '%'
    my $parameter = 0;

#?? parameter entities currently not supported by PerlSAX!

    undef $self->{LastText};
    $self->{DocType}->addEntity ($parameter, $hash->{Name}, $hash->{Value}, 
				 $hash->{SystemId}, $hash->{PublicId}, 
				 $hash->{Notation});
}

# Unparsed is called when it encounters e.g:
#
#   <!ENTITY logo SYSTEM "http://server/logo.gif" NDATA gif>
#
sub unparsed_decl
{
    my ($self, $hash) = @_;

    local $XML::DOM::IgnoreReadOnly = 1;

    # same as regular ENTITY, as far as DOM is concerned
    $self->entity_decl ($hash);
}

sub element_decl
{
    my ($self, $hash) = @_;

    local $XML::DOM::IgnoreReadOnly = 1;

    undef $self->{LastText};
    $self->{DocType}->addElementDecl ($hash->{Name}, $hash->{Model});
}

sub notation_decl
{
    my ($self, $hash) = @_;

    local $XML::DOM::IgnoreReadOnly = 1;

    undef $self->{LastText};
    $self->{DocType}->addNotation ($hash->{Name}, $hash->{Base}, 
				   $hash->{SystemId}, $hash->{PublicId});
}

sub processing_instruction
{
    my ($self, $hash) = @_;

    local $XML::DOM::IgnoreReadOnly = 1;

    undef $self->{LastText};
    $self->{Element}->appendChild (new XML::DOM::ProcessingInstruction 
			    ($self->{Document}, $hash->{Target}, $hash->{Data}));
}

return 1;

__END__

=head1 NAME

XML::Handler::BuildDOM - PerlSAX handler that creates XML::DOM document structures

=head1 SYNOPSIS

 use XML::Handler::BuildDOM;
 use XML::Parser::PerlSAX;

 my $handler = new XML::Handler::BuildDOM (KeepCDATA => 1);
 my $parser = new XML::Parser::PerlSAX (Handler => $handler);

 my $doc = $parser->parsefile ("file.xml");

=head1 DESCRIPTION

XML::Handler::BuildDOM creates L<XML::DOM> document structures 
(i.e. L<XML::DOM::Document>) from PerlSAX events.

This class used to be called L<XML::PerlSAX::DOM> prior to libxml-enno 1.0.1.

=head2 CONSTRUCTOR OPTIONS

The XML::Handler::BuildDOM constructor supports the following options:

=over 4

=item * KeepCDATA => 1 

If set to 0 (default), CDATASections will be converted to regular text.

=item * Document => $doc

If undefined, start_document will extract it from Element or DocType (if set),
otherwise it will create a new XML::DOM::Document.

=item * Element => $elem

If undefined, it is set to Document. This will be the insertion point (or parent)
for the nodes defined by the following callbacks.

=item * DocType => $doctype

If undefined, start_document will extract it from Document (if possible).
Otherwise it adds a new XML::DOM::DocumentType to the Document.

=back