dummy_foundation/lib/XML/Handler/BuildDOM.pm
changeset 0 02cd6b52f378
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dummy_foundation/lib/XML/Handler/BuildDOM.pm	Thu May 28 10:10:03 2009 +0100
@@ -0,0 +1,338 @@
+package XML::Handler::BuildDOM;
+use strict;
+use XML::DOM;
+
+#
+# TODO:
+# - add support for parameter entity references
+# - expand API: insert Elements in the tree or stuff into DocType etc.
+
+sub new
+{
+    my ($class, %args) = @_;
+    bless \%args, $class;
+}
+
+#-------- PerlSAX Handler methods ------------------------------
+
+sub start_document # was Init
+{
+    my $self = shift;
+
+    # Define Document if it's not set & not obtainable from Element or DocType
+    $self->{Document} ||= 
+	(defined $self->{Element} ? $self->{Element}->getOwnerDocument : undef)
+     || (defined $self->{DocType} ? $self->{DocType}->getOwnerDocument : undef)
+     || new XML::DOM::Document();
+
+    $self->{Element} ||= $self->{Document};
+
+    unless (defined $self->{DocType})
+    {
+	$self->{DocType} = $self->{Document}->getDoctype
+	    if defined $self->{Document};
+
+	unless (defined $self->{Doctype})
+	{
+#?? should be $doc->createDocType for extensibility!
+	    $self->{DocType} = new XML::DOM::DocumentType ($self->{Document});
+	    $self->{Document}->setDoctype ($self->{DocType});
+	}
+    }
+  
+    # Prepare for document prolog
+    $self->{InProlog} = 1;
+
+    # We haven't passed the root element yet
+    $self->{EndDoc} = 0;
+
+    undef $self->{LastText};
+}
+
+sub end_document # was Final
+{
+    my $self = shift;
+    unless ($self->{SawDocType})
+    {
+	my $doctype = $self->{Document}->removeDoctype;
+	$doctype->dispose;
+#?? do we always want to destroy the Doctype?
+    }
+    $self->{Document};
+}
+
+sub characters # was Char
+{
+    my $self = $_[0];
+    my $str = $_[1]->{Data};
+
+    if ($self->{InCDATA} && $self->{KeepCDATA})
+    {
+	undef $self->{LastText};
+	# Merge text with previous node if possible
+	$self->{Element}->addCDATA ($str);
+    }
+    else
+    {
+	# Merge text with previous node if possible
+	# Used to be:	$expat->{DOM_Element}->addText ($str);
+	if ($self->{LastText})
+	{
+	    $self->{LastText}->appendData ($str);
+	}
+	else
+	{
+	    $self->{LastText} = $self->{Document}->createTextNode ($str);
+	    $self->{Element}->appendChild ($self->{LastText});
+	}
+    }
+}
+
+sub start_element # was Start
+{
+    my ($self, $hash) = @_;
+    my $elem = $hash->{Name};
+    my $attr = $hash->{Attributes};
+
+    my $parent = $self->{Element};
+    my $doc = $self->{Document};
+    
+    if ($parent == $doc)
+    {
+	# End of document prolog, i.e. start of first Element
+	$self->{InProlog} = 0;
+    }
+    
+    undef $self->{LastText};
+    my $node = $doc->createElement ($elem);
+    $self->{Element} = $node;
+    $parent->appendChild ($node);
+    
+    my $i = 0;
+    my $n = scalar keys %$attr;
+    return unless $n;
+
+    if (exists $hash->{AttributeOrder})
+    {
+	my $defaulted = $hash->{Defaulted};
+	my @order = @{ $hash->{AttributeOrder} };
+	
+	# Specified attributes
+	for (my $i = 0; $i < $defaulted; $i++)
+	{
+	    my $a = $order[$i];
+	    my $att = $doc->createAttribute ($a, $attr->{$a}, 1);
+	    $node->setAttributeNode ($att);
+	}
+
+	# Defaulted attributes
+	for (my $i = $defaulted; $i < @order; $i++)
+	{
+	    my $a = $order[$i];
+	    my $att = $doc->createAttribute ($elem, $attr->{$a}, 0);
+	    $node->setAttributeNode ($att);
+	}
+    }
+    else
+    {
+	# We're assuming that all attributes were specified (1)
+	for my $a (keys %$attr)
+	{
+	    my $att = $doc->createAttribute ($a, $attr->{$a}, 1);
+	    $node->setAttributeNode ($att);
+	}
+    }
+}
+
+sub end_element
+{
+    my $self = shift;
+    $self->{Element} = $self->{Element}->getParentNode;
+    undef $self->{LastText};
+
+    # Check for end of root element
+    $self->{EndDoc} = 1 if ($self->{Element} == $self->{Document});
+}
+
+sub entity_reference # was Default
+{
+    my $self = $_[0];
+    my $name = $_[1]->{Name};
+    
+    $self->{Element}->appendChild (
+			    $self->{Document}->createEntityReference ($name));
+    undef $self->{LastText};
+}
+
+sub start_cdata
+{
+    my $self = shift;
+    $self->{InCDATA} = 1;
+}
+
+sub end_cdata
+{
+    my $self = shift;
+    $self->{InCDATA} = 0;
+}
+
+sub comment
+{
+    my $self = $_[0];
+
+    local $XML::DOM::IgnoreReadOnly = 1;
+
+    undef $self->{LastText};
+    my $comment = $self->{Document}->createComment ($_[1]->{Data});
+    $self->{Element}->appendChild ($comment);
+}
+
+sub doctype_decl
+{
+    my ($self, $hash) = @_;
+
+    $self->{DocType}->setParams ($hash->{Name}, $hash->{SystemId}, 
+				 $hash->{PublicId}, $hash->{Internal});
+    $self->{SawDocType} = 1;
+}
+
+sub attlist_decl
+{
+    my ($self, $hash) = @_;
+
+    local $XML::DOM::IgnoreReadOnly = 1;
+
+    $self->{DocType}->addAttDef ($hash->{ElementName},
+				 $hash->{AttributeName},
+				 $hash->{Type},
+				 $hash->{Default},
+				 $hash->{Fixed});
+}
+
+sub xml_decl
+{
+    my ($self, $hash) = @_;
+
+    local $XML::DOM::IgnoreReadOnly = 1;
+
+    undef $self->{LastText};
+    $self->{Document}->setXMLDecl (new XML::DOM::XMLDecl ($self->{Document}, 
+							  $hash->{Version},
+							  $hash->{Encoding},
+							  $hash->{Standalone}));
+}
+
+sub entity_decl
+{
+    my ($self, $hash) = @_;
+    
+    local $XML::DOM::IgnoreReadOnly = 1;
+
+    # Parameter Entities names are passed starting with '%'
+    my $parameter = 0;
+
+#?? parameter entities currently not supported by PerlSAX!
+
+    undef $self->{LastText};
+    $self->{DocType}->addEntity ($parameter, $hash->{Name}, $hash->{Value}, 
+				 $hash->{SystemId}, $hash->{PublicId}, 
+				 $hash->{Notation});
+}
+
+# Unparsed is called when it encounters e.g:
+#
+#   <!ENTITY logo SYSTEM "http://server/logo.gif" NDATA gif>
+#
+sub unparsed_decl
+{
+    my ($self, $hash) = @_;
+
+    local $XML::DOM::IgnoreReadOnly = 1;
+
+    # same as regular ENTITY, as far as DOM is concerned
+    $self->entity_decl ($hash);
+}
+
+sub element_decl
+{
+    my ($self, $hash) = @_;
+
+    local $XML::DOM::IgnoreReadOnly = 1;
+
+    undef $self->{LastText};
+    $self->{DocType}->addElementDecl ($hash->{Name}, $hash->{Model});
+}
+
+sub notation_decl
+{
+    my ($self, $hash) = @_;
+
+    local $XML::DOM::IgnoreReadOnly = 1;
+
+    undef $self->{LastText};
+    $self->{DocType}->addNotation ($hash->{Name}, $hash->{Base}, 
+				   $hash->{SystemId}, $hash->{PublicId});
+}
+
+sub processing_instruction
+{
+    my ($self, $hash) = @_;
+
+    local $XML::DOM::IgnoreReadOnly = 1;
+
+    undef $self->{LastText};
+    $self->{Element}->appendChild (new XML::DOM::ProcessingInstruction 
+			    ($self->{Document}, $hash->{Target}, $hash->{Data}));
+}
+
+return 1;
+
+__END__
+
+=head1 NAME
+
+XML::Handler::BuildDOM - PerlSAX handler that creates XML::DOM document structures
+
+=head1 SYNOPSIS
+
+ use XML::Handler::BuildDOM;
+ use XML::Parser::PerlSAX;
+
+ my $handler = new XML::Handler::BuildDOM (KeepCDATA => 1);
+ my $parser = new XML::Parser::PerlSAX (Handler => $handler);
+
+ my $doc = $parser->parsefile ("file.xml");
+
+=head1 DESCRIPTION
+
+XML::Handler::BuildDOM creates L<XML::DOM> document structures 
+(i.e. L<XML::DOM::Document>) from PerlSAX events.
+
+This class used to be called L<XML::PerlSAX::DOM> prior to libxml-enno 1.0.1.
+
+=head2 CONSTRUCTOR OPTIONS
+
+The XML::Handler::BuildDOM constructor supports the following options:
+
+=over 4
+
+=item * KeepCDATA => 1 
+
+If set to 0 (default), CDATASections will be converted to regular text.
+
+=item * Document => $doc
+
+If undefined, start_document will extract it from Element or DocType (if set),
+otherwise it will create a new XML::DOM::Document.
+
+=item * Element => $elem
+
+If undefined, it is set to Document. This will be the insertion point (or parent)
+for the nodes defined by the following callbacks.
+
+=item * DocType => $doctype
+
+If undefined, start_document will extract it from Document (if possible).
+Otherwise it adds a new XML::DOM::DocumentType to the Document.
+
+=back