dummy_foundation/lib/XML/Handler/BuildDOM.pm
changeset 0 02cd6b52f378
equal deleted inserted replaced
-1:000000000000 0:02cd6b52f378
       
     1 package XML::Handler::BuildDOM;
       
     2 use strict;
       
     3 use XML::DOM;
       
     4 
       
     5 #
       
     6 # TODO:
       
     7 # - add support for parameter entity references
       
     8 # - expand API: insert Elements in the tree or stuff into DocType etc.
       
     9 
       
    10 sub new
       
    11 {
       
    12     my ($class, %args) = @_;
       
    13     bless \%args, $class;
       
    14 }
       
    15 
       
    16 #-------- PerlSAX Handler methods ------------------------------
       
    17 
       
    18 sub start_document # was Init
       
    19 {
       
    20     my $self = shift;
       
    21 
       
    22     # Define Document if it's not set & not obtainable from Element or DocType
       
    23     $self->{Document} ||= 
       
    24 	(defined $self->{Element} ? $self->{Element}->getOwnerDocument : undef)
       
    25      || (defined $self->{DocType} ? $self->{DocType}->getOwnerDocument : undef)
       
    26      || new XML::DOM::Document();
       
    27 
       
    28     $self->{Element} ||= $self->{Document};
       
    29 
       
    30     unless (defined $self->{DocType})
       
    31     {
       
    32 	$self->{DocType} = $self->{Document}->getDoctype
       
    33 	    if defined $self->{Document};
       
    34 
       
    35 	unless (defined $self->{Doctype})
       
    36 	{
       
    37 #?? should be $doc->createDocType for extensibility!
       
    38 	    $self->{DocType} = new XML::DOM::DocumentType ($self->{Document});
       
    39 	    $self->{Document}->setDoctype ($self->{DocType});
       
    40 	}
       
    41     }
       
    42   
       
    43     # Prepare for document prolog
       
    44     $self->{InProlog} = 1;
       
    45 
       
    46     # We haven't passed the root element yet
       
    47     $self->{EndDoc} = 0;
       
    48 
       
    49     undef $self->{LastText};
       
    50 }
       
    51 
       
    52 sub end_document # was Final
       
    53 {
       
    54     my $self = shift;
       
    55     unless ($self->{SawDocType})
       
    56     {
       
    57 	my $doctype = $self->{Document}->removeDoctype;
       
    58 	$doctype->dispose;
       
    59 #?? do we always want to destroy the Doctype?
       
    60     }
       
    61     $self->{Document};
       
    62 }
       
    63 
       
    64 sub characters # was Char
       
    65 {
       
    66     my $self = $_[0];
       
    67     my $str = $_[1]->{Data};
       
    68 
       
    69     if ($self->{InCDATA} && $self->{KeepCDATA})
       
    70     {
       
    71 	undef $self->{LastText};
       
    72 	# Merge text with previous node if possible
       
    73 	$self->{Element}->addCDATA ($str);
       
    74     }
       
    75     else
       
    76     {
       
    77 	# Merge text with previous node if possible
       
    78 	# Used to be:	$expat->{DOM_Element}->addText ($str);
       
    79 	if ($self->{LastText})
       
    80 	{
       
    81 	    $self->{LastText}->appendData ($str);
       
    82 	}
       
    83 	else
       
    84 	{
       
    85 	    $self->{LastText} = $self->{Document}->createTextNode ($str);
       
    86 	    $self->{Element}->appendChild ($self->{LastText});
       
    87 	}
       
    88     }
       
    89 }
       
    90 
       
    91 sub start_element # was Start
       
    92 {
       
    93     my ($self, $hash) = @_;
       
    94     my $elem = $hash->{Name};
       
    95     my $attr = $hash->{Attributes};
       
    96 
       
    97     my $parent = $self->{Element};
       
    98     my $doc = $self->{Document};
       
    99     
       
   100     if ($parent == $doc)
       
   101     {
       
   102 	# End of document prolog, i.e. start of first Element
       
   103 	$self->{InProlog} = 0;
       
   104     }
       
   105     
       
   106     undef $self->{LastText};
       
   107     my $node = $doc->createElement ($elem);
       
   108     $self->{Element} = $node;
       
   109     $parent->appendChild ($node);
       
   110     
       
   111     my $i = 0;
       
   112     my $n = scalar keys %$attr;
       
   113     return unless $n;
       
   114 
       
   115     if (exists $hash->{AttributeOrder})
       
   116     {
       
   117 	my $defaulted = $hash->{Defaulted};
       
   118 	my @order = @{ $hash->{AttributeOrder} };
       
   119 	
       
   120 	# Specified attributes
       
   121 	for (my $i = 0; $i < $defaulted; $i++)
       
   122 	{
       
   123 	    my $a = $order[$i];
       
   124 	    my $att = $doc->createAttribute ($a, $attr->{$a}, 1);
       
   125 	    $node->setAttributeNode ($att);
       
   126 	}
       
   127 
       
   128 	# Defaulted attributes
       
   129 	for (my $i = $defaulted; $i < @order; $i++)
       
   130 	{
       
   131 	    my $a = $order[$i];
       
   132 	    my $att = $doc->createAttribute ($elem, $attr->{$a}, 0);
       
   133 	    $node->setAttributeNode ($att);
       
   134 	}
       
   135     }
       
   136     else
       
   137     {
       
   138 	# We're assuming that all attributes were specified (1)
       
   139 	for my $a (keys %$attr)
       
   140 	{
       
   141 	    my $att = $doc->createAttribute ($a, $attr->{$a}, 1);
       
   142 	    $node->setAttributeNode ($att);
       
   143 	}
       
   144     }
       
   145 }
       
   146 
       
   147 sub end_element
       
   148 {
       
   149     my $self = shift;
       
   150     $self->{Element} = $self->{Element}->getParentNode;
       
   151     undef $self->{LastText};
       
   152 
       
   153     # Check for end of root element
       
   154     $self->{EndDoc} = 1 if ($self->{Element} == $self->{Document});
       
   155 }
       
   156 
       
   157 sub entity_reference # was Default
       
   158 {
       
   159     my $self = $_[0];
       
   160     my $name = $_[1]->{Name};
       
   161     
       
   162     $self->{Element}->appendChild (
       
   163 			    $self->{Document}->createEntityReference ($name));
       
   164     undef $self->{LastText};
       
   165 }
       
   166 
       
   167 sub start_cdata
       
   168 {
       
   169     my $self = shift;
       
   170     $self->{InCDATA} = 1;
       
   171 }
       
   172 
       
   173 sub end_cdata
       
   174 {
       
   175     my $self = shift;
       
   176     $self->{InCDATA} = 0;
       
   177 }
       
   178 
       
   179 sub comment
       
   180 {
       
   181     my $self = $_[0];
       
   182 
       
   183     local $XML::DOM::IgnoreReadOnly = 1;
       
   184 
       
   185     undef $self->{LastText};
       
   186     my $comment = $self->{Document}->createComment ($_[1]->{Data});
       
   187     $self->{Element}->appendChild ($comment);
       
   188 }
       
   189 
       
   190 sub doctype_decl
       
   191 {
       
   192     my ($self, $hash) = @_;
       
   193 
       
   194     $self->{DocType}->setParams ($hash->{Name}, $hash->{SystemId}, 
       
   195 				 $hash->{PublicId}, $hash->{Internal});
       
   196     $self->{SawDocType} = 1;
       
   197 }
       
   198 
       
   199 sub attlist_decl
       
   200 {
       
   201     my ($self, $hash) = @_;
       
   202 
       
   203     local $XML::DOM::IgnoreReadOnly = 1;
       
   204 
       
   205     $self->{DocType}->addAttDef ($hash->{ElementName},
       
   206 				 $hash->{AttributeName},
       
   207 				 $hash->{Type},
       
   208 				 $hash->{Default},
       
   209 				 $hash->{Fixed});
       
   210 }
       
   211 
       
   212 sub xml_decl
       
   213 {
       
   214     my ($self, $hash) = @_;
       
   215 
       
   216     local $XML::DOM::IgnoreReadOnly = 1;
       
   217 
       
   218     undef $self->{LastText};
       
   219     $self->{Document}->setXMLDecl (new XML::DOM::XMLDecl ($self->{Document}, 
       
   220 							  $hash->{Version},
       
   221 							  $hash->{Encoding},
       
   222 							  $hash->{Standalone}));
       
   223 }
       
   224 
       
   225 sub entity_decl
       
   226 {
       
   227     my ($self, $hash) = @_;
       
   228     
       
   229     local $XML::DOM::IgnoreReadOnly = 1;
       
   230 
       
   231     # Parameter Entities names are passed starting with '%'
       
   232     my $parameter = 0;
       
   233 
       
   234 #?? parameter entities currently not supported by PerlSAX!
       
   235 
       
   236     undef $self->{LastText};
       
   237     $self->{DocType}->addEntity ($parameter, $hash->{Name}, $hash->{Value}, 
       
   238 				 $hash->{SystemId}, $hash->{PublicId}, 
       
   239 				 $hash->{Notation});
       
   240 }
       
   241 
       
   242 # Unparsed is called when it encounters e.g:
       
   243 #
       
   244 #   <!ENTITY logo SYSTEM "http://server/logo.gif" NDATA gif>
       
   245 #
       
   246 sub unparsed_decl
       
   247 {
       
   248     my ($self, $hash) = @_;
       
   249 
       
   250     local $XML::DOM::IgnoreReadOnly = 1;
       
   251 
       
   252     # same as regular ENTITY, as far as DOM is concerned
       
   253     $self->entity_decl ($hash);
       
   254 }
       
   255 
       
   256 sub element_decl
       
   257 {
       
   258     my ($self, $hash) = @_;
       
   259 
       
   260     local $XML::DOM::IgnoreReadOnly = 1;
       
   261 
       
   262     undef $self->{LastText};
       
   263     $self->{DocType}->addElementDecl ($hash->{Name}, $hash->{Model});
       
   264 }
       
   265 
       
   266 sub notation_decl
       
   267 {
       
   268     my ($self, $hash) = @_;
       
   269 
       
   270     local $XML::DOM::IgnoreReadOnly = 1;
       
   271 
       
   272     undef $self->{LastText};
       
   273     $self->{DocType}->addNotation ($hash->{Name}, $hash->{Base}, 
       
   274 				   $hash->{SystemId}, $hash->{PublicId});
       
   275 }
       
   276 
       
   277 sub processing_instruction
       
   278 {
       
   279     my ($self, $hash) = @_;
       
   280 
       
   281     local $XML::DOM::IgnoreReadOnly = 1;
       
   282 
       
   283     undef $self->{LastText};
       
   284     $self->{Element}->appendChild (new XML::DOM::ProcessingInstruction 
       
   285 			    ($self->{Document}, $hash->{Target}, $hash->{Data}));
       
   286 }
       
   287 
       
   288 return 1;
       
   289 
       
   290 __END__
       
   291 
       
   292 =head1 NAME
       
   293 
       
   294 XML::Handler::BuildDOM - PerlSAX handler that creates XML::DOM document structures
       
   295 
       
   296 =head1 SYNOPSIS
       
   297 
       
   298  use XML::Handler::BuildDOM;
       
   299  use XML::Parser::PerlSAX;
       
   300 
       
   301  my $handler = new XML::Handler::BuildDOM (KeepCDATA => 1);
       
   302  my $parser = new XML::Parser::PerlSAX (Handler => $handler);
       
   303 
       
   304  my $doc = $parser->parsefile ("file.xml");
       
   305 
       
   306 =head1 DESCRIPTION
       
   307 
       
   308 XML::Handler::BuildDOM creates L<XML::DOM> document structures 
       
   309 (i.e. L<XML::DOM::Document>) from PerlSAX events.
       
   310 
       
   311 This class used to be called L<XML::PerlSAX::DOM> prior to libxml-enno 1.0.1.
       
   312 
       
   313 =head2 CONSTRUCTOR OPTIONS
       
   314 
       
   315 The XML::Handler::BuildDOM constructor supports the following options:
       
   316 
       
   317 =over 4
       
   318 
       
   319 =item * KeepCDATA => 1 
       
   320 
       
   321 If set to 0 (default), CDATASections will be converted to regular text.
       
   322 
       
   323 =item * Document => $doc
       
   324 
       
   325 If undefined, start_document will extract it from Element or DocType (if set),
       
   326 otherwise it will create a new XML::DOM::Document.
       
   327 
       
   328 =item * Element => $elem
       
   329 
       
   330 If undefined, it is set to Document. This will be the insertion point (or parent)
       
   331 for the nodes defined by the following callbacks.
       
   332 
       
   333 =item * DocType => $doctype
       
   334 
       
   335 If undefined, start_document will extract it from Document (if possible).
       
   336 Otherwise it adds a new XML::DOM::DocumentType to the Document.
       
   337 
       
   338 =back