|
1 package XML::Handler::Composer; |
|
2 use strict; |
|
3 use XML::UM; |
|
4 use Carp; |
|
5 |
|
6 use vars qw{ %DEFAULT_QUOTES %XML_MAPPING_CRITERIA }; |
|
7 |
|
8 %DEFAULT_QUOTES = ( |
|
9 XMLDecl => '"', |
|
10 Attr => '"', |
|
11 Entity => '"', |
|
12 SystemLiteral => '"', |
|
13 ); |
|
14 |
|
15 %XML_MAPPING_CRITERIA = |
|
16 ( |
|
17 Text => |
|
18 { |
|
19 '<' => '<', |
|
20 '&' => '&', |
|
21 |
|
22 ']]>' => ']]>', |
|
23 }, |
|
24 |
|
25 CDataSection => |
|
26 { |
|
27 ']]>' => ']]>', # NOTE: this won't be translated back correctly |
|
28 }, |
|
29 |
|
30 Attr => # attribute value (assuming double quotes "" are used) |
|
31 { |
|
32 # '"' => '"', # Use ("'" => ''') when using single quotes |
|
33 '<' => '<', |
|
34 '&' => '&', |
|
35 }, |
|
36 |
|
37 Entity => # entity value (assuming double quotes "" are used) |
|
38 { |
|
39 # '"' => '"', # Use ("'" => ''') when using single quotes |
|
40 '%' => '%', |
|
41 '&' => '&', |
|
42 }, |
|
43 |
|
44 Comment => |
|
45 { |
|
46 '--' => '--', # NOTE: this won't be translated back correctly |
|
47 }, |
|
48 |
|
49 ProcessingInstruction => |
|
50 { |
|
51 '?>' => '?>', # not sure if this will be translated back correctly |
|
52 }, |
|
53 |
|
54 # The SYSTEM and PUBLIC identifiers in DOCTYPE declaration (quoted strings) |
|
55 SystemLiteral => |
|
56 { |
|
57 # '"' => '"', # Use ("'" => ''') when using single quotes |
|
58 }, |
|
59 |
|
60 ); |
|
61 |
|
62 sub new |
|
63 { |
|
64 my ($class, %options) = @_; |
|
65 my $self = bless \%options, $class; |
|
66 |
|
67 $self->{EndWithNewline} = 1 unless defined $self->{EndWithNewline}; |
|
68 |
|
69 if (defined $self->{Newline}) |
|
70 { |
|
71 $self->{ConvertNewlines} = 1; |
|
72 } |
|
73 else |
|
74 { |
|
75 # Use this when printing newlines in case the user didn't specify one |
|
76 $self->{Newline} = "\x0A"; |
|
77 } |
|
78 |
|
79 $self->{DocTypeIndent} = $self->{Newline} . " " |
|
80 unless defined $self->{DocTypeIndent}; |
|
81 |
|
82 $self->{IndentAttlist} = " " unless defined $self->{IndentAttlist}; |
|
83 |
|
84 $self->{Print} = sub { print @_ } unless defined $self->{Print}; |
|
85 |
|
86 $self->{Quote} ||= {}; |
|
87 for my $q (keys %DEFAULT_QUOTES) |
|
88 { |
|
89 $self->{Quote}->{$q} ||= $DEFAULT_QUOTES{$q}; |
|
90 } |
|
91 |
|
92 # Convert to UTF-8 by default, i.e. when <?xml encoding=...?> is missing |
|
93 # and no {Encoding} is specified. |
|
94 # Note that the internal representation *is* UTF-8, so we |
|
95 # simply return the (string) parameter. |
|
96 $self->{Encode} = sub { shift } unless defined $self->{Encode}; |
|
97 |
|
98 # Convert unmapped characters to hexadecimal constants a la '号' |
|
99 $self->{EncodeUnmapped} = \&XML::UM::encode_unmapped_hex |
|
100 unless defined $self->{EncodeUnmapped}; |
|
101 |
|
102 my $encoding = $self->{Encoding}; |
|
103 $self->setEncoding ($encoding) if defined $encoding; |
|
104 |
|
105 $self->initMappers; |
|
106 |
|
107 $self; |
|
108 } |
|
109 |
|
110 # |
|
111 # Setup the mapping routines that convert '<' to '<' etc. |
|
112 # for the specific XML constructs. |
|
113 # |
|
114 sub initMappers |
|
115 { |
|
116 my $self = shift; |
|
117 my %escape; |
|
118 my $convert_newlines = $self->{ConvertNewlines}; |
|
119 |
|
120 for my $n (qw{ Text Comment CDataSection Attr SystemLiteral |
|
121 ProcessingInstruction Entity }) |
|
122 { |
|
123 $escape{$n} = $self->create_utf8_mapper ($n, $convert_newlines); |
|
124 } |
|
125 |
|
126 # Text with xml:space="preserve", should not have newlines converted. |
|
127 $escape{TextPreserveNL} = $self->create_utf8_mapper ('Text', 0); |
|
128 # (If newline conversion is inactive, $escape{TextPreserveNL} does the |
|
129 # same as $escape{Text} defined above ...) |
|
130 |
|
131 $self->{Escape} = \%escape; |
|
132 } |
|
133 |
|
134 sub setEncoding |
|
135 { |
|
136 my ($self, $encoding) = @_; |
|
137 |
|
138 $self->{Encode} = XML::UM::get_encode ( |
|
139 Encoding => $encoding, EncodeUnmapped => $self->{EncodeUnmapped}); |
|
140 } |
|
141 |
|
142 sub create_utf8_mapper |
|
143 { |
|
144 my ($self, $construct, $convert_newlines) = @_; |
|
145 |
|
146 my $c = $XML_MAPPING_CRITERIA{$construct}; |
|
147 croak "no XML mapping criteria defined for $construct" |
|
148 unless defined $c; |
|
149 |
|
150 my %hash = %$c; |
|
151 |
|
152 # If this construct appears between quotes in the XML document |
|
153 # (and it has a quoting character defined), |
|
154 # ensure that the quoting character is appropriately converted |
|
155 # to " or ' |
|
156 my $quote = $self->{Quote}->{$construct}; |
|
157 if (defined $quote) |
|
158 { |
|
159 $hash{$quote} = $quote eq '"' ? '"' : '''; |
|
160 } |
|
161 |
|
162 if ($convert_newlines) |
|
163 { |
|
164 $hash{"\x0A"} = $self->{Newline}; |
|
165 } |
|
166 |
|
167 gen_utf8_subst (%hash); |
|
168 } |
|
169 |
|
170 # |
|
171 # Converts a string literal e.g. "ABC" into '\x41\x42\x43' |
|
172 # so it can be stuffed into a regular expression |
|
173 # |
|
174 sub str_to_hex # static |
|
175 { |
|
176 my $s = shift; |
|
177 |
|
178 $s =~ s/(.)/ sprintf ("\\x%02x", ord ($1)) /egos; |
|
179 |
|
180 $s; |
|
181 } |
|
182 |
|
183 # |
|
184 # In later perl versions (5.005_55 and up) we can simply say: |
|
185 # |
|
186 # use utf8; |
|
187 # $literals = join ("|", map { str_to_hex ($_) } keys %hash); |
|
188 # $s =~ s/($literals)/$hash{$1}/ego; |
|
189 # |
|
190 |
|
191 sub gen_utf8_subst # static |
|
192 { |
|
193 my (%hash) = @_; |
|
194 |
|
195 my $code = 'sub { my $s = shift; $s =~ s/('; |
|
196 $code .= join ("|", map { str_to_hex ($_) } keys %hash); |
|
197 $code .= ')|('; |
|
198 $code .= '[\\x00-\\xBF]|[\\xC0-\\xDF].|[\\xE0-\\xEF]..|[\\xF0-\\xFF]...'; |
|
199 $code .= ')/ defined ($1) ? $hash{$1} : $2 /ego; $s }'; |
|
200 |
|
201 my $f = eval $code; |
|
202 croak "XML::Handler::Composer - can't eval code: $code\nReason: $@" if $@; |
|
203 |
|
204 $f; |
|
205 } |
|
206 |
|
207 # This should be optimized! |
|
208 sub print |
|
209 { |
|
210 my ($self, $str) = @_; |
|
211 $self->{Print}->($self->{Encode}->($str)); |
|
212 } |
|
213 |
|
214 # Used by start_element. It determines the style in which empty elements |
|
215 # are printed. The default implementation returns "/>" so they are printed |
|
216 # like this: <a/> |
|
217 # Override this method to support e.g. XHTML style tags. |
|
218 sub get_compressed_element_suffix |
|
219 { |
|
220 my ($self, $event) = @_; |
|
221 |
|
222 "/>"; |
|
223 |
|
224 # return " />" for XHTML style, or |
|
225 # "><$tagName/>" for uncompressed tags (where $tagName is $event->{Name}) |
|
226 } |
|
227 |
|
228 #----- PerlSAX handlers ------------------------------------------------------- |
|
229 |
|
230 sub start_document |
|
231 { |
|
232 my ($self) = @_; |
|
233 |
|
234 $self->{InCDATA} = 0; |
|
235 $self->{DTD} = undef; |
|
236 $self->{PreserveWS} = 0; # root element has xml:space="default" |
|
237 $self->{PreserveStack} = []; |
|
238 $self->{PrintedXmlDecl} = 0; # whether <?xml ...?> was printed |
|
239 } |
|
240 |
|
241 sub end_document |
|
242 { |
|
243 my ($self) = @_; |
|
244 |
|
245 # Print final Newline at the end of the XML document (if desired) |
|
246 $self->print ($self->{Newline}) if $self->{EndWithNewline}; |
|
247 } |
|
248 |
|
249 # This event is received *AFTER* the Notation, Element, Attlist etc. events |
|
250 # that are contained within the DTD. |
|
251 sub doctype_decl |
|
252 { |
|
253 my ($self, $event) = @_; |
|
254 $self->flush_xml_decl; |
|
255 |
|
256 my $q = $self->{Quote}->{SystemLiteral}; |
|
257 my $escape_literal = $self->{Escape}->{SystemLiteral}; |
|
258 |
|
259 my $name = $event->{Name}; |
|
260 my $sysId = $event->{SystemId}; |
|
261 $sysId = &$escape_literal ($sysId) if defined $sysId; |
|
262 my $pubId = $event->{PublicId}; |
|
263 $pubId = &$escape_literal ($pubId) if defined $pubId; |
|
264 |
|
265 my $str = "<!DOCTYPE $name"; |
|
266 if (defined $pubId) |
|
267 { |
|
268 $str .= " PUBLIC $q$pubId$q $q$sysId$q"; |
|
269 } |
|
270 elsif (defined $sysId) |
|
271 { |
|
272 $str .= " SYSTEM $q$sysId$q"; |
|
273 } |
|
274 |
|
275 my $dtd_contents = $self->{DTD}; |
|
276 my $nl = $self->{Newline}; |
|
277 |
|
278 if (defined $dtd_contents) |
|
279 { |
|
280 delete $self->{DTD}; |
|
281 |
|
282 $str .= " [$dtd_contents$nl]>$nl"; |
|
283 } |
|
284 else |
|
285 { |
|
286 $str .= ">$nl"; |
|
287 } |
|
288 $self->print ($str); |
|
289 } |
|
290 |
|
291 sub start_element |
|
292 { |
|
293 my ($self, $event) = @_; |
|
294 |
|
295 my $preserve_stack = $self->{PreserveStack}; |
|
296 if (@$preserve_stack == 0) |
|
297 { |
|
298 # This is the root element. Print the <?xml ...?> declaration now if |
|
299 # it wasn't printed and it should be. |
|
300 $self->flush_xml_decl; |
|
301 } |
|
302 |
|
303 my $str = "<" . $event->{Name}; |
|
304 |
|
305 my $suffix = ">"; |
|
306 if ($event->{Compress}) |
|
307 { |
|
308 $suffix = $self->get_compressed_element_suffix ($event); |
|
309 } |
|
310 |
|
311 # Push PreserveWS state of parent element on the stack |
|
312 push @{ $preserve_stack }, $self->{PreserveWS}; |
|
313 $self->{PreserveWS} = $event->{PreserveWS}; |
|
314 |
|
315 my $ha = $event->{Attributes}; |
|
316 my @attr; |
|
317 if (exists $event->{AttributeOrder}) |
|
318 { |
|
319 my $defaulted = $event->{Defaulted}; |
|
320 if (defined $defaulted && !$self->{PrintDefaultAttr}) |
|
321 { |
|
322 if ($defaulted > 0) |
|
323 { |
|
324 @attr = @{ $event->{AttributeOrder} }[0 .. $defaulted - 1]; |
|
325 } |
|
326 # else: all attributes are defaulted i.e. @attr = (); |
|
327 } |
|
328 else # no attr are defaulted |
|
329 { |
|
330 @attr = @{ $event->{AttributeOrder} }; |
|
331 } |
|
332 } |
|
333 else # no attr order defined |
|
334 { |
|
335 @attr = keys %$ha; |
|
336 } |
|
337 |
|
338 my $escape = $self->{Escape}->{Attr}; |
|
339 my $q = $self->{Quote}->{Attr}; |
|
340 |
|
341 for (my $i = 0; $i < @attr; $i++) |
|
342 { |
|
343 #?? could print a newline every so often... |
|
344 my $name = $attr[$i]; |
|
345 my $val = &$escape ($ha->{$name}); |
|
346 $str .= " $name=$q$val$q"; |
|
347 } |
|
348 $str .= $suffix; |
|
349 |
|
350 $self->print ($str); |
|
351 } |
|
352 |
|
353 sub end_element |
|
354 { |
|
355 my ($self, $event) = @_; |
|
356 |
|
357 $self->{PreserveWS} = pop @{ $self->{PreserveStack} }; |
|
358 |
|
359 return if $event->{Compress}; |
|
360 |
|
361 $self->print ("</" . $event->{Name} . ">"); |
|
362 } |
|
363 |
|
364 sub characters |
|
365 { |
|
366 my ($self, $event) = @_; |
|
367 |
|
368 if ($self->{InCDATA}) |
|
369 { |
|
370 #?? should this use $self->{PreserveWS} ? |
|
371 |
|
372 my $esc = $self->{Escape}->{CDataSection}; |
|
373 $self->print (&$esc ($event->{Data})); |
|
374 } |
|
375 else # regular text |
|
376 { |
|
377 my $esc = $self->{PreserveWS} ? |
|
378 $self->{Escape}->{TextPreserveNL} : |
|
379 $self->{Escape}->{Text}; |
|
380 |
|
381 $self->print (&$esc ($event->{Data})); |
|
382 } |
|
383 } |
|
384 |
|
385 sub start_cdata |
|
386 { |
|
387 my $self = shift; |
|
388 $self->{InCDATA} = 1; |
|
389 |
|
390 $self->print ("<![CDATA["); |
|
391 } |
|
392 |
|
393 sub end_cdata |
|
394 { |
|
395 my $self = shift; |
|
396 $self->{InCDATA} = 0; |
|
397 |
|
398 $self->print ("]]>"); |
|
399 } |
|
400 |
|
401 sub comment |
|
402 { |
|
403 my ($self, $event) = @_; |
|
404 $self->flush_xml_decl; |
|
405 |
|
406 my $esc = $self->{Escape}->{Comment}; |
|
407 #?? still need to support comments in the DTD |
|
408 |
|
409 $self->print ("<!--" . &$esc ($event->{Data}) . "-->"); |
|
410 } |
|
411 |
|
412 sub entity_reference |
|
413 { |
|
414 my ($self, $event) = @_; |
|
415 $self->flush_xml_decl; |
|
416 |
|
417 my $par = $event->{Parameter} ? '%' : '&'; |
|
418 #?? parameter entities (like %par;) are NOT supported! |
|
419 # PerlSAX::handle_default should be fixed! |
|
420 |
|
421 $self->print ($par . $event->{Name} . ";"); |
|
422 } |
|
423 |
|
424 sub unparsed_entity_decl |
|
425 { |
|
426 my ($self, $event) = @_; |
|
427 $self->flush_xml_decl; |
|
428 |
|
429 $self->entity_decl ($event); |
|
430 } |
|
431 |
|
432 sub notation_decl |
|
433 { |
|
434 my ($self, $event) = @_; |
|
435 $self->flush_xml_decl; |
|
436 |
|
437 my $name = $event->{Name}; |
|
438 my $sysId = $event->{SystemId}; |
|
439 my $pubId = $event->{PublicId}; |
|
440 |
|
441 my $q = $self->{Quote}->{SystemLiteral}; |
|
442 my $escape = $self->{Escape}->{SystemLiteral}; |
|
443 |
|
444 $sysId = &$escape ($sysId) if defined $sysId; |
|
445 $pubId = &$escape ($pubId) if defined $pubId; |
|
446 |
|
447 my $str = $self->{DocTypeIndent} . "<!NOTATION $name"; |
|
448 |
|
449 if (defined $pubId) |
|
450 { |
|
451 $str .= " PUBLIC $q$pubId$q"; |
|
452 } |
|
453 if (defined $sysId) |
|
454 { |
|
455 $str .= " SYSTEM $q$sysId$q"; |
|
456 } |
|
457 $str .= ">"; |
|
458 |
|
459 $self->{DTD} .= $str; |
|
460 } |
|
461 |
|
462 sub element_decl |
|
463 { |
|
464 my ($self, $event) = @_; |
|
465 $self->flush_xml_decl; |
|
466 |
|
467 my $name = $event->{Name}; |
|
468 my $model = $event->{Model}; |
|
469 |
|
470 $self->{DTD} .= $self->{DocTypeIndent} . "<!ELEMENT $name $model>"; |
|
471 } |
|
472 |
|
473 sub entity_decl |
|
474 { |
|
475 my ($self, $event) = @_; |
|
476 $self->flush_xml_decl; |
|
477 |
|
478 my $name = $event->{Name}; |
|
479 |
|
480 my $par = ""; |
|
481 if ($name =~ /^%/) |
|
482 { |
|
483 # It's a parameter entity (i.e. %ent; instead of &ent;) |
|
484 $name = substr ($name, 1); |
|
485 $par = "% "; |
|
486 } |
|
487 |
|
488 my $str = $self->{DocTypeIndent} . "<!ENTITY $par$name"; |
|
489 |
|
490 my $value = $event->{Value}; |
|
491 my $sysId = $event->{SysId}; |
|
492 my $pubId = $event->{PubId}; |
|
493 my $ndata = $event->{Ndata}; |
|
494 |
|
495 my $q = $self->{Quote}->{SystemLiteral}; |
|
496 my $escape = $self->{Escape}->{SystemLiteral}; |
|
497 |
|
498 if (defined $value) |
|
499 { |
|
500 #?? use {Entity} quote etc... |
|
501 my $esc = $self->{Escape}->{Entity}; |
|
502 my $p = $self->{Quote}->{Entity}; |
|
503 $str .= " $p" . &$esc ($value) . $p; |
|
504 } |
|
505 if (defined $pubId) |
|
506 { |
|
507 $str .= " PUBLIC $q" . &$escape ($pubId) . $q; |
|
508 } |
|
509 elsif (defined $sysId) |
|
510 { |
|
511 $str .= " SYSTEM"; |
|
512 } |
|
513 |
|
514 if (defined $sysId) |
|
515 { |
|
516 $str .= " $q" . &$escape ($sysId) . $q; |
|
517 } |
|
518 $str .= " NDATA $ndata" if defined $ndata; |
|
519 $str .= ">"; |
|
520 |
|
521 $self->{DTD} .= $str; |
|
522 } |
|
523 |
|
524 sub attlist_decl |
|
525 { |
|
526 my ($self, $event) = @_; |
|
527 $self->flush_xml_decl; |
|
528 |
|
529 my $elem = $event->{ElementName}; |
|
530 |
|
531 my $str = $event->{AttributeName} . " " . $event->{Type}; |
|
532 $str .= " #FIXED" if defined $event->{Fixed}; |
|
533 |
|
534 $str = $str; |
|
535 |
|
536 my $def = $event->{Default}; |
|
537 if (defined $def) |
|
538 { |
|
539 $str .= " $def"; |
|
540 |
|
541 # Note sometimes Default is a value with quotes. |
|
542 # We'll use the existing quotes in that case... |
|
543 } |
|
544 |
|
545 my $indent; |
|
546 if (!exists($event->{First}) || $event->{First}) |
|
547 { |
|
548 $self->{DTD} .= $self->{DocTypeIndent} . "<!ATTLIST $elem"; |
|
549 |
|
550 if ($event->{MoreFollow}) |
|
551 { |
|
552 $indent = $self->{Newline} . $self->{IndentAttlist}; |
|
553 } |
|
554 else |
|
555 { |
|
556 $indent = " "; |
|
557 } |
|
558 } |
|
559 else |
|
560 { |
|
561 $indent = $self->{Newline} . $self->{IndentAttlist}; |
|
562 } |
|
563 |
|
564 $self->{DTD} .= $indent . $str; |
|
565 |
|
566 unless ($event->{MoreFollow}) |
|
567 { |
|
568 $self->{DTD} .= '>'; |
|
569 } |
|
570 } |
|
571 |
|
572 sub xml_decl |
|
573 { |
|
574 my ($self, $event) = @_; |
|
575 return if $self->{PrintedXmlDecl}; # already printed it |
|
576 |
|
577 my $version = $event->{Version}; |
|
578 my $encoding = $event->{Encoding}; |
|
579 if (defined $self->{Encoding}) |
|
580 { |
|
581 $encoding = $self->{Encoding}; |
|
582 } |
|
583 else |
|
584 { |
|
585 $self->setEncoding ($encoding) if defined $encoding; |
|
586 } |
|
587 |
|
588 my $standalone = $event->{Standalone}; |
|
589 $standalone = ($standalone ? "yes" : "no") if defined $standalone; |
|
590 |
|
591 my $q = $self->{Quote}->{XMLDecl}; |
|
592 my $nl = $self->{Newline}; |
|
593 |
|
594 my $str = "<?xml"; |
|
595 $str .= " version=$q$version$q" if defined $version; |
|
596 $str .= " encoding=$q$encoding$q" if defined $encoding; |
|
597 $str .= " standalone=$q$standalone$q" if defined $standalone; |
|
598 $str .= "?>$nl$nl"; |
|
599 |
|
600 $self->print ($str); |
|
601 $self->{PrintedXmlDecl} = 1; |
|
602 } |
|
603 |
|
604 # |
|
605 # Prints the <xml ...?> declaration if it wasn't already printed |
|
606 # *and* the user wanted it to be printed (because s/he set $self->{Encoding}) |
|
607 # |
|
608 sub flush_xml_decl |
|
609 { |
|
610 my ($self) = @_; |
|
611 return if $self->{PrintedXmlDecl}; |
|
612 |
|
613 if (defined $self->{Encoding}) |
|
614 { |
|
615 $self->xml_decl ({ Version => '1.0', Encoding => $self->{Encoding} }); |
|
616 } |
|
617 |
|
618 # If it wasn't printed just now, it doesn't need to be printed at all, |
|
619 # so pretend we did print it. |
|
620 $self->{PrintedXmlDecl} = 1; |
|
621 } |
|
622 |
|
623 sub processing_instruction |
|
624 { |
|
625 my ($self, $event) = @_; |
|
626 $self->flush_xml_decl; |
|
627 |
|
628 my $escape = $self->{Escape}->{ProcessingInstruction}; |
|
629 |
|
630 my $str = "<?" . $event->{Target} . " " . |
|
631 &$escape ($event->{Data}). "?>"; |
|
632 |
|
633 $self->print ($str); |
|
634 } |
|
635 |
|
636 1; # package return code |
|
637 |
|
638 __END__ |
|
639 |
|
640 =head1 NAME |
|
641 |
|
642 XML::Handler::Composer - Another XML printer/writer/generator |
|
643 |
|
644 =head1 SYNOPSIS |
|
645 |
|
646 use XML::Handler::Composer; |
|
647 |
|
648 my $composer = new XML::Handler::Composer ( [OPTIONS] ); |
|
649 |
|
650 =head1 DESCRIPTION |
|
651 |
|
652 XML::Handler::Composer is similar to XML::Writer, XML::Handler::XMLWriter, |
|
653 XML::Handler::YAWriter etc. in that it generates XML output. |
|
654 |
|
655 This implementation may not be fast and it may not be the best solution for |
|
656 your particular problem, but it has some features that may be missing in the |
|
657 other implementations: |
|
658 |
|
659 =over 4 |
|
660 |
|
661 =item * Supports every output encoding that L<XML::UM> supports |
|
662 |
|
663 L<XML::UM> supports every encoding for which there is a mapping file |
|
664 in the L<XML::Encoding> distribution. |
|
665 |
|
666 =item * Pretty printing |
|
667 |
|
668 When used with L<XML::Filter::Reindent>. |
|
669 |
|
670 =item * Fine control over which kind of quotes are used |
|
671 |
|
672 See options below. |
|
673 |
|
674 =item * Supports PerlSAX interface |
|
675 |
|
676 =back |
|
677 |
|
678 =head1 Constructor Options |
|
679 |
|
680 =over 4 |
|
681 |
|
682 =item * EndWithNewline (Default: 1) |
|
683 |
|
684 Whether to print a newline at the end of the file (i.e. after the root element) |
|
685 |
|
686 =item * Newline (Default: undef) |
|
687 |
|
688 If defined, which newline to use for printing. |
|
689 (Note that XML::Parser etc. convert newlines into "\x0A".) |
|
690 |
|
691 If undef, newlines will not be converted and XML::Handler::Composer will |
|
692 use "\x0A" when printing. |
|
693 |
|
694 A value of "\n" will convert the internal newlines into the platform |
|
695 specific line separator. |
|
696 |
|
697 See the PreserveWS option in the characters event (below) for finer control |
|
698 over when newline conversion is active. |
|
699 |
|
700 =item * DocTypeIndent (Default: a Newline and 2 spaces) |
|
701 |
|
702 Newline plus indent that is used to separate lines inside the DTD. |
|
703 |
|
704 =item * IndentAttList (Default: 8 spaces) |
|
705 |
|
706 Indent used when printing an <!ATTLIST> declaration that has more than one |
|
707 attribute definition, e.g. |
|
708 |
|
709 <!ATTLIST my_elem |
|
710 attr1 CDATA "foo" |
|
711 attr2 CDATA "bar" |
|
712 > |
|
713 |
|
714 =item * Quote (Default: { XMLDecl => '"', Attr => '"', Entity => '"', SystemLiteral => '"' }) |
|
715 |
|
716 Quote contains a reference to a hash that defines which quoting characters |
|
717 to use when printing XML declarations (XMLDecl), attribute values (Attr), |
|
718 <!ENTITY> values (Entity) and system/public literals (SystemLiteral) |
|
719 as found in <!DOCTYPE>, <!ENTITY> declarations etc. |
|
720 |
|
721 =item * PrintDefaultAttr (Default: 0) |
|
722 |
|
723 If 1, prints attribute values regardless of whether they are default |
|
724 attribute values (as defined in <!ATTLIST> declarations.) |
|
725 Normally, default attributes are not printed. |
|
726 |
|
727 =item * Encoding (Default: undef) |
|
728 |
|
729 Defines the output encoding (if specified.) |
|
730 Note that future calls to the xml_decl() handler may override this setting |
|
731 (if they contain an Encoding definition.) |
|
732 |
|
733 =item * EncodeUnmapped (Default: \&XML::UM::encode_unmapped_dec) |
|
734 |
|
735 Defines how Unicode characters not found in the mapping file (of the |
|
736 specified encoding) are printed. |
|
737 By default, they are converted to decimal entity references, like '{' |
|
738 |
|
739 Use \&XML::UM::encode_unmapped_hex for hexadecimal constants, like '«' |
|
740 |
|
741 =item * Print (Default: sub { print @_ }, which prints to stdout) |
|
742 |
|
743 The subroutine that is used to print the encoded XML output. |
|
744 The default prints the string to stdout. |
|
745 |
|
746 =back |
|
747 |
|
748 =head1 Method: get_compressed_element_suffix ($event) |
|
749 |
|
750 Override this method to support the different styles for printing |
|
751 empty elements in compressed notation, e.g. <p/>, <p></p>, <p />, <p>. |
|
752 |
|
753 The default returns "/>", which results in <p/>. |
|
754 Use " />" for XHTML style elements or ">" for certain HTML style elements. |
|
755 |
|
756 The $event parameter is the hash reference that was received from the |
|
757 start_element() handler. |
|
758 |
|
759 =head1 Extra PerlSAX event information |
|
760 |
|
761 XML::Handler::Composer relies on hints from previous SAX filters to |
|
762 format certain parts of the XML. |
|
763 These SAX filters (e.g. XML::Filter::Reindent) pass extra information by adding |
|
764 name/value pairs to the appropriate PerlSAX events (the events themselves are |
|
765 hash references.) |
|
766 |
|
767 =over 4 |
|
768 |
|
769 =item * entity_reference: Parameter => 1 |
|
770 |
|
771 If Parameter is 1, it means that it is a parameter entity reference. |
|
772 A parameter entity is referenced with %ent; instead of &ent; and the |
|
773 entity declaration starts with <!ENTITY % ent ...> instead of <!ENTITY ent ...> |
|
774 |
|
775 NOTE: This should be added to the PerlSAX interface! |
|
776 |
|
777 =item * start_element/end_element: Compress => 1 |
|
778 |
|
779 If Compress is 1 in both the start_element and end_element event, the element |
|
780 will be printed in compressed form, e.g. <a/> instead of <a></a>. |
|
781 |
|
782 =item * start_element: PreserveWS => 1 |
|
783 |
|
784 If newline conversion is active (i.e. Newline was defined in the constructor), |
|
785 then newlines will *NOT* be converted in text (character events) within this |
|
786 element. |
|
787 |
|
788 =item * attlist_decl: First, MoreFollow |
|
789 |
|
790 The First and MoreFollow options can be used to force successive <!ATTLIST> |
|
791 declarations for the same element to be merged, e.g. |
|
792 |
|
793 <!ATTLIST my_elem |
|
794 attr1 CDATA "foo" |
|
795 attr2 CDATA "bar" |
|
796 attr3 CDATA "quux" |
|
797 > |
|
798 |
|
799 In this example, the attlist_decl event for foo should contain |
|
800 (First => 1, MoreFollow => 1) and the event for bar should contain |
|
801 (MoreFollow => 1). The quux event should have no extra info. |
|
802 |
|
803 'First' indicates that the event is the first of a sequence. |
|
804 'MoreFollow' indicates that more events will follow in this sequence. |
|
805 |
|
806 If neither option is set by the preceding PerlSAX filter, each attribute |
|
807 definition will be printed as a separate <!ATTLIST> line. |
|
808 |
|
809 =back |
|
810 |
|
811 =head1 CAVEATS |
|
812 |
|
813 This code is highly experimental! |
|
814 It has not been tested well and the API may change. |
|
815 |
|
816 =head1 AUTHOR |
|
817 |
|
818 Send bug reports, hints, tips, suggestions to Enno Derksen at |
|
819 <F<enno@att.com>>. |
|
820 |
|
821 =cut |