--- a/dummy_foundation/lib/XML/Handler/Composer.pm Wed Jun 03 18:33:51 2009 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,821 +0,0 @@
-package XML::Handler::Composer;
-use strict;
-use XML::UM;
-use Carp;
-
-use vars qw{ %DEFAULT_QUOTES %XML_MAPPING_CRITERIA };
-
-%DEFAULT_QUOTES = (
- XMLDecl => '"',
- Attr => '"',
- Entity => '"',
- SystemLiteral => '"',
- );
-
-%XML_MAPPING_CRITERIA =
-(
- Text =>
- {
- '<' => '<',
- '&' => '&',
-
- ']]>' => ']]>',
- },
-
- CDataSection =>
- {
- ']]>' => ']]>', # NOTE: this won't be translated back correctly
- },
-
- Attr => # attribute value (assuming double quotes "" are used)
- {
-# '"' => '"', # Use ("'" => ''') when using single quotes
- '<' => '<',
- '&' => '&',
- },
-
- Entity => # entity value (assuming double quotes "" are used)
- {
-# '"' => '"', # Use ("'" => ''') when using single quotes
- '%' => '%',
- '&' => '&',
- },
-
- Comment =>
- {
- '--' => '--', # NOTE: this won't be translated back correctly
- },
-
- ProcessingInstruction =>
- {
- '?>' => '?>', # not sure if this will be translated back correctly
- },
-
- # The SYSTEM and PUBLIC identifiers in DOCTYPE declaration (quoted strings)
- SystemLiteral =>
- {
-# '"' => '"', # Use ("'" => ''') when using single quotes
- },
-
-);
-
-sub new
-{
- my ($class, %options) = @_;
- my $self = bless \%options, $class;
-
- $self->{EndWithNewline} = 1 unless defined $self->{EndWithNewline};
-
- if (defined $self->{Newline})
- {
- $self->{ConvertNewlines} = 1;
- }
- else
- {
- # Use this when printing newlines in case the user didn't specify one
- $self->{Newline} = "\x0A";
- }
-
- $self->{DocTypeIndent} = $self->{Newline} . " "
- unless defined $self->{DocTypeIndent};
-
- $self->{IndentAttlist} = " " unless defined $self->{IndentAttlist};
-
- $self->{Print} = sub { print @_ } unless defined $self->{Print};
-
- $self->{Quote} ||= {};
- for my $q (keys %DEFAULT_QUOTES)
- {
- $self->{Quote}->{$q} ||= $DEFAULT_QUOTES{$q};
- }
-
- # Convert to UTF-8 by default, i.e. when <?xml encoding=...?> is missing
- # and no {Encoding} is specified.
- # Note that the internal representation *is* UTF-8, so we
- # simply return the (string) parameter.
- $self->{Encode} = sub { shift } unless defined $self->{Encode};
-
- # Convert unmapped characters to hexadecimal constants a la '号'
- $self->{EncodeUnmapped} = \&XML::UM::encode_unmapped_hex
- unless defined $self->{EncodeUnmapped};
-
- my $encoding = $self->{Encoding};
- $self->setEncoding ($encoding) if defined $encoding;
-
- $self->initMappers;
-
- $self;
-}
-
-#
-# Setup the mapping routines that convert '<' to '<' etc.
-# for the specific XML constructs.
-#
-sub initMappers
-{
- my $self = shift;
- my %escape;
- my $convert_newlines = $self->{ConvertNewlines};
-
- for my $n (qw{ Text Comment CDataSection Attr SystemLiteral
- ProcessingInstruction Entity })
- {
- $escape{$n} = $self->create_utf8_mapper ($n, $convert_newlines);
- }
-
- # Text with xml:space="preserve", should not have newlines converted.
- $escape{TextPreserveNL} = $self->create_utf8_mapper ('Text', 0);
- # (If newline conversion is inactive, $escape{TextPreserveNL} does the
- # same as $escape{Text} defined above ...)
-
- $self->{Escape} = \%escape;
-}
-
-sub setEncoding
-{
- my ($self, $encoding) = @_;
-
- $self->{Encode} = XML::UM::get_encode (
- Encoding => $encoding, EncodeUnmapped => $self->{EncodeUnmapped});
-}
-
-sub create_utf8_mapper
-{
- my ($self, $construct, $convert_newlines) = @_;
-
- my $c = $XML_MAPPING_CRITERIA{$construct};
- croak "no XML mapping criteria defined for $construct"
- unless defined $c;
-
- my %hash = %$c;
-
- # If this construct appears between quotes in the XML document
- # (and it has a quoting character defined),
- # ensure that the quoting character is appropriately converted
- # to " or '
- my $quote = $self->{Quote}->{$construct};
- if (defined $quote)
- {
- $hash{$quote} = $quote eq '"' ? '"' : ''';
- }
-
- if ($convert_newlines)
- {
- $hash{"\x0A"} = $self->{Newline};
- }
-
- gen_utf8_subst (%hash);
-}
-
-#
-# Converts a string literal e.g. "ABC" into '\x41\x42\x43'
-# so it can be stuffed into a regular expression
-#
-sub str_to_hex # static
-{
- my $s = shift;
-
- $s =~ s/(.)/ sprintf ("\\x%02x", ord ($1)) /egos;
-
- $s;
-}
-
-#
-# In later perl versions (5.005_55 and up) we can simply say:
-#
-# use utf8;
-# $literals = join ("|", map { str_to_hex ($_) } keys %hash);
-# $s =~ s/($literals)/$hash{$1}/ego;
-#
-
-sub gen_utf8_subst # static
-{
- my (%hash) = @_;
-
- my $code = 'sub { my $s = shift; $s =~ s/(';
- $code .= join ("|", map { str_to_hex ($_) } keys %hash);
- $code .= ')|(';
- $code .= '[\\x00-\\xBF]|[\\xC0-\\xDF].|[\\xE0-\\xEF]..|[\\xF0-\\xFF]...';
- $code .= ')/ defined ($1) ? $hash{$1} : $2 /ego; $s }';
-
- my $f = eval $code;
- croak "XML::Handler::Composer - can't eval code: $code\nReason: $@" if $@;
-
- $f;
-}
-
-# This should be optimized!
-sub print
-{
- my ($self, $str) = @_;
- $self->{Print}->($self->{Encode}->($str));
-}
-
-# Used by start_element. It determines the style in which empty elements
-# are printed. The default implementation returns "/>" so they are printed
-# like this: <a/>
-# Override this method to support e.g. XHTML style tags.
-sub get_compressed_element_suffix
-{
- my ($self, $event) = @_;
-
- "/>";
-
- # return " />" for XHTML style, or
- # "><$tagName/>" for uncompressed tags (where $tagName is $event->{Name})
-}
-
-#----- PerlSAX handlers -------------------------------------------------------
-
-sub start_document
-{
- my ($self) = @_;
-
- $self->{InCDATA} = 0;
- $self->{DTD} = undef;
- $self->{PreserveWS} = 0; # root element has xml:space="default"
- $self->{PreserveStack} = [];
- $self->{PrintedXmlDecl} = 0; # whether <?xml ...?> was printed
-}
-
-sub end_document
-{
- my ($self) = @_;
-
- # Print final Newline at the end of the XML document (if desired)
- $self->print ($self->{Newline}) if $self->{EndWithNewline};
-}
-
-# This event is received *AFTER* the Notation, Element, Attlist etc. events
-# that are contained within the DTD.
-sub doctype_decl
-{
- my ($self, $event) = @_;
- $self->flush_xml_decl;
-
- my $q = $self->{Quote}->{SystemLiteral};
- my $escape_literal = $self->{Escape}->{SystemLiteral};
-
- my $name = $event->{Name};
- my $sysId = $event->{SystemId};
- $sysId = &$escape_literal ($sysId) if defined $sysId;
- my $pubId = $event->{PublicId};
- $pubId = &$escape_literal ($pubId) if defined $pubId;
-
- my $str = "<!DOCTYPE $name";
- if (defined $pubId)
- {
- $str .= " PUBLIC $q$pubId$q $q$sysId$q";
- }
- elsif (defined $sysId)
- {
- $str .= " SYSTEM $q$sysId$q";
- }
-
- my $dtd_contents = $self->{DTD};
- my $nl = $self->{Newline};
-
- if (defined $dtd_contents)
- {
- delete $self->{DTD};
-
- $str .= " [$dtd_contents$nl]>$nl";
- }
- else
- {
- $str .= ">$nl";
- }
- $self->print ($str);
-}
-
-sub start_element
-{
- my ($self, $event) = @_;
-
- my $preserve_stack = $self->{PreserveStack};
- if (@$preserve_stack == 0)
- {
- # This is the root element. Print the <?xml ...?> declaration now if
- # it wasn't printed and it should be.
- $self->flush_xml_decl;
- }
-
- my $str = "<" . $event->{Name};
-
- my $suffix = ">";
- if ($event->{Compress})
- {
- $suffix = $self->get_compressed_element_suffix ($event);
- }
-
- # Push PreserveWS state of parent element on the stack
- push @{ $preserve_stack }, $self->{PreserveWS};
- $self->{PreserveWS} = $event->{PreserveWS};
-
- my $ha = $event->{Attributes};
- my @attr;
- if (exists $event->{AttributeOrder})
- {
- my $defaulted = $event->{Defaulted};
- if (defined $defaulted && !$self->{PrintDefaultAttr})
- {
- if ($defaulted > 0)
- {
- @attr = @{ $event->{AttributeOrder} }[0 .. $defaulted - 1];
- }
- # else: all attributes are defaulted i.e. @attr = ();
- }
- else # no attr are defaulted
- {
- @attr = @{ $event->{AttributeOrder} };
- }
- }
- else # no attr order defined
- {
- @attr = keys %$ha;
- }
-
- my $escape = $self->{Escape}->{Attr};
- my $q = $self->{Quote}->{Attr};
-
- for (my $i = 0; $i < @attr; $i++)
- {
-#?? could print a newline every so often...
- my $name = $attr[$i];
- my $val = &$escape ($ha->{$name});
- $str .= " $name=$q$val$q";
- }
- $str .= $suffix;
-
- $self->print ($str);
-}
-
-sub end_element
-{
- my ($self, $event) = @_;
-
- $self->{PreserveWS} = pop @{ $self->{PreserveStack} };
-
- return if $event->{Compress};
-
- $self->print ("</" . $event->{Name} . ">");
-}
-
-sub characters
-{
- my ($self, $event) = @_;
-
- if ($self->{InCDATA})
- {
-#?? should this use $self->{PreserveWS} ?
-
- my $esc = $self->{Escape}->{CDataSection};
- $self->print (&$esc ($event->{Data}));
- }
- else # regular text
- {
- my $esc = $self->{PreserveWS} ?
- $self->{Escape}->{TextPreserveNL} :
- $self->{Escape}->{Text};
-
- $self->print (&$esc ($event->{Data}));
- }
-}
-
-sub start_cdata
-{
- my $self = shift;
- $self->{InCDATA} = 1;
-
- $self->print ("<![CDATA[");
-}
-
-sub end_cdata
-{
- my $self = shift;
- $self->{InCDATA} = 0;
-
- $self->print ("]]>");
-}
-
-sub comment
-{
- my ($self, $event) = @_;
- $self->flush_xml_decl;
-
- my $esc = $self->{Escape}->{Comment};
-#?? still need to support comments in the DTD
-
- $self->print ("<!--" . &$esc ($event->{Data}) . "-->");
-}
-
-sub entity_reference
-{
- my ($self, $event) = @_;
- $self->flush_xml_decl;
-
- my $par = $event->{Parameter} ? '%' : '&';
-#?? parameter entities (like %par;) are NOT supported!
-# PerlSAX::handle_default should be fixed!
-
- $self->print ($par . $event->{Name} . ";");
-}
-
-sub unparsed_entity_decl
-{
- my ($self, $event) = @_;
- $self->flush_xml_decl;
-
- $self->entity_decl ($event);
-}
-
-sub notation_decl
-{
- my ($self, $event) = @_;
- $self->flush_xml_decl;
-
- my $name = $event->{Name};
- my $sysId = $event->{SystemId};
- my $pubId = $event->{PublicId};
-
- my $q = $self->{Quote}->{SystemLiteral};
- my $escape = $self->{Escape}->{SystemLiteral};
-
- $sysId = &$escape ($sysId) if defined $sysId;
- $pubId = &$escape ($pubId) if defined $pubId;
-
- my $str = $self->{DocTypeIndent} . "<!NOTATION $name";
-
- if (defined $pubId)
- {
- $str .= " PUBLIC $q$pubId$q";
- }
- if (defined $sysId)
- {
- $str .= " SYSTEM $q$sysId$q";
- }
- $str .= ">";
-
- $self->{DTD} .= $str;
-}
-
-sub element_decl
-{
- my ($self, $event) = @_;
- $self->flush_xml_decl;
-
- my $name = $event->{Name};
- my $model = $event->{Model};
-
- $self->{DTD} .= $self->{DocTypeIndent} . "<!ELEMENT $name $model>";
-}
-
-sub entity_decl
-{
- my ($self, $event) = @_;
- $self->flush_xml_decl;
-
- my $name = $event->{Name};
-
- my $par = "";
- if ($name =~ /^%/)
- {
- # It's a parameter entity (i.e. %ent; instead of &ent;)
- $name = substr ($name, 1);
- $par = "% ";
- }
-
- my $str = $self->{DocTypeIndent} . "<!ENTITY $par$name";
-
- my $value = $event->{Value};
- my $sysId = $event->{SysId};
- my $pubId = $event->{PubId};
- my $ndata = $event->{Ndata};
-
- my $q = $self->{Quote}->{SystemLiteral};
- my $escape = $self->{Escape}->{SystemLiteral};
-
- if (defined $value)
- {
-#?? use {Entity} quote etc...
- my $esc = $self->{Escape}->{Entity};
- my $p = $self->{Quote}->{Entity};
- $str .= " $p" . &$esc ($value) . $p;
- }
- if (defined $pubId)
- {
- $str .= " PUBLIC $q" . &$escape ($pubId) . $q;
- }
- elsif (defined $sysId)
- {
- $str .= " SYSTEM";
- }
-
- if (defined $sysId)
- {
- $str .= " $q" . &$escape ($sysId) . $q;
- }
- $str .= " NDATA $ndata" if defined $ndata;
- $str .= ">";
-
- $self->{DTD} .= $str;
-}
-
-sub attlist_decl
-{
- my ($self, $event) = @_;
- $self->flush_xml_decl;
-
- my $elem = $event->{ElementName};
-
- my $str = $event->{AttributeName} . " " . $event->{Type};
- $str .= " #FIXED" if defined $event->{Fixed};
-
- $str = $str;
-
- my $def = $event->{Default};
- if (defined $def)
- {
- $str .= " $def";
-
- # Note sometimes Default is a value with quotes.
- # We'll use the existing quotes in that case...
- }
-
- my $indent;
- if (!exists($event->{First}) || $event->{First})
- {
- $self->{DTD} .= $self->{DocTypeIndent} . "<!ATTLIST $elem";
-
- if ($event->{MoreFollow})
- {
- $indent = $self->{Newline} . $self->{IndentAttlist};
- }
- else
- {
- $indent = " ";
- }
- }
- else
- {
- $indent = $self->{Newline} . $self->{IndentAttlist};
- }
-
- $self->{DTD} .= $indent . $str;
-
- unless ($event->{MoreFollow})
- {
- $self->{DTD} .= '>';
- }
-}
-
-sub xml_decl
-{
- my ($self, $event) = @_;
- return if $self->{PrintedXmlDecl}; # already printed it
-
- my $version = $event->{Version};
- my $encoding = $event->{Encoding};
- if (defined $self->{Encoding})
- {
- $encoding = $self->{Encoding};
- }
- else
- {
- $self->setEncoding ($encoding) if defined $encoding;
- }
-
- my $standalone = $event->{Standalone};
- $standalone = ($standalone ? "yes" : "no") if defined $standalone;
-
- my $q = $self->{Quote}->{XMLDecl};
- my $nl = $self->{Newline};
-
- my $str = "<?xml";
- $str .= " version=$q$version$q" if defined $version;
- $str .= " encoding=$q$encoding$q" if defined $encoding;
- $str .= " standalone=$q$standalone$q" if defined $standalone;
- $str .= "?>$nl$nl";
-
- $self->print ($str);
- $self->{PrintedXmlDecl} = 1;
-}
-
-#
-# Prints the <xml ...?> declaration if it wasn't already printed
-# *and* the user wanted it to be printed (because s/he set $self->{Encoding})
-#
-sub flush_xml_decl
-{
- my ($self) = @_;
- return if $self->{PrintedXmlDecl};
-
- if (defined $self->{Encoding})
- {
- $self->xml_decl ({ Version => '1.0', Encoding => $self->{Encoding} });
- }
-
- # If it wasn't printed just now, it doesn't need to be printed at all,
- # so pretend we did print it.
- $self->{PrintedXmlDecl} = 1;
-}
-
-sub processing_instruction
-{
- my ($self, $event) = @_;
- $self->flush_xml_decl;
-
- my $escape = $self->{Escape}->{ProcessingInstruction};
-
- my $str = "<?" . $event->{Target} . " " .
- &$escape ($event->{Data}). "?>";
-
- $self->print ($str);
-}
-
-1; # package return code
-
-__END__
-
-=head1 NAME
-
-XML::Handler::Composer - Another XML printer/writer/generator
-
-=head1 SYNOPSIS
-
-use XML::Handler::Composer;
-
-my $composer = new XML::Handler::Composer ( [OPTIONS] );
-
-=head1 DESCRIPTION
-
-XML::Handler::Composer is similar to XML::Writer, XML::Handler::XMLWriter,
-XML::Handler::YAWriter etc. in that it generates XML output.
-
-This implementation may not be fast and it may not be the best solution for
-your particular problem, but it has some features that may be missing in the
-other implementations:
-
-=over 4
-
-=item * Supports every output encoding that L<XML::UM> supports
-
-L<XML::UM> supports every encoding for which there is a mapping file
-in the L<XML::Encoding> distribution.
-
-=item * Pretty printing
-
-When used with L<XML::Filter::Reindent>.
-
-=item * Fine control over which kind of quotes are used
-
-See options below.
-
-=item * Supports PerlSAX interface
-
-=back
-
-=head1 Constructor Options
-
-=over 4
-
-=item * EndWithNewline (Default: 1)
-
-Whether to print a newline at the end of the file (i.e. after the root element)
-
-=item * Newline (Default: undef)
-
-If defined, which newline to use for printing.
-(Note that XML::Parser etc. convert newlines into "\x0A".)
-
-If undef, newlines will not be converted and XML::Handler::Composer will
-use "\x0A" when printing.
-
-A value of "\n" will convert the internal newlines into the platform
-specific line separator.
-
-See the PreserveWS option in the characters event (below) for finer control
-over when newline conversion is active.
-
-=item * DocTypeIndent (Default: a Newline and 2 spaces)
-
-Newline plus indent that is used to separate lines inside the DTD.
-
-=item * IndentAttList (Default: 8 spaces)
-
-Indent used when printing an <!ATTLIST> declaration that has more than one
-attribute definition, e.g.
-
- <!ATTLIST my_elem
- attr1 CDATA "foo"
- attr2 CDATA "bar"
- >
-
-=item * Quote (Default: { XMLDecl => '"', Attr => '"', Entity => '"', SystemLiteral => '"' })
-
-Quote contains a reference to a hash that defines which quoting characters
-to use when printing XML declarations (XMLDecl), attribute values (Attr),
-<!ENTITY> values (Entity) and system/public literals (SystemLiteral)
-as found in <!DOCTYPE>, <!ENTITY> declarations etc.
-
-=item * PrintDefaultAttr (Default: 0)
-
-If 1, prints attribute values regardless of whether they are default
-attribute values (as defined in <!ATTLIST> declarations.)
-Normally, default attributes are not printed.
-
-=item * Encoding (Default: undef)
-
-Defines the output encoding (if specified.)
-Note that future calls to the xml_decl() handler may override this setting
-(if they contain an Encoding definition.)
-
-=item * EncodeUnmapped (Default: \&XML::UM::encode_unmapped_dec)
-
-Defines how Unicode characters not found in the mapping file (of the
-specified encoding) are printed.
-By default, they are converted to decimal entity references, like '{'
-
-Use \&XML::UM::encode_unmapped_hex for hexadecimal constants, like '«'
-
-=item * Print (Default: sub { print @_ }, which prints to stdout)
-
-The subroutine that is used to print the encoded XML output.
-The default prints the string to stdout.
-
-=back
-
-=head1 Method: get_compressed_element_suffix ($event)
-
-Override this method to support the different styles for printing
-empty elements in compressed notation, e.g. <p/>, <p></p>, <p />, <p>.
-
-The default returns "/>", which results in <p/>.
-Use " />" for XHTML style elements or ">" for certain HTML style elements.
-
-The $event parameter is the hash reference that was received from the
-start_element() handler.
-
-=head1 Extra PerlSAX event information
-
-XML::Handler::Composer relies on hints from previous SAX filters to
-format certain parts of the XML.
-These SAX filters (e.g. XML::Filter::Reindent) pass extra information by adding
-name/value pairs to the appropriate PerlSAX events (the events themselves are
-hash references.)
-
-=over 4
-
-=item * entity_reference: Parameter => 1
-
-If Parameter is 1, it means that it is a parameter entity reference.
-A parameter entity is referenced with %ent; instead of &ent; and the
-entity declaration starts with <!ENTITY % ent ...> instead of <!ENTITY ent ...>
-
-NOTE: This should be added to the PerlSAX interface!
-
-=item * start_element/end_element: Compress => 1
-
-If Compress is 1 in both the start_element and end_element event, the element
-will be printed in compressed form, e.g. <a/> instead of <a></a>.
-
-=item * start_element: PreserveWS => 1
-
-If newline conversion is active (i.e. Newline was defined in the constructor),
-then newlines will *NOT* be converted in text (character events) within this
-element.
-
-=item * attlist_decl: First, MoreFollow
-
-The First and MoreFollow options can be used to force successive <!ATTLIST>
-declarations for the same element to be merged, e.g.
-
- <!ATTLIST my_elem
- attr1 CDATA "foo"
- attr2 CDATA "bar"
- attr3 CDATA "quux"
- >
-
-In this example, the attlist_decl event for foo should contain
-(First => 1, MoreFollow => 1) and the event for bar should contain
-(MoreFollow => 1). The quux event should have no extra info.
-
-'First' indicates that the event is the first of a sequence.
-'MoreFollow' indicates that more events will follow in this sequence.
-
-If neither option is set by the preceding PerlSAX filter, each attribute
-definition will be printed as a separate <!ATTLIST> line.
-
-=back
-
-=head1 CAVEATS
-
-This code is highly experimental!
-It has not been tested well and the API may change.
-
-=head1 AUTHOR
-
-Send bug reports, hints, tips, suggestions to Enno Derksen at
-<F<enno@att.com>>.
-
-=cut