# HG changeset patch
# User tahirm@symbian.org
# Date 1243501803 -3600
# Node ID 02cd6b52f378c4d1f882dc05a75eede55030855a
adding synch hg to p4 & create dummy foundation structure scripts

diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/generate_dummytree.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dummy_foundation/generate_dummytree.pl	Thu May 28 10:10:03 2009 +0100
@@ -0,0 +1,729 @@
+#! perl
+
+# Read a Foundation system model, mapping file, and System_Definition.xml
+# and generate a Perforce branchspec to reflect the code reorg
+
+use strict;
+
+use FindBin;
+use lib ".";
+use lib "./lib";
+use lib "$FindBin::Bin";
+use lib "$FindBin::Bin/lib";
+use XML::DOM;
+#use XML::DOM::ValParser;
+
+# produces the "Use of uninitialized value in concatenation (.) or string" warning
+use XML::XQL;
+use XML::XQL::DOM;
+
+# Read the command line to get the filenames
+
+sub Usage($)
+	{
+	my ($reason) = @_;
+
+	print "Usage: $reason\n" if ($reason);
+	print <<USAGE_EOF;
+ [options]
+
+params:
+-s	XML version of Symbian System_definition
+-m	XML version of Foundation System Model
+
+options:
+-o	XML file showing unreferenced
+	parts of the System Model
+-r	Remove matched objects from -o output
+-c	Tab separated file showing the Schedule 12
+	component for each MRP file
+
+USAGE_EOF
+	exit(1);
+	}
+
+use Getopt::Long;
+
+my $foundationmodel = "output_attr.xml";
+my $foundationdirs = "foundation_dirs.xml";
+my $systemdefinition = "variability/vp_data/templates/System_Definition_template.xml";
+my $rootdir = ".";
+my $remove = 0;
+my $cbrmappingfile = "";
+
+Usage("Bad arguments") unless GetOptions(
+	'm=s' => \$foundationmodel,
+	's=s' => \$systemdefinition,
+	'o=s' => \$rootdir,
+	'c=s' => \$cbrmappingfile);
+
+Usage("Too many arguments") if (scalar @ARGV > 0);
+Usage("Cannot find $foundationmodel") if (!-f $foundationmodel);
+
+
+my $xmlParser = new XML::DOM::Parser;
+XML::DOM::ignoreReadOnly(1);
+
+my $foundationpath = ".";
+my $sysdefpath = ".";
+$foundationpath = $1 if ($foundationmodel =~ /^(.+)\\[^\\]+$/);
+$sysdefpath = $1 if ($systemdefinition =~ /^(.+)\\[^\\]+$/);
+#$xmlParser->set_sgml_search_path($foundationpath, $sysdefpath);
+
+my $foundationXML = $xmlParser->parsefile ($foundationmodel);
+chdir($rootdir);
+
+# Collect the Schedule12 entries, checking for duplicates
+
+my %sch12refs;
+my %componenttype;
+my ($foundation) = $foundationXML->getElementsByTagName("SystemDefinition");
+Usage("No <SystemDefinition> in $foundationmodel ?") if (!defined $foundation);
+
+# Process the Foundation model to get the directory names
+
+my %unique_names;
+my %partnames;
+my %dirnames;
+my %component_dirs;
+my %old_component_mapping;
+my %component_object;	# reference to XML objects
+my %mrp_mapping;
+
+sub process_foundation($$);	# declare the prototype for recursive call
+sub process_foundation($$)
+	{
+	my ($node,$level) = @_;
+
+	my @children = $node->getChildNodes;
+	foreach my $child (@children)
+		{
+		if ($child->getNodeTypeName ne "ELEMENT_NODE")
+			{
+			# text and comments don't count
+			next;
+			}
+		if ($level == 0)
+			{
+			process_foundation($child,1);
+			next;
+			}
+
+		next if ($child->getAttribute("contribution") eq "excluded");
+
+		my $tagname = $child->getTagName;
+		my $name = $child->getAttribute("name");
+		my $longname = $child->getAttribute("long-name");
+		$longname = $name if ($longname eq "");
+
+		if ($name ne "")
+			{
+			if (defined $unique_names{$name})
+				{
+				print "** duplicated name $name\n";
+				}
+			$unique_names{$name} = 1;
+			}
+		if ($name eq "")
+			{
+			printf "No name in %s\n", $child->toString();
+			next;
+			}
+
+		my $dirname = $name;
+		$dirname =~ s/\s+//g;		# remove the spaces
+		$dirname =~ s/[\(\)]/_/g;	# map troublesome characters
+		$dirname =~ s/[ \.]*$//g;	# trailing spaces or dots
+		$partnames{$tagname} = $name;
+		$dirnames{$tagname} = $dirname;
+
+		print "making directory $dirname\n" if ($level <2);
+		mkdir $dirname;		# create the directory
+
+		if ($tagname eq "component")
+			{
+			$child->printToFile("$dirname/component.txt");
+			next;
+			}
+
+		chdir $dirname;
+		if ($tagname eq "block")
+			{
+			# Create a fragment which describes this package
+			open PACKAGE_MODEL, ">package_model.xml";
+			print PACKAGE_MODEL "\n";
+			print PACKAGE_MODEL $child->toString();
+			print PACKAGE_MODEL "\n";
+			close PACKAGE_MODEL;
+			}
+
+		process_foundation($child,$level+1);
+		chdir "..";
+		}
+	}
+
+my ($model) = $foundationXML->getElementsByTagName("SystemDefinition");
+process_foundation($model,0);
+
+exit 0;
+
+# Dump the old component -> new component -> directory mapping
+
+foreach my $component (sort keys %old_component_mapping)
+	{
+	my $new_component = $old_component_mapping{$component};
+	printf "%s => %s => %s\n",
+		$component, $new_component, $component_dirs{$new_component};
+	}
+
+# Find the old component entries in the XML file
+
+my %branchspec;
+my %reverse_branchspec;
+my %primary_mrp;
+my %otherroots;
+my %ignoreme;
+
+sub add_to_branchspec($$;$$);
+sub add_to_branchspec($$;$$)
+	{
+	my ($olddir,$newdir,$primary,$noexpansion) = @_;
+	$primary = "generate_branchspec.pl" if (!defined $primary);
+
+	if (defined $ignoreme{$olddir} && $primary !~ /^extra root/)
+		{
+		print "Ignoring $olddir - $ignoreme{$olddir}\n";
+		return;
+		}
+	if (defined $branchspec{$olddir})
+		{
+		if ($newdir eq $branchspec{$olddir})
+			{
+			# reasserting the previous branchspec - not a problem
+			return;
+			}
+		# trying to change the old mapping
+		print "$primary attempted to redefine $olddir mapping\n";
+		print "Was $branchspec{$olddir} instead of $newdir\n";
+		exit(1);
+		}
+
+	if (defined $reverse_branchspec{$newdir})
+		{
+		print "Branchspec collision from $primary into $newdir\n";
+		print "Can't send $olddir and $reverse_branchspec{$newdir} to same place\n";
+		exit(1);
+		}
+
+	if (defined $otherroots{$olddir} && !$noexpansion)
+		{
+		print "Adjusting branchspec for $primary to include the other roots\n";
+		my $otherolddir = $olddir;
+		$otherolddir =~ s/([^\/]+)\/$//;
+		my $maindir = $1;
+		add_to_branchspec("$olddir","$newdir$maindir/",$primary,1);	# avoid recursion
+
+		foreach my $otherdir (split /\//, $otherroots{$olddir})
+			{
+			next if (length($otherdir) == 0);
+			add_to_branchspec("$otherolddir$otherdir/","$newdir$otherdir/","extra root of $primary",1);
+			}
+		}
+	else
+		{
+		$branchspec{$olddir} = $newdir;
+		$reverse_branchspec{$newdir} = $olddir;
+		$primary_mrp{$olddir} = $primary;
+		}
+	}
+
+# Workaround for the common/product and cedar/product directories, which don't
+# have associated CBR components
+
+add_to_branchspec("common/product/", "ostools/toolsandutils/ToolsandUtils/product/");
+add_to_branchspec("cedar/product/", "ostools/toolsandutils/ToolsandUtils/cedarproduct/");
+
+# Add catchall mappings to get all of the other odds and ends
+# LHS must be more specific than a single directory, otherwise apply_branchspec hits too many things
+# RHS must be short, to avoid blowing the Windows path limit when syncing TBAS builds
+add_to_branchspec("common/generic/",	"os/unref/orphan/comgen/", "(Orphans)");
+add_to_branchspec("common/techview/",	"os/unref/orphan/comtv/", "(Orphans)");
+add_to_branchspec("common/testtools/",	"os/unref/orphan/comtt/", "(Orphans)");
+add_to_branchspec("common/connectqi/",	"os/unref/orphan/comqi/", "(Orphans)");
+add_to_branchspec("cedar/generic/",	"os/unref/orphan/cedgen/", "(Orphans)");
+
+my @clumps = (
+	"cedar/generic/base/e32/",
+	"cedar/generic/base/f32/",
+	"common/generic/comms-infras/esock/",
+	"common/generic/multimedia/ecam/",
+	"common/generic/multimedia/icl/",
+	"common/generic/multimedia/mmf/",
+	"common/generic/j2me/",	# not really a clump, but must be called "j2me"
+	"common/generic/telephony/trp/",
+	"common/generic/security/caf2/test/",
+	"common/generic/networking/dialog/",
+	"common/generic/comms-infras/commsdat/",
+	"common/generic/connectivity/legacy/PLP/",	# plpvariant shares PLPInc main PLP group
+	"common/testtools/ResourceHandler/",	# entangled versions for Techview, UIQ and S60
+);
+
+# Force E32 into a faintly sensible place
+
+add_to_branchspec("cedar/generic/base/e32/", "os/kernelhwsrv/kernel/eka/", "(Hand coded E32 location)");
+
+# Force j2me to be called j2me
+
+add_to_branchspec("common/generic/j2me/", "app/java/midpprofile/midpmidlet/j2me/", "(Hand coded J2ME location)");
+
+# Peer relationships if x uses "..\y", then add this as $peers{"x"} = "y"
+
+my %peers;
+$peers{"cedar/generic/tools/e32toolp/"} = "cedar/generic/tools/buildsystem/";
+
+# multirooted components, which own several trees that have no common root
+# Add these to the branchspec automatically alongside the root containing the MRP file
+
+$otherroots{"common/generic/networking/inhook6/"} = "inhook6example";
+$otherroots{"common/generic/networking/examplecode/"} = "anvltest/cgi/ping/udpecho/udpsend/webserver";
+$otherroots{"common/generic/networking/qos/"} = "qostest/QoSTesting";
+$otherroots{"common/generic/wap-stack/wapstack/"} = "documentation/confidential";
+$otherroots{"common/generic/bluetooth/latest/bluetooth/test/"} = "example/testui";
+
+
+my %hasbldfile;
+
+my %foundationrefs;
+my %foundationbymrp;
+my %modelnames;
+sub match_names($);	# declare the prototype for recursive call
+sub match_names($)
+	{
+	my ($node) = @_;
+
+	my @children = $node->getChildNodes;
+	foreach my $child (@children)
+		{
+		if ($child->getNodeTypeName ne "ELEMENT_NODE")
+			{
+			# text and comments don't count
+			next;
+			}
+		my $tagname = $child->getTagName;
+		if ($tagname eq "layer")
+			{
+			$partnames{"block"} = undef;
+			$partnames{"subblock"} = undef;
+			$partnames{"collection"} = undef;
+			}
+		if ($tagname eq "block")
+			{
+			$partnames{"subblock"} = undef;
+			$partnames{"collection"} = undef;
+			}
+		if ($tagname eq "subblock")
+			{
+			$partnames{"collection"} = undef;
+			}
+		if ($tagname eq "unit")
+			{
+			# units are the payload
+
+			my $mrp = $child->getAttribute("mrp");
+			$mrp =~ s/\\/\//g;	# ensure that / separators are used
+			$child->setAttribute("mrp",$mrp);
+
+			my $blockname = $partnames{"subblock"};
+			$blockname = $partnames{"block"} if (!defined $blockname);	# no subblock
+			$blockname = "Misc" if (!defined $blockname);	# no block either
+			my $old_component = join("::",
+				$partnames{"layer"}, $blockname,
+				$partnames{"collection"},$partnames{"component"});
+
+			# find corresponding new component
+
+			my $new_component;
+
+			if (defined $mrp_mapping{$mrp})
+				{
+				$new_component = $mrp_mapping{$mrp};
+				my $othermapping = $old_component_mapping{$old_component};
+				if (defined $othermapping && $othermapping eq $new_component)
+					{
+					# they agree - lovely.
+					}
+				else
+					{
+					print "MRP mapping $mrp -> $new_component, disagrees with $old_component mapping\n";
+					}
+				delete $component_object{$new_component};
+				}
+			if (!defined $new_component)
+				{
+				$new_component = $old_component_mapping{$old_component};
+				}
+			if (!defined $new_component)
+				{
+				# Some "old_package" information is incorrect - scan for a close match
+				# Strategy 1 - match collection::component
+				my $tail = join ("::", $partnames{"collection"},$partnames{"component"});
+				my $len = 0-length($tail);
+
+				foreach my $guess (keys %old_component_mapping)
+					{
+					if (substr($guess,$len) eq $tail)
+						{
+						print "Guessed that $old_component should be $guess\n";
+						$new_component = $old_component_mapping{$guess};
+						last;
+						}
+					}
+				}
+			if (!defined $new_component)
+				{
+				# Some "old_package" information is incorrect - scan for a close match
+				# Strategy 2 - just match the component name,
+				# truncate after last / e.g. GPRS/UMTS QoS Framework => UMTS QoS Framework
+				my $tail = "::".$partnames{"component"};
+				$tail =~ s/^.*\/([^\/]*)$/$1/;
+				my $len = 0-length($tail);
+
+				foreach my $guess (keys %old_component_mapping)
+					{
+					if (substr($guess,$len) eq $tail)
+						{
+						print "Guessed that $old_component should be $guess\n";
+						$new_component = $old_component_mapping{$guess};
+						last;
+						}
+					}
+				}
+			if (!defined $new_component)
+				{
+				print "Rescuing unreferenced $old_component\n";
+				# later we will infer the new_component directory from the mrp
+				}
+			else
+				{
+				if (!defined $mrp_mapping{$mrp})
+					{
+					# Copy the unit into the Foundation model (we'll fix it later)
+
+					my $foundation_comp = $component_object{$new_component};
+					$node->removeChild($child);
+					$child->setOwnerDocument($foundation_comp->getOwnerDocument);
+					$foundation_comp->addText("\n ");
+					$foundation_comp->appendChild($child);
+					$foundation_comp->addText("\n ");
+					delete $component_object{$new_component};	# remove items after processing
+					}
+				}
+
+			# determine the root of the component source tree from the mrp attribute
+
+			if ($mrp =~ /^\//)
+				{
+				print "Skipping absolute MRP $mrp in $old_component\n";
+				next;
+				}
+
+			my $current_dir = $mrp;
+			$current_dir =~ s-/[^/]+$-/-;	# remove filename;
+
+			# tree structure special cases
+			$current_dir =~ s-/sms/multimode/Group/-/sms/-;
+			$current_dir =~ s-/agendaserver/TestAgendaSrv/-/agendaserver/-;
+			$current_dir =~ s-/alarmserver/TestAlarmSrv/-/alarmserver/-;
+			$current_dir =~ s-/trace/ulogger/group/-/trace/-;
+			$current_dir =~ s-/ucc/BuildScripts/group/-/ucc/-;
+			$current_dir =~ s-/worldserver/TestWorldSrv/-/worldserver/-;
+			$current_dir =~ s-/adapters/devman/Group/-/adapters/-;	# avoid collision with syncml/devman
+			$current_dir =~ s-/mobiletv/hai/dvbh/group/-/mobiletv/-;
+			$current_dir =~ s-/plpgrp/-/-i;	# connectivity/legacy/PLP/plpgrp
+			$current_dir =~ s-/(h2|h4)/.*$-/-i;	# various baseports
+
+			# more generic cases
+			$current_dir =~ s-/group/.*$-/-i;	# group (& subdirs)
+			$current_dir =~ s-/group[^/]+/.*$-/-i;	# groupsql, groupfuture (& subdirs) - cntmodel, s60 header compat
+			$current_dir =~ s-/mmpfiles/-/-i;	# comp/mmpfiles
+
+			# apply clumping rules
+
+			foreach my $clump (@clumps)
+				{
+				if (substr($current_dir,0,length($clump)) eq $clump)
+					{
+					print "$mrp is part of the component group rooted at $clump\n";
+					$current_dir = $clump;
+					last;
+					}
+				}
+
+			# check for inseparable components
+			my $new_dir;
+			my $primary;
+			my $set_peer_directory = 0;
+
+			if (defined $branchspec{$current_dir})
+				{
+				$primary = $primary_mrp{$current_dir};
+				print "Cannot separate $mrp from $primary\n";
+				$new_dir =
$branchspec{$current_dir}; # use the directory for the other component + } + elsif (defined $peers{$current_dir}) + { + # apply peering rules + my $peer = $peers{$current_dir}; + + if (defined $branchspec{$peer}) + { + # peer already defined - adjust our mapping + $new_dir = $branchspec{$peer}; + $new_dir =~ s/[^\/]+\/$//; + $current_dir =~ m/([^\/]+\/)$/; + $new_dir .= $1; + print "Mapping $mrp to $new_dir to be next to peer $peer\n"; + $primary = $mrp; + } + else + { + # we are the first to appear, so we determine the directory + $set_peer_directory = 1; + } + } + + if (!defined $new_dir) + { + if (defined $new_component) + { + $new_dir = $component_dirs{$new_component}; + } + else + { + $new_dir = "os/unref/$current_dir"; + $new_dir =~ s/common\/generic/comgen/; + $new_dir =~ s/common\/techview/comtv/; + $new_dir =~ s/common\/testtools/comtt/; + $new_dir =~ s/common\/connectqi/comqi/; + $new_dir =~ s/common\/developerlibrary/devlib/; + $new_dir =~ s/cedar\/generic/cedgen/; + } + $primary = $mrp; + } + + # Update the mrp attribute + + substr($mrp,0,length($current_dir)) = $new_dir; + # $child->setAttribute("mrp",$mrp); + + # update the bldFile attribute, if any + my $bldFile = $child->getAttribute("bldFile"); + if ($bldFile) + { + $bldFile =~ s/\\/\//g; # ensure that / separators are used + $child->setAttribute("bldFile",$bldFile); + $hasbldfile{$current_dir} = 1; + my $saved_bldFile = $bldFile; + $bldFile .= "/" if ($bldFile !~ /\/$/); # add trailing / + my $previous = substr($bldFile,0,length($current_dir),$new_dir); + if ($previous ne $current_dir) + { + print "*** $old_component bldFile=$saved_bldFile not in $current_dir\n"; + } + else + { + $bldFile =~ s/\/+$//; # remove trailing / + # $child->setAttribute("bldFile",$bldFile); + } + } + + add_to_branchspec($current_dir, $new_dir, $primary); + + if ($set_peer_directory) + { + # peer mapping implied by our mapping + my $peer = $peers{$current_dir}; + $new_dir =~ s/[^\/]+\/$//; + $peer =~ m/([^\/]+\/)$/; + $new_dir .= $1; + print "Implied mapping $peer to $new_dir to be next to $mrp\n"; + add_to_branchspec($peer, $new_dir, "$mrp (peer)"); + } + + next; + } + my $name = $child->getAttribute("name"); + $partnames{$tagname} = $name; + match_names($child); + } + } + +foreach my $missing (sort keys %component_object) + { + print "No mapping found for Symbian-derived component $missing\n"; + } + +# Output Perforce branchspec, taking care to "subtract" the +# places where a subtree is branched to a different place + +my $from = "//epoc/release/9.4"; +my $to = "//epoc/development/personal/williamro/seaside/31"; +my %processed; + +printf "\n\n========== branchspec with %d elements\n", scalar keys %branchspec; + +foreach my $olddir (sort keys %branchspec) + { + my $comment = $hasbldfile{$olddir} ? "" : "\t# src"; + + my $subtraction = ""; + my @parts = split /\//, $olddir; + my $root = ""; + while (@parts) + { + my $part = shift @parts; + $root .= "$part/"; + if (defined $processed{$root}) + { + # Found a containing tree + my $remainder = join("/",@parts); + $subtraction = sprintf "\t-$from/%s%s/... $to/%s%s/...\n", + $root, $remainder, $branchspec{$root}, $remainder; + # continue in case there is a containing sub-subtree. + } + } + print $subtraction; # usually empty + printf "\t$from/%s... 
$to/%s...%s\n", $olddir, $branchspec{$olddir},$comment; + $processed{$olddir} = 1; + } + +exit(0); + +# Report on the accuracy of Schedule 12 +print STDERR "\n"; +my @allnames = (); +my $unmatched = 0; +foreach my $name (sort keys %sch12refs) + { + next if (defined $modelnames{$name}); + push @allnames, "$name\t(Sch12 $foundationrefs{$name})\n"; + print STDERR "No match for $name (associated with $foundationrefs{$name})\n"; + $unmatched += 1; + } +if ($unmatched == 0) + { + print STDERR "All Schedule 12 entries matched in System Model\n"; + } +else + { + printf STDERR "%d Schedule 12 entry references not matched (from a total of %d)\n", $unmatched, scalar keys %sch12refs; + } + +# Remove the matched elements to leave the unmatched parts, +# and accumulate the MRP files for each Sch12 component + +my %sch12bymrp; +my %locationbymrp; + +sub list_mrps($$$); # declare the prototype for recursive call +sub list_mrps($$$) + { + my ($node,$location,$foundationname) = @_; + my @children = $node->getChildNodes; + my $nodename = $node->getAttribute("name"); + + my $sublocation = $nodename; + $sublocation = "$location/$nodename" if ($location ne ""); + + foreach my $child (@children) + { + if ($child->getNodeTypeName ne "ELEMENT_NODE") + { + # text and comments don't count + next; + } + my $tagname = $child->getTagName; + if ($tagname eq "unit" || $tagname eq "package" || $tagname eq "prebuilt") + { + # these elements have the mrp information, but no substructure + my $mrp = $child->getAttribute("mrp"); + $mrp = $1 if ($mrp =~ /\\([^\\]+)\.mrp$/i); + $foundationbymrp{$mrp} = $foundationname; + $locationbymrp{$mrp} = "$location\t$nodename"; + next; + } + my $submatch = $child->getAttribute("MATCHED"); + if ($submatch) + { + list_mrps($child,$sublocation,$submatch); + } + else + { + list_mrps($child,$sublocation,$foundationname); + } + } + } + +sub delete_matched($$); # declare the prototype for recursive call +sub delete_matched($$) + { + my ($node, $location) = @_; + my $nodename = $node->getAttribute("name"); + + my $sublocation = $nodename; + $sublocation = "$location/$nodename" if ($location ne ""); + + my @children = $node->getChildNodes; + return 0 if (scalar @children == 0); + my $now_empty = 1; + foreach my $child (@children) + { + if ($child->getNodeTypeName ne "ELEMENT_NODE") + { + # text and comments don't count + next; + } + my $foundationname = $child->getAttribute("MATCHED"); + if ($foundationname) + { + list_mrps($child, $sublocation, $foundationname); + $node->removeChild($child) if ($remove); + } + else + { + if (delete_matched($child,$sublocation) == 1) + { + # Child was empty and can be removed + $node->removeChild($child) if ($remove); + } + else + { + list_mrps($child, $sublocation, "*UNREFERENCED*"); + $now_empty = 0; # something left in due to this child + } + } + } + return $now_empty; + } + +# scan the tagged model, recording various details as a side-effect + +my $allgone = delete_matched($model,""); + +if ($cbrmappingfile ne "") + { + $componenttype{"*UNREFERENCED*"} = "??"; + open CBRMAP, ">$cbrmappingfile" or die("Unable to write to $cbrmappingfile: $!\n"); + foreach my $mrp (sort keys %sch12bymrp) + { + my $component = $foundationbymrp{$mrp}; + my $comptype = $componenttype{$component}; + my $location = $locationbymrp{$mrp}; + print CBRMAP "$mrp\t$location\t$component\t$comptype\n"; + } + close CBRMAP; + print STDERR "MRP -> Schedule 12 mapping written to $cbrmappingfile\n"; + } + +exit 0; diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/Date/Manip.pm --- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/Date/Manip.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,7362 @@ +package Date::Manip; +# Copyright (c) 1995-2003 Sullivan Beck. All rights reserved. +# This program is free software; you can redistribute it and/or modify it +# under the same terms as Perl itself. + +########################################################################### +########################################################################### + +use vars qw($OS %Lang %Holiday %Events %Curr %Cnf %Zone $VERSION @ISA @EXPORT); + +# Determine the type of OS... +$OS="Unix"; +$OS="Windows" if ((defined $^O and + $^O =~ /MSWin32/i || + $^O =~ /Windows_95/i || + $^O =~ /Windows_NT/i) || + (defined $ENV{OS} and + $ENV{OS} =~ /MSWin32/i || + $ENV{OS} =~ /Windows_95/i || + $ENV{OS} =~ /Windows_NT/i)); +$OS="Netware" if (defined $^O and + $^O =~ /NetWare/i); +$OS="Mac" if ((defined $^O and + $^O =~ /MacOS/i) || + (defined $ENV{OS} and + $ENV{OS} =~ /MacOS/i)); +$OS="MPE" if (defined $^O and + $^O =~ /MPE/i); +$OS="OS2" if (defined $^O and + $^O =~ /os2/i); +$OS="VMS" if (defined $^O and + $^O =~ /VMS/i); + +# Determine if we're doing taint checking +$Date::Manip::NoTaint = eval { local $^W; unlink "$^X$^T"; 1 }; + +########################################################################### +# CUSTOMIZATION +########################################################################### +# +# See the section of the POD documentation section CUSTOMIZING DATE::MANIP +# below for a complete description of each of these variables. + + +# Location of a the global config file. Tilde (~) expansions are allowed. +# This should be set in Date_Init arguments. +$Cnf{"GlobalCnf"}=""; +$Cnf{"IgnoreGlobalCnf"}=""; + +# Name of a personal config file and the path to search for it. Tilde (~) +# expansions are allowed. This should be set in Date_Init arguments or in +# the global config file. + +@Date::Manip::DatePath=(); +if ($OS eq "Windows") { + $Cnf{"PathSep"} = ";"; + $Cnf{"PersonalCnf"} = "Manip.cnf"; + $Cnf{"PersonalCnfPath"} = "."; + +} elsif ($OS eq "Netware") { + $Cnf{"PathSep"} = ";"; + $Cnf{"PersonalCnf"} = "Manip.cnf"; + $Cnf{"PersonalCnfPath"} = "."; + +} elsif ($OS eq "MPE") { + $Cnf{"PathSep"} = ":"; + $Cnf{"PersonalCnf"} = "Manip.cnf"; + $Cnf{"PersonalCnfPath"} = "."; + +} elsif ($OS eq "OS2") { + $Cnf{"PathSep"} = ":"; + $Cnf{"PersonalCnf"} = "Manip.cnf"; + $Cnf{"PersonalCnfPath"} = "."; + +} elsif ($OS eq "Mac") { + $Cnf{"PathSep"} = ":"; + $Cnf{"PersonalCnf"} = "Manip.cnf"; + $Cnf{"PersonalCnfPath"} = "."; + +} elsif ($OS eq "VMS") { + # VMS doesn't like files starting with "." + $Cnf{"PathSep"} = "\n"; + $Cnf{"PersonalCnf"} = "Manip.cnf"; + $Cnf{"PersonalCnfPath"} = ".\n~"; + +} else { + # Unix + $Cnf{"PathSep"} = ":"; + $Cnf{"PersonalCnf"} = ".DateManip.cnf"; + $Cnf{"PersonalCnfPath"} = ".:~"; + @Date::Manip::DatePath=qw(/bin /usr/bin /usr/local/bin); +} + +### Date::Manip variables set in the global or personal config file + +# Which language to use when parsing dates. +$Cnf{"Language"}="English"; + +# 12/10 = Dec 10 (US) or Oct 12 (anything else) +$Cnf{"DateFormat"}="US"; + +# Local timezone +$Cnf{"TZ"}=""; + +# Timezone to work in (""=local, "IGNORE", or a timezone) +$Cnf{"ConvTZ"}=""; + +# Date::Manip internal format (0=YYYYMMDDHH:MN:SS, 1=YYYYHHMMDDHHMNSS) +$Cnf{"Internal"}=0; + +# First day of the week (1=monday, 7=sunday). ISO 8601 says monday. 
+$Cnf{"FirstDay"}=1; + +# First and last day of the work week (1=monday, 7=sunday) +$Cnf{"WorkWeekBeg"}=1; +$Cnf{"WorkWeekEnd"}=5; + +# If non-nil, a work day is treated as 24 hours long (WorkDayBeg/WorkDayEnd +# ignored) +$Cnf{"WorkDay24Hr"}=0; + +# Start and end time of the work day (any time format allowed, seconds +# ignored) +$Cnf{"WorkDayBeg"}="08:00"; +$Cnf{"WorkDayEnd"}="17:00"; + +# If "today" is a holiday, we look either to "tomorrow" or "yesterday" for +# the nearest business day. By default, we'll always look "tomorrow" +# first. +$Cnf{"TomorrowFirst"}=1; + +# Erase the old holidays +$Cnf{"EraseHolidays"}=""; + +# Set this to non-zero to be produce completely backwards compatible deltas +$Cnf{"DeltaSigns"}=0; + +# If this is 0, use the ISO 8601 standard that Jan 4 is in week 1. If 1, +# make week 1 contain Jan 1. +$Cnf{"Jan1Week1"}=0; + +# 2 digit years fall into the 100 year period given by [ CURR-N, +# CURR+(99-N) ] where N is 0-99. Default behavior is 89, but other useful +# numbers might be 0 (forced to be this year or later) and 99 (forced to be +# this year or earlier). It can also be set to "c" (current century) or +# "cNN" (i.e. c18 forces the year to bet 1800-1899). Also accepts the +# form cNNNN to give the 100 year period NNNN to NNNN+99. +$Cnf{"YYtoYYYY"}=89; + +# Set this to 1 if you want a long-running script to always update the +# timezone. This will slow Date::Manip down. Read the POD documentation. +$Cnf{"UpdateCurrTZ"}=0; + +# Use an international character set. +$Cnf{"IntCharSet"}=0; + +# Use this to force the current date to be set to this: +$Cnf{"ForceDate"}=""; + +########################################################################### + +require 5.000; +require Exporter; +@ISA = qw(Exporter); +@EXPORT = qw( + DateManipVersion + Date_Init + ParseDateString + ParseDate + ParseRecur + Date_Cmp + DateCalc + ParseDateDelta + UnixDate + Delta_Format + Date_GetPrev + Date_GetNext + Date_SetTime + Date_SetDateField + Date_IsHoliday + Events_List + + Date_DaysInMonth + Date_DayOfWeek + Date_SecsSince1970 + Date_SecsSince1970GMT + Date_DaysSince1BC + Date_DayOfYear + Date_DaysInYear + Date_WeekOfYear + Date_LeapYear + Date_DaySuffix + Date_ConvTZ + Date_TimeZone + Date_IsWorkDay + Date_NextWorkDay + Date_PrevWorkDay + Date_NearestWorkDay + Date_NthDayOfYear +); +use strict; +use integer; +use Carp; + +use IO::File; + +$VERSION="5.42"; + +######################################################################## +######################################################################## + +$Curr{"InitLang"} = 1; # Whether a language is being init'ed +$Curr{"InitDone"} = 0; # Whether Init_Date has been called +$Curr{"InitFilesRead"} = 0; +$Curr{"ResetWorkDay"} = 1; +$Curr{"Debug"} = ""; +$Curr{"DebugVal"} = ""; + +$Holiday{"year"} = 0; +$Holiday{"dates"} = {}; +$Holiday{"desc"} = {}; + +$Events{"raw"} = []; +$Events{"parsed"} = 0; +$Events{"dates"} = []; +$Events{"recur"} = []; + +######################################################################## +######################################################################## +# THESE ARE THE MAIN ROUTINES +######################################################################## +######################################################################## + +# Get rid of a problem with old versions of perl +no strict "vars"; +# This sorts from longest to shortest element +sub sortByLength { + return (length $b <=> length $a); +} +use strict "vars"; + +sub DateManipVersion { + print "DEBUG: DateManipVersion\n" if 
($Curr{"Debug"} =~ /trace/); + return $VERSION; +} + +sub Date_Init { + print "DEBUG: Date_Init\n" if ($Curr{"Debug"} =~ /trace/); + $Curr{"Debug"}=""; + + my(@args)=@_; + $Curr{"InitDone"}=1; + local($_)=(); + my($internal,$firstday)=(); + my($var,$val,$file,@tmp)=(); + + # InitFilesRead = 0 : no conf files read yet + # 1 : global read, no personal read + # 2 : personal read + + $Cnf{"EraseHolidays"}=0; + foreach (@args) { + s/\s*$//; + s/^\s*//; + /^(\S+) \s* = \s* (.+)$/x; + ($var,$val)=($1,$2); + if ($var =~ /^GlobalCnf$/i) { + $Cnf{"GlobalCnf"}=$val; + if ($val) { + $Curr{"InitFilesRead"}=0; + &EraseHolidays(); + } + } elsif ($var =~ /^PathSep$/i) { + $Cnf{"PathSep"}=$val; + } elsif ($var =~ /^PersonalCnf$/i) { + $Cnf{"PersonalCnf"}=$val; + $Curr{"InitFilesRead"}=1 if ($Curr{"InitFilesRead"}==2); + } elsif ($var =~ /^PersonalCnfPath$/i) { + $Cnf{"PersonalCnfPath"}=$val; + $Curr{"InitFilesRead"}=1 if ($Curr{"InitFilesRead"}==2); + } elsif ($var =~ /^IgnoreGlobalCnf$/i) { + $Curr{"InitFilesRead"}=1 if ($Curr{"InitFilesRead"}==0); + $Cnf{"IgnoreGlobalCnf"}=1; + } elsif ($var =~ /^EraseHolidays$/i) { + &EraseHolidays(); + } else { + push(@tmp,$_); + } + } + @args=@tmp; + + # Read global config file + if ($Curr{"InitFilesRead"}<1 && ! $Cnf{"IgnoreGlobalCnf"}) { + $Curr{"InitFilesRead"}=1; + + if ($Cnf{"GlobalCnf"}) { + $file=&ExpandTilde($Cnf{"GlobalCnf"}); + &Date_InitFile($file) if ($file); + } + } + + # Read personal config file + if ($Curr{"InitFilesRead"}<2) { + $Curr{"InitFilesRead"}=2; + + if ($Cnf{"PersonalCnf"} and $Cnf{"PersonalCnfPath"}) { + $file=&SearchPath($Cnf{"PersonalCnf"},$Cnf{"PersonalCnfPath"},"r"); + &Date_InitFile($file) if ($file); + } + } + + foreach (@args) { + s/\s*$//; + s/^\s*//; + /^(\S+) \s* = \s* (.*)$/x; + ($var,$val)=($1,$2); + $val="" if (! defined $val); + &Date_SetConfigVariable($var,$val); + } + + confess "ERROR: Unknown FirstDay in Date::Manip.\n" + if (! &IsInt($Cnf{"FirstDay"},1,7)); + confess "ERROR: Unknown WorkWeekBeg in Date::Manip.\n" + if (! &IsInt($Cnf{"WorkWeekBeg"},1,7)); + confess "ERROR: Unknown WorkWeekEnd in Date::Manip.\n" + if (! &IsInt($Cnf{"WorkWeekEnd"},1,7)); + confess "ERROR: Invalid WorkWeek in Date::Manip.\n" + if ($Cnf{"WorkWeekEnd"} <= $Cnf{"WorkWeekBeg"}); + + my(%lang, + $tmp,%tmp,$tmp2,@tmp2, + $i,$j,@tmp3, + $zonesrfc,@zones)=(); + + my($L)=$Cnf{"Language"}; + + if ($Curr{"InitLang"}) { + $Curr{"InitLang"}=0; + + if ($L eq "English") { + &Date_Init_English(\%lang); + + } elsif ($L eq "French") { + &Date_Init_French(\%lang); + + } elsif ($L eq "Swedish") { + &Date_Init_Swedish(\%lang); + + } elsif ($L eq "German") { + &Date_Init_German(\%lang); + + } elsif ($L eq "Polish") { + &Date_Init_Polish(\%lang); + + } elsif ($L eq "Dutch" || + $L eq "Nederlands") { + &Date_Init_Dutch(\%lang); + + } elsif ($L eq "Spanish") { + &Date_Init_Spanish(\%lang); + + } elsif ($L eq "Portuguese") { + &Date_Init_Portuguese(\%lang); + + } elsif ($L eq "Romanian") { + &Date_Init_Romanian(\%lang); + + } elsif ($L eq "Italian") { + &Date_Init_Italian(\%lang); + + } elsif ($L eq "Russian") { + &Date_Init_Russian(\%lang); + + } elsif ($L eq "Turkish") { + &Date_Init_Turkish(\%lang); + + } elsif ($L eq "Danish") { + &Date_Init_Danish(\%lang); + + } else { + confess "ERROR: Unknown language in Date::Manip.\n"; + } + + # variables for months + # Month = "(jan|january|feb|february ... )" + # MonL = [ "Jan","Feb",... ] + # MonthL = [ "January","February", ... ] + # MonthH = { "january"=>1, "jan"=>1, ... 
} + + $Lang{$L}{"MonthH"}={}; + $Lang{$L}{"MonthL"}=[]; + $Lang{$L}{"MonL"}=[]; + &Date_InitLists([$lang{"month_name"}, + $lang{"month_abb"}], + \$Lang{$L}{"Month"},"lc,sort,back", + [$Lang{$L}{"MonthL"}, + $Lang{$L}{"MonL"}], + [$Lang{$L}{"MonthH"},1]); + + # variables for day of week + # Week = "(mon|monday|tue|tuesday ... )" + # WL = [ "M","T",... ] + # WkL = [ "Mon","Tue",... ] + # WeekL = [ "Monday","Tudesday",... ] + # WeekH = { "monday"=>1,"mon"=>1,"m"=>1,... } + + $Lang{$L}{"WeekH"}={}; + $Lang{$L}{"WeekL"}=[]; + $Lang{$L}{"WkL"}=[]; + $Lang{$L}{"WL"}=[]; + &Date_InitLists([$lang{"day_name"}, + $lang{"day_abb"}], + \$Lang{$L}{"Week"},"lc,sort,back", + [$Lang{$L}{"WeekL"}, + $Lang{$L}{"WkL"}], + [$Lang{$L}{"WeekH"},1]); + &Date_InitLists([$lang{"day_char"}], + "","lc", + [$Lang{$L}{"WL"}], + [\%tmp,1]); + %{ $Lang{$L}{"WeekH"} } = + (%{ $Lang{$L}{"WeekH"} },%tmp); + + # variables for last + # Last = "(last)" + # LastL = [ "last" ] + # Each = "(each)" + # EachL = [ "each" ] + # variables for day of month + # DoM = "(1st|first ... 31st)" + # DoML = [ "1st","2nd",... "31st" ] + # DoMH = { "1st"=>1,"first"=>1, ... "31st"=>31 } + # variables for week of month + # WoM = "(1st|first| ... 5th|last)" + # WoMH = { "1st"=>1, ... "5th"=>5,"last"=>-1 } + + $Lang{$L}{"LastL"}=$lang{"last"}; + &Date_InitStrings($lang{"last"}, + \$Lang{$L}{"Last"},"lc,sort"); + + $Lang{$L}{"EachL"}=$lang{"each"}; + &Date_InitStrings($lang{"each"}, + \$Lang{$L}{"Each"},"lc,sort"); + + $Lang{$L}{"DoMH"}={}; + $Lang{$L}{"DoML"}=[]; + &Date_InitLists([$lang{"num_suff"}, + $lang{"num_word"}], + \$Lang{$L}{"DoM"},"lc,sort,back,escape", + [$Lang{$L}{"DoML"}, + \@tmp], + [$Lang{$L}{"DoMH"},1]); + + @tmp=(); + foreach $tmp (keys %{ $Lang{$L}{"DoMH"} }) { + $tmp2=$Lang{$L}{"DoMH"}{$tmp}; + if ($tmp2<6) { + $Lang{$L}{"WoMH"}{$tmp} = $tmp2; + push(@tmp,$tmp); + } + } + foreach $tmp (@{ $Lang{$L}{"LastL"} }) { + $Lang{$L}{"WoMH"}{$tmp} = -1; + push(@tmp,$tmp); + } + &Date_InitStrings(\@tmp,\$Lang{$L}{"WoM"}, + "lc,sort,back,escape"); + + # variables for AM or PM + # AM = "(am)" + # PM = "(pm)" + # AmPm = "(am|pm)" + # AMstr = "AM" + # PMstr = "PM" + + &Date_InitStrings($lang{"am"},\$Lang{$L}{"AM"},"lc,sort,escape"); + &Date_InitStrings($lang{"pm"},\$Lang{$L}{"PM"},"lc,sort,escape"); + &Date_InitStrings([ @{$lang{"am"}},@{$lang{"pm"}} ],\$Lang{$L}{"AmPm"}, + "lc,back,sort,escape"); + $Lang{$L}{"AMstr"}=$lang{"am"}[0]; + $Lang{$L}{"PMstr"}=$lang{"pm"}[0]; + + # variables for expressions used in parsing deltas + # Yabb = "(?:y|yr|year|years)" + # Mabb = similar for months + # Wabb = similar for weeks + # Dabb = similar for days + # Habb = similar for hours + # MNabb = similar for minutes + # Sabb = similar for seconds + # Repl = { "abb"=>"replacement" } + # Whenever an abbreviation could potentially refer to two different + # strings (M standing for Minutes or Months), the abbreviation must + # be listed in Repl instead of in the appropriate Xabb values. This + # only applies to abbreviations which are substrings of other values + # (so there is no confusion between Mn and Month). 
+ + &Date_InitStrings($lang{"years"} ,\$Lang{$L}{"Yabb"}, "lc,sort"); + &Date_InitStrings($lang{"months"} ,\$Lang{$L}{"Mabb"}, "lc,sort"); + &Date_InitStrings($lang{"weeks"} ,\$Lang{$L}{"Wabb"}, "lc,sort"); + &Date_InitStrings($lang{"days"} ,\$Lang{$L}{"Dabb"}, "lc,sort"); + &Date_InitStrings($lang{"hours"} ,\$Lang{$L}{"Habb"}, "lc,sort"); + &Date_InitStrings($lang{"minutes"},\$Lang{$L}{"MNabb"},"lc,sort"); + &Date_InitStrings($lang{"seconds"},\$Lang{$L}{"Sabb"}, "lc,sort"); + $Lang{$L}{"Repl"}={}; + &Date_InitHash($lang{"replace"},undef,"lc",$Lang{$L}{"Repl"}); + + # variables for special dates that are offsets from now + # Now = "(now|today)" + # Offset = "(yesterday|tomorrow)" + # OffsetH = { "yesterday"=>"-0:0:0:1:0:0:0",... ] + # Times = "(noon|midnight)" + # TimesH = { "noon"=>"12:00:00","midnight"=>"00:00:00" } + # SepHM = hour/minute separator + # SepMS = minute/second separator + # SepSS = second/fraction separator + + $Lang{$L}{"TimesH"}={}; + &Date_InitHash($lang{"times"}, + \$Lang{$L}{"Times"},"lc,sort,back", + $Lang{$L}{"TimesH"}); + &Date_InitStrings($lang{"now"},\$Lang{$L}{"Now"},"lc,sort"); + $Lang{$L}{"OffsetH"}={}; + &Date_InitHash($lang{"offset"}, + \$Lang{$L}{"Offset"},"lc,sort,back", + $Lang{$L}{"OffsetH"}); + $Lang{$L}{"SepHM"}=$lang{"sephm"}; + $Lang{$L}{"SepMS"}=$lang{"sepms"}; + $Lang{$L}{"SepSS"}=$lang{"sepss"}; + + # variables for time zones + # zones = regular expression with all zone names (EST) + # n2o = a hash of all parsable zone names with their offsets + # tzones = reguar expression with all tzdata timezones (US/Eastern) + # tz2z = hash of all tzdata timezones to full timezone (EST#EDT) + + $zonesrfc= + "idlw -1200 ". # International Date Line West + "nt -1100 ". # Nome + "hst -1000 ". # Hawaii Standard + "cat -1000 ". # Central Alaska + "ahst -1000 ". # Alaska-Hawaii Standard + "akst -0900 ". # Alaska Standard + "yst -0900 ". # Yukon Standard + "hdt -0900 ". # Hawaii Daylight + "akdt -0800 ". # Alaska Daylight + "ydt -0800 ". # Yukon Daylight + "pst -0800 ". # Pacific Standard + "pdt -0700 ". # Pacific Daylight + "mst -0700 ". # Mountain Standard + "mdt -0600 ". # Mountain Daylight + "cst -0600 ". # Central Standard + "cdt -0500 ". # Central Daylight + "est -0500 ". # Eastern Standard + "act -0500 ". # Brazil, Acre + "sat -0400 ". # Chile + "bot -0400 ". # Bolivia + "amt -0400 ". # Brazil, Amazon + "acst -0400 ". # Brazil, Acre Daylight + "edt -0400 ". # Eastern Daylight + "ast -0400 ". # Atlantic Standard + #"nst -0330 ". # Newfoundland Standard nst=North Sumatra +0630 + "nft -0330 ". # Newfoundland + #"gst -0300 ". # Greenland Standard gst=Guam Standard +1000 + #"bst -0300 ". # Brazil Standard bst=British Summer +0100 + "brt -0300 ". # Brazil Standard (official time) + "brst -0300 ". # Brazil Standard + "adt -0300 ". # Atlantic Daylight + "art -0300 ". # Argentina + "amst -0300 ". # Brazil, Amazon Daylight + "ndt -0230 ". # Newfoundland Daylight + "brst -0200 ". # Brazil Daylight (official time) + "fnt -0200 ". # Brazil, Fernando de Noronha + "at -0200 ". # Azores + "wat -0100 ". # West Africa + "fnst -0100 ". # Brazil, Fernando de Noronha Daylight + "gmt +0000 ". # Greenwich Mean + "ut +0000 ". # Universal + "utc +0000 ". # Universal (Coordinated) + "wet +0000 ". # Western European + "cet +0100 ". # Central European + "fwt +0100 ". # French Winter + "met +0100 ". # Middle European + "mez +0100 ". # Middle European + "mewt +0100 ". # Middle European Winter + "swt +0100 ". # Swedish Winter + "bst +0100 ". 
# British Summer bst=Brazil standard -0300 + "gb +0100 ". # GMT with daylight savings + "west +0000 ". # Western European Daylight + "eet +0200 ". # Eastern Europe, USSR Zone 1 + "cest +0200 ". # Central European Summer + "fst +0200 ". # French Summer + "ist +0200 ". # Israel standard + "mest +0200 ". # Middle European Summer + "mesz +0200 ". # Middle European Summer + "metdst +0200 ". # An alias for mest used by HP-UX + "sast +0200 ". # South African Standard + "sst +0200 ". # Swedish Summer sst=South Sumatra +0700 + "bt +0300 ". # Baghdad, USSR Zone 2 + "eest +0300 ". # Eastern Europe Summer + "eetedt +0300 ". # Eastern Europe, USSR Zone 1 + "idt +0300 ". # Israel Daylight + "msk +0300 ". # Moscow + "eat +0300 ". # East Africa + "it +0330 ". # Iran + "zp4 +0400 ". # USSR Zone 3 + "msd +0400 ". # Moscow Daylight + "zp5 +0500 ". # USSR Zone 4 + "ist +0530 ". # Indian Standard + "zp6 +0600 ". # USSR Zone 5 + "novst +0600 ". # Novosibirsk time zone, Russia + "nst +0630 ". # North Sumatra nst=Newfoundland Std -0330 + #"sst +0700 ". # South Sumatra, USSR Zone 6 sst=Swedish Summer +0200 + "javt +0700 ". # Java + "hkt +0800 ". # Hong Kong + "sgt +0800 ". # Singapore + "cct +0800 ". # China Coast, USSR Zone 7 + "awst +0800 ". # Australian Western Standard + "wst +0800 ". # West Australian Standard + "pht +0800 ". # Asia Manila + "kst +0900 ". # Republic of Korea + "jst +0900 ". # Japan Standard, USSR Zone 8 + "rok +0900 ". # Republic of Korea + "acst +0930 ". # Australian Central Standard + "cast +0930 ". # Central Australian Standard + "aest +1000 ". # Australian Eastern Standard + "east +1000 ". # Eastern Australian Standard + "gst +1000 ". # Guam Standard, USSR Zone 9 gst=Greenland Std -0300 + "acdt +1030 ". # Australian Central Daylight + "cadt +1030 ". # Central Australian Daylight + "aedt +1100 ". # Australian Eastern Daylight + "eadt +1100 ". # Eastern Australian Daylight + "idle +1200 ". # International Date Line East + "nzst +1200 ". # New Zealand Standard + "nzt +1200 ". # New Zealand + "nzdt +1300 ". # New Zealand Daylight + "z +0000 ". + "a +0100 b +0200 c +0300 d +0400 e +0500 f +0600 g +0700 h +0800 ". + "i +0900 k +1000 l +1100 m +1200 ". + "n -0100 o -0200 p -0300 q -0400 r -0500 s -0600 t -0700 u -0800 ". + "v -0900 w -1000 x -1100 y -1200"; + + $Zone{"n2o"} = {}; + ($Zone{"zones"},%{ $Zone{"n2o"} })= + &Date_Regexp($zonesrfc,"sort,lc,under,back", + "keys"); + + $tmp= + "US/Pacific PST8PDT ". + "US/Mountain MST7MDT ". + "US/Central CST6CDT ". + "US/Eastern EST5EDT ". + "Canada/Pacific PST8PDT ". + "Canada/Mountain MST7MDT ". + "Canada/Central CST6CDT ". + "Canada/Eastern EST5EDT"; + + $Zone{"tz2z"} = {}; + ($Zone{"tzones"},%{ $Zone{"tz2z"} })= + &Date_Regexp($tmp,"lc,under,back","keys"); + $Cnf{"TZ"}=&Date_TimeZone; + + # misc. 
variables + # At = "(?:at)" + # Of = "(?:in|of)" + # On = "(?:on)" + # Future = "(?:in)" + # Later = "(?:later)" + # Past = "(?:ago)" + # Next = "(?:next)" + # Prev = "(?:last|previous)" + + &Date_InitStrings($lang{"at"}, \$Lang{$L}{"At"}, "lc,sort"); + &Date_InitStrings($lang{"on"}, \$Lang{$L}{"On"}, "lc,sort"); + &Date_InitStrings($lang{"future"},\$Lang{$L}{"Future"}, "lc,sort"); + &Date_InitStrings($lang{"later"}, \$Lang{$L}{"Later"}, "lc,sort"); + &Date_InitStrings($lang{"past"}, \$Lang{$L}{"Past"}, "lc,sort"); + &Date_InitStrings($lang{"next"}, \$Lang{$L}{"Next"}, "lc,sort"); + &Date_InitStrings($lang{"prev"}, \$Lang{$L}{"Prev"}, "lc,sort"); + &Date_InitStrings($lang{"of"}, \$Lang{$L}{"Of"}, "lc,sort"); + + # calc mode variables + # Approx = "(?:approximately)" + # Exact = "(?:exactly)" + # Business = "(?:business)" + + &Date_InitStrings($lang{"exact"}, \$Lang{$L}{"Exact"}, "lc,sort"); + &Date_InitStrings($lang{"approx"}, \$Lang{$L}{"Approx"}, "lc,sort"); + &Date_InitStrings($lang{"business"},\$Lang{$L}{"Business"},"lc,sort"); + + ############### END OF LANGUAGE INITIALIZATION + } + + if ($Curr{"ResetWorkDay"}) { + my($h1,$m1,$h2,$m2)=(); + if ($Cnf{"WorkDay24Hr"}) { + ($Curr{"WDBh"},$Curr{"WDBm"})=(0,0); + ($Curr{"WDEh"},$Curr{"WDEm"})=(24,0); + $Curr{"WDlen"}=24*60; + $Cnf{"WorkDayBeg"}="00:00"; + $Cnf{"WorkDayEnd"}="23:59"; + + } else { + confess "ERROR: Invalid WorkDayBeg in Date::Manip.\n" + if (! (($h1,$m1)=&CheckTime($Cnf{"WorkDayBeg"}))); + $Cnf{"WorkDayBeg"}="$h1:$m1"; + confess "ERROR: Invalid WorkDayEnd in Date::Manip.\n" + if (! (($h2,$m2)=&CheckTime($Cnf{"WorkDayEnd"}))); + $Cnf{"WorkDayEnd"}="$h2:$m2"; + + ($Curr{"WDBh"},$Curr{"WDBm"})=($h1,$m1); + ($Curr{"WDEh"},$Curr{"WDEm"})=($h2,$m2); + + # Work day length = h1:m1 or 0:len (len minutes) + $h1=$h2-$h1; + $m1=$m2-$m1; + if ($m1<0) { + $h1--; + $m1+=60; + } + $Curr{"WDlen"}=$h1*60+$m1; + } + $Curr{"ResetWorkDay"}=0; + } + + # current time + my($s,$mn,$h,$d,$m,$y,$wday,$yday,$isdst,$ampm,$wk)=(); + if ($Cnf{"ForceDate"}=~ + /^(\d{4})-(\d{2})-(\d{2})-(\d{2}):(\d{2}):(\d{2})$/) { + ($y,$m,$d,$h,$mn,$s)=($1,$2,$3,$4,$5,$6); + } else { + ($s,$mn,$h,$d,$m,$y,$wday,$yday,$isdst)=localtime(time); + $y+=1900; + $m++; + } + &Date_DateCheck(\$y,\$m,\$d,\$h,\$mn,\$s,\$ampm,\$wk); + $Curr{"Y"}=$y; + $Curr{"M"}=$m; + $Curr{"D"}=$d; + $Curr{"H"}=$h; + $Curr{"Mn"}=$mn; + $Curr{"S"}=$s; + $Curr{"AmPm"}=$ampm; + $Curr{"Now"}=&Date_Join($y,$m,$d,$h,$mn,$s); + + $Curr{"Debug"}=$Curr{"DebugVal"}; + + # If we're in array context, let's return a list of config variables + # that could be passed to Date_Init to get the same state as we're + # currently in. + if (wantarray) { + # Some special variables that have to be in a specific order + my(@special)=qw(IgnoreGlobalCnf GlobalCnf PersonalCnf PersonalCnfPath); + my(%tmp)=map { $_,1 } @special; + my(@tmp,$key,$val); + foreach $key (@special) { + $val=$Cnf{$key}; + push(@tmp,"$key=$val"); + } + foreach $key (keys %Cnf) { + next if (exists $tmp{$key}); + $val=$Cnf{$key}; + push(@tmp,"$key=$val"); + } + return @tmp; + } + return (); +} + +sub ParseDateString { + print "DEBUG: ParseDateString\n" if ($Curr{"Debug"} =~ /trace/); + local($_)=@_; + return "" if (! $_); + + my($y,$m,$d,$h,$mn,$s,$i,$wofm,$dofw,$wk,$tmp,$z,$num,$err,$iso,$ampm)=(); + my($date,$z2,$delta,$from,$falsefrom,$to,$which,$midnight)=(); + + # We only need to reinitialize if we have to determine what NOW is. + &Date_Init() if (! 
$Curr{"InitDone"} or $Cnf{"UpdateCurrTZ"}); + + my($L)=$Cnf{"Language"}; + my($type)=$Cnf{"DateFormat"}; + + # Mode is set in DateCalc. ParseDate only overrides it if the string + # contains a mode. + if ($Lang{$L}{"Exact"} && + s/$Lang{$L}{"Exact"}//) { + $Curr{"Mode"}=0; + } elsif ($Lang{$L}{"Approx"} && + s/$Lang{$L}{"Approx"}//) { + $Curr{"Mode"}=1; + } elsif ($Lang{$L}{"Business"} && + s/$Lang{$L}{"Business"}//) { + $Curr{"Mode"}=2; + } elsif (! exists $Curr{"Mode"}) { + $Curr{"Mode"}=0; + } + + # Unfortunately, some deltas can be parsed as dates. An example is + # 1 second == 1 2nd == 1 2 + # But, some dates can be parsed as deltas. The most important being: + # 1998010101:00:00 + # We'll check to see if a "date" can be parsed as a delta. If so, we'll + # assume that it is a delta (since they are much simpler, it is much + # less likely that we'll mistake a delta for a date than vice versa) + # unless it is an ISO-8601 date. + # + # This is important because we are using DateCalc to test whether a + # string is a date or a delta. Dates are tested first, so we need to + # be able to pass a delta into this routine and have it correctly NOT + # interpreted as a date. + # + # We will insist that the string contain something other than digits and + # colons so that the following will get correctly interpreted as a date + # rather than a delta: + # 12:30 + # 19980101 + + $delta=""; + $delta=&ParseDateDelta($_) if (/[^:0-9]/); + + # Put parse in a simple loop for an easy exit. + PARSE: { + my(@tmp)=&Date_Split($_); + if (@tmp) { + ($y,$m,$d,$h,$mn,$s)=@tmp; + last PARSE; + } + + # Fundamental regular expressions + + my($month)=$Lang{$L}{"Month"}; # (jan|january|...) + my(%month)=%{ $Lang{$L}{"MonthH"} }; # { jan=>1, ... } + my($week)=$Lang{$L}{"Week"}; # (mon|monday|...) + my(%week)=%{ $Lang{$L}{"WeekH"} }; # { mon=>1, monday=>1, ... } + my($wom)=$Lang{$L}{"WoM"}; # (1st|...|fifth|last) + my(%wom)=%{ $Lang{$L}{"WoMH"} }; # { 1st=>1,... fifth=>5,last=>-1 } + my($dom)=$Lang{$L}{"DoM"}; # (1st|first|...31st) + my(%dom)=%{ $Lang{$L}{"DoMH"} }; # { 1st=>1, first=>1, ... } + my($ampmexp)=$Lang{$L}{"AmPm"}; # (am|pm) + my($timeexp)=$Lang{$L}{"Times"}; # (noon|midnight) + my($now)=$Lang{$L}{"Now"}; # (now|today) + my($offset)=$Lang{$L}{"Offset"}; # (yesterday|tomorrow) + my($zone)=$Zone{"zones"} . '(?:\s+|$)'; # (edt|est|...)\s+ + my($day)='\s*'.$Lang{$L}{"Dabb"}; # \s*(?:d|day|days) + my($mabb)='\s*'.$Lang{$L}{"Mabb"}; # \s*(?:mon|month|months) + my($wkabb)='\s*'.$Lang{$L}{"Wabb"}; # \s*(?:w|wk|week|weeks) + my($next)='\s*'.$Lang{$L}{"Next"}; # \s*(?:next) + my($prev)='\s*'.$Lang{$L}{"Prev"}; # \s*(?:last|previous) + my($past)='\s*'.$Lang{$L}{"Past"}; # \s*(?:ago) + my($future)='\s*'.$Lang{$L}{"Future"}; # \s*(?:in) + my($later)='\s*'.$Lang{$L}{"Later"}; # \s*(?:later) + my($at)=$Lang{$L}{"At"}; # (?:at) + my($of)='\s*'.$Lang{$L}{"Of"}; # \s*(?:in|of) + my($on)='(?:\s*'.$Lang{$L}{"On"}.'\s*|\s+)'; + # \s*(?:on)\s* or \s+ + my($last)='\s*'.$Lang{$L}{"Last"}; # \s*(?:last) + my($hm)=$Lang{$L}{"SepHM"}; # : + my($ms)=$Lang{$L}{"SepMS"}; # : + my($ss)=$Lang{$L}{"SepSS"}; # . 
+ + # Other regular expressions + + my($D4)='(\d{4})'; # 4 digits (yr) + my($YY)='(\d{4}|\d{2})'; # 2 or 4 digits (yr) + my($DD)='(\d{2})'; # 2 digits (mon/day/hr/min/sec) + my($D) ='(\d{1,2})'; # 1 or 2 digit (mon/day/hr) + my($FS)="(?:$ss\\d+)?"; # fractional secs + my($sep)='[\/.-]'; # non-ISO8601 m/d/yy separators + # absolute time zone +0700 (GMT) + my($hzone)='(?:[0-1][0-9]|2[0-3])'; # 00 - 23 + my($mzone)='(?:[0-5][0-9])'; # 00 - 59 + my($zone2)='(?:\s*([+-](?:'."$hzone$mzone|$hzone:$mzone|$hzone))". + # +0700 +07:00 -07 + '(?:\s*\([^)]+\))?)'; # (GMT) + + # A regular expression for the time EXCEPT for the hour part + my($mnsec)="$hm$DD(?:$ms$DD$FS)?(?:\\s*$ampmexp)?"; + + # A special regular expression for /YYYY:HH:MN:SS used by Apache + my($apachetime)='(/\d{4}):' . "$DD$hm$DD$ms$DD"; + + my($time)=""; + $ampm=""; + $date=""; + + # Substitute all special time expressions. + if (/(^|[^a-z])$timeexp($|[^a-z])/i) { + $tmp=$2; + $tmp=$Lang{$L}{"TimesH"}{lc($tmp)}; + s/(^|[^a-z])$timeexp($|[^a-z])/$1 $tmp $3/i; + } + + # Remove some punctuation + s/[,]/ /g; + + # Make sure that ...7EST works (i.e. a timezone immediately following + # a digit. + s/(\d)$zone(\s+|$|[0-9])/$1 $2$3/i; + $zone = '\s+'.$zone; + + # Remove the time + $iso=1; + $midnight=0; + $from="24${hm}00(?:${ms}00)?"; + $falsefrom="${hm}24${ms}00"; # Don't trap XX:24:00 + $to="00${hm}00${ms}00"; + $midnight=1 if (!/$falsefrom/ && s/$from/$to/); + + $h=$mn=$s=0; + if (/$D$mnsec/i || /$ampmexp/i) { + $iso=0; + $tmp=0; + $tmp=1 if (/$mnsec$zone2?\s*$/i); # or /$mnsec$zone/ ?? + $tmp=0 if (/$ampmexp/i); + if (s/$apachetime$zone()/$1 /i || + s/$apachetime$zone2?/$1 /i || + s/(^|[^a-z])$at\s*$D$mnsec$zone()/$1 /i || + s/(^|[^a-z])$at\s*$D$mnsec$zone2?/$1 /i || + s/(^|[^0-9])(\d)$mnsec$zone()/$1 /i || + s/(^|[^0-9])(\d)$mnsec$zone2?/$1 /i || + (s/(t)$D$mnsec$zone()/$1 /i and (($iso=-$tmp) || 1)) || + (s/(t)$D$mnsec$zone2?/$1 /i and (($iso=-$tmp) || 1)) || + (s/()$DD$mnsec$zone()/ /i and (($iso=$tmp) || 1)) || + (s/()$DD$mnsec$zone2?/ /i and (($iso=$tmp) || 1)) || + s/(^|$at\s*|\s+)$D()()\s*$ampmexp$zone()/ /i || + s/(^|$at\s*|\s+)$D()()\s*$ampmexp$zone2?/ /i || + 0 + ) { + ($h,$mn,$s,$ampm,$z,$z2)=($2,$3,$4,$5,$6,$7); + if (defined ($z)) { + if ($z =~ /^[+-]\d{2}:\d{2}$/) { + $z=~ s/://; + } elsif ($z =~ /^[+-]\d{2}$/) { + $z .= "00"; + } + } + $time=1; + &Date_TimeCheck(\$h,\$mn,\$s,\$ampm); + $y=$m=$d=""; + # We're going to be calling TimeCheck again below (when we check the + # final date), so get rid of $ampm so that we don't have an error + # due to "15:30:00 PM". It'll get reset below. + $ampm=""; + if (/^\s*$/) { + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + last PARSE; + } + } + } + $time=0 if ($time ne "1"); + s/\s+$//; + s/^\s+//; + + # dateTtime ISO 8601 formats + my($orig)=$_; + s/t$//i if ($iso<0); + + # Parse ISO 8601 dates now (which may still have a zone stuck to it). + if ( ($iso && /^([0-9-]+(?:W[0-9-]+)?)$zone?$/i) || + ($iso && /^([0-9-]+(?:W[0-9-]+)?)$zone2?$/i) || + ($iso && /^([0-9-]+(?:T[0-9-]+)?)$zone?$/i) || + ($iso && /^([0-9-]+(?:T[0-9-]+)?)$zone2?$/i) || + 0) { + + # ISO 8601 dates + ($_,$z,$z2) = ($1,$2); + s,-, ,g; # Change all ISO8601 seps to spaces + s/^\s+//; + s/\s+$//; + + if (/^$D4\s*$DD\s*$DD\s*t?$DD(?:$DD(?:$DD(\d*))?)?$/i || + /^$DD\s+$DD\s*$DD\s*t?$DD(?:$DD(?:$DD(\d*))?)?$/i || + 0 + ) { + # ISO 8601 Dates with times + # YYYYMMDDHHMNSSFFFF... + # YYYYMMDDHHMNSS + # YYYYMMDDHHMN + # YYYYMMDDHH + # YY MMDDHHMNSSFFFF... 
+ # YY MMDDHHMNSS + # YY MMDDHHMN + # YY MMDDHH + ($y,$m,$d,$h,$mn,$s,$tmp)=($1,$2,$3,$4,$5,$6,$7); + if ($h==24 && (! defined $mn || $mn==0) && (! defined $s || $s==0)) { + $h=0; + $midnight=1; + } + $z = "" if (! defined $h); + return "" if ($time && defined $h); + last PARSE; + + } elsif (/^$D4(?:\s*$DD(?:\s*$DD)?)?$/ || + /^$DD(?:\s+$DD(?:\s*$DD)?)?$/) { + # ISO 8601 Dates + # YYYYMMDD + # YYYYMM + # YYYY + # YY MMDD + # YY MM + # YY + ($y,$m,$d)=($1,$2,$3); + last PARSE; + + } elsif (/^$YY\s+$D\s+$D/) { + # YY-M-D + ($y,$m,$d)=($1,$2,$3); + last PARSE; + + } elsif (/^$YY\s*W$DD\s*(\d)?$/i) { + # YY-W##-D + ($y,$wofm,$dofw)=($1,$2,$3); + ($y,$m,$d)=&Date_NthWeekOfYear($y,$wofm,$dofw); + last PARSE; + + } elsif (/^$D4\s*(\d{3})$/ || + /^$DD\s*(\d{3})$/) { + # YYDOY + ($y,$which)=($1,$2); + ($y,$m,$d)=&Date_NthDayOfYear($y,$which); + last PARSE; + + } elsif ($iso<0) { + # We confused something like 1999/August12:00:00 + # with a dateTtime format + $_=$orig; + + } else { + return ""; + } + } + + # All deltas that are not ISO-8601 dates are NOT dates. + return "" if ($Curr{"InCalc"} && $delta); + if ($delta) { + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + return &DateCalc_DateDelta($Curr{"Now"},$delta); + } + + # Check for some special types of dates (next, prev) + foreach $from (keys %{ $Lang{$L}{"Repl"} }) { + $to=$Lang{$L}{"Repl"}{$from}; + s/(^|[^a-z])$from($|[^a-z])/$1$to$2/i; + } + if (/$wom/i || /$future/i || /$later/i || /$past/i || + /$next/i || /$prev/i || /^$week$/i || /$wkabb/i) { + $tmp=0; + + if (/^$wom\s*$week$of\s*$month\s*$YY?$/i) { + # last friday in October 95 + ($wofm,$dofw,$m,$y)=($1,$2,$3,$4); + # fix $m, $y + return "" if (&Date_DateCheck(\$y,\$m,\$d,\$h,\$mn,\$s,\$ampm,\$wk)); + $dofw=$week{lc($dofw)}; + $wofm=$wom{lc($wofm)}; + # Get the first day of the month + $date=&Date_Join($y,$m,1,$h,$mn,$s); + if ($wofm==-1) { + $date=&DateCalc_DateDelta($date,"+0:1:0:0:0:0:0",\$err,0); + $date=&Date_GetPrev($date,$dofw,0); + } else { + for ($i=0; $i<$wofm; $i++) { + if ($i==0) { + $date=&Date_GetNext($date,$dofw,1); + } else { + $date=&Date_GetNext($date,$dofw,0); + } + } + } + last PARSE; + + } elsif (/^$last$day$of\s*$month(?:$of?\s*$YY)?/i) { + # last day in month + ($m,$y)=($1,$2); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $y=&Date_FixYear($y) if (! defined $y or length($y)<4); + $m=$month{lc($m)}; + $d=&Date_DaysInMonth($m,$y); + last PARSE; + + } elsif (/^$week$/i) { + # friday + ($dofw)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&Date_GetPrev($Curr{"Now"},$Cnf{"FirstDay"},1); + $date=&Date_GetNext($date,$dofw,1,$h,$mn,$s); + last PARSE; + + } elsif (/^$next\s*$week$/i) { + # next friday + ($dofw)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&Date_GetNext($Curr{"Now"},$dofw,0,$h,$mn,$s); + last PARSE; + + } elsif (/^$prev\s*$week$/i) { + # last friday + ($dofw)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&Date_GetPrev($Curr{"Now"},$dofw,0,$h,$mn,$s); + last PARSE; + + } elsif (/^$next$wkabb$/i) { + # next week + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"+0:0:1:0:0:0:0",\$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + } elsif (/^$prev$wkabb$/i) { + # last week + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"-0:0:1:0:0:0:0",\$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + + } elsif (/^$next$mabb$/i) { + # next month + &Date_Init() if (! 
$Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"+0:1:0:0:0:0:0",\$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + } elsif (/^$prev$mabb$/i) { + # last month + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"-0:1:0:0:0:0:0",\$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + + } elsif (/^$future\s*(\d+)$day$/i || + /^(\d+)$day$later$/i) { + # in 2 days + # 2 days later + ($num)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"+0:0:0:$num:0:0:0", + \$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + } elsif (/^(\d+)$day$past$/i) { + # 2 days ago + ($num)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"-0:0:0:$num:0:0:0", + \$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + + } elsif (/^$future\s*(\d+)$wkabb$/i || + /^(\d+)$wkabb$later$/i) { + # in 2 weeks + # 2 weeks later + ($num)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"+0:0:$num:0:0:0:0", + \$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + } elsif (/^(\d+)$wkabb$past$/i) { + # 2 weeks ago + ($num)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"-0:0:$num:0:0:0:0", + \$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + + } elsif (/^$future\s*(\d+)$mabb$/i || + /^(\d+)$mabb$later$/i) { + # in 2 months + # 2 months later + ($num)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"+0:$num:0:0:0:0:0", + \$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + } elsif (/^(\d+)$mabb$past$/i) { + # 2 months ago + ($num)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"-0:$num:0:0:0:0:0", + \$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + + } elsif (/^$week$future\s*(\d+)$wkabb$/i || + /^$week\s*(\d+)$wkabb$later$/i) { + # friday in 2 weeks + # friday 2 weeks later + ($dofw,$num)=($1,$2); + $tmp="+"; + } elsif (/^$week\s*(\d+)$wkabb$past$/i) { + # friday 2 weeks ago + ($dofw,$num)=($1,$2); + $tmp="-"; + } elsif (/^$future\s*(\d+)$wkabb$on$week$/i || + /^(\d+)$wkabb$later$on$week$/i) { + # in 2 weeks on friday + # 2 weeks later on friday + ($num,$dofw)=($1,$2); + $tmp="+" + } elsif (/^(\d+)$wkabb$past$on$week$/i) { + # 2 weeks ago on friday + ($num,$dofw)=($1,$2); + $tmp="-"; + } elsif (/^$week\s*$wkabb$/i) { + # monday week (British date: in 1 week on monday) + $dofw=$1; + $num=1; + $tmp="+"; + } elsif (/^$now\s*$wkabb$/i) { + # today week (British date: 1 week from today) + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"},"+0:0:1:0:0:0:0",\$err,0); + $date=&Date_SetTime($date,$h,$mn,$s) if (defined $h); + last PARSE; + } elsif (/^$offset\s*$wkabb$/i) { + # tomorrow week (British date: 1 week from tomorrow) + ($offset)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $offset=$Lang{$L}{"OffsetH"}{lc($offset)}; + $date=&DateCalc_DateDelta($Curr{"Now"},$offset,\$err,0); + $date=&DateCalc_DateDelta($date,"+0:0:1:0:0:0:0",\$err,0); + if ($time) { + return "" + if (&Date_DateCheck(\$y,\$m,\$d,\$h,\$mn,\$s,\$ampm,\$wk)); + $date=&Date_SetTime($date,$h,$mn,$s); + } + last PARSE; + } + + if ($tmp) { + &Date_Init() if (! 
$Cnf{"UpdateCurrTZ"}); + $date=&DateCalc_DateDelta($Curr{"Now"}, + $tmp . "0:0:$num:0:0:0:0",\$err,0); + $date=&Date_GetPrev($date,$Cnf{"FirstDay"},1); + $date=&Date_GetNext($date,$dofw,1,$h,$mn,$s); + last PARSE; + } + } + + # Change (2nd, second) to 2 + $tmp=0; + if (/(^|[^a-z0-9])$dom($|[^a-z0-9])/i) { + if (/^\s*$dom\s*$/) { + ($d)=($1); + $d=$dom{lc($d)}; + $m=$Curr{"M"}; + last PARSE; + } + my $from = $2; + my $to = $dom{ lc($from) }; + s/(^|[^a-z])$from($|[^a-z])/$1 $to $2/i; + s/^\s+//; + s/\s+$//; + } + + # Another set of special dates (Nth week) + if (/^$D\s*$week(?:$of?\s*$YY)?$/i) { + # 22nd sunday in 1996 + ($which,$dofw,$y)=($1,$2,$3); + $y=$Curr{"Y"} if (! $y); + $y--; # previous year + $tmp=&Date_GetNext("$y-12-31",$dofw,0); + if ($which>1) { + $tmp=&DateCalc_DateDelta($tmp,"+0:0:".($which-1).":0:0:0:0",\$err,0); + } + ($y,$m,$d)=(&Date_Split($tmp, 1))[0..2]; + last PARSE; + } elsif (/^$week$wkabb\s*$D(?:$of?\s*$YY)?$/i || + /^$week\s*$D$wkabb(?:$of?\s*$YY)?$/i) { + # sunday week 22 in 1996 + # sunday 22nd week in 1996 + ($dofw,$which,$y)=($1,$2,$3); + ($y,$m,$d)=&Date_NthWeekOfYear($y,$which,$dofw); + last PARSE; + } + + # Get rid of day of week + if (/(^|[^a-z])$week($|[^a-z])/i) { + $wk=$2; + (s/(^|[^a-z])$week,/$1 /i) || + s/(^|[^a-z])$week($|[^a-z])/$1 $3/i; + s/^\s+//; + s/\s+$//; + } + + { + # So that we can handle negative epoch times, let's convert + # things like "epoch -" to "epochNEGATIVE " before we strip out + # the $sep chars, which include '-'. + s,epoch\s*-,epochNEGATIVE ,g; + + # Non-ISO8601 dates + s,\s*$sep\s*, ,g; # change all non-ISO8601 seps to spaces + s,^\s*,,; # remove leading/trailing space + s,\s*$,,; + + if (/^$D\s+$D(?:\s+$YY)?$/) { + # MM DD YY (DD MM YY non-US) + ($m,$d,$y)=($1,$2,$3); + ($m,$d)=($d,$m) if ($type ne "US"); + last PARSE; + + } elsif (/^$D4\s*$D\s*$D$/) { + # YYYY MM DD + ($y,$m,$d)=($1,$2,$3); + last PARSE; + + } elsif (s/(^|[^a-z])$month($|[^a-z])/$1 $3/i) { + ($m)=($2); + + if (/^\s*$D(?:\s+$YY)?\s*$/) { + # mmm DD YY + # DD mmm YY + # DD YY mmm + ($d,$y)=($1,$2); + last PARSE; + + } elsif (/^\s*$D$D4\s*$/) { + # mmm DD YYYY + # DD mmm YYYY + # DD YYYY mmm + ($d,$y)=($1,$2); + last PARSE; + + } elsif (/^\s*$D4\s*$D\s*$/) { + # mmm YYYY DD + # YYYY mmm DD + # YYYY DD mmm + ($y,$d)=($1,$2); + last PARSE; + + } elsif (/^\s*$D4\s*$/) { + # mmm YYYY + # YYYY mmm + ($y,$d)=($1,1); + last PARSE; + + } else { + return ""; + } + + } elsif (/^epochNEGATIVE (\d+)$/) { + $s=$1; + $date=&DateCalc("1970-01-01 00:00 GMT","-0:0:$s"); + } elsif (/^epoch\s*(\d+)$/i) { + $s=$1; + $date=&DateCalc("1970-01-01 00:00 GMT","+0:0:$s"); + + } elsif (/^$now$/i) { + # now, today + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $date=$Curr{"Now"}; + if ($time) { + return "" + if (&Date_DateCheck(\$y,\$m,\$d,\$h,\$mn,\$s,\$ampm,\$wk)); + $date=&Date_SetTime($date,$h,$mn,$s); + } + last PARSE; + + } elsif (/^$offset$/i) { + # yesterday, tomorrow + ($offset)=($1); + &Date_Init() if (! $Cnf{"UpdateCurrTZ"}); + $offset=$Lang{$L}{"OffsetH"}{lc($offset)}; + $date=&DateCalc_DateDelta($Curr{"Now"},$offset,\$err,0); + if ($time) { + return "" + if (&Date_DateCheck(\$y,\$m,\$d,\$h,\$mn,\$s,\$ampm,\$wk)); + $date=&Date_SetTime($date,$h,$mn,$s); + } + last PARSE; + + } else { + return ""; + } + } + } + + if (! 
$date) { + return "" if (&Date_DateCheck(\$y,\$m,\$d,\$h,\$mn,\$s,\$ampm,\$wk)); + $date=&Date_Join($y,$m,$d,$h,$mn,$s); + } + $date=&Date_ConvTZ($date,$z); + if ($midnight) { + $date=&DateCalc_DateDelta($date,"+0:0:0:1:0:0:0"); + } + return $date; +} + +sub ParseDate { + print "DEBUG: ParseDate\n" if ($Curr{"Debug"} =~ /trace/); + &Date_Init() if (! $Curr{"InitDone"}); + my($args,@args,@a,$ref,$date)=(); + @a=@_; + + # @a : is the list of args to ParseDate. Currently, only one argument + # is allowed and it must be a scalar (or a reference to a scalar) + # or a reference to an array. + + if ($#a!=0) { + print "ERROR: Invalid number of arguments to ParseDate.\n"; + return ""; + } + $args=$a[0]; + $ref=ref $args; + if (! $ref) { + return $args if (&Date_Split($args)); + @args=($args); + } elsif ($ref eq "ARRAY") { + @args=@$args; + } elsif ($ref eq "SCALAR") { + return $$args if (&Date_Split($$args)); + @args=($$args); + } else { + print "ERROR: Invalid arguments to ParseDate.\n"; + return ""; + } + @a=@args; + + # @args : a list containing all the arguments (dereferenced if appropriate) + # @a : a list containing all the arguments currently being examined + # $ref : nil, "SCALAR", or "ARRAY" depending on whether a scalar, a + # reference to a scalar, or a reference to an array was passed in + # $args : the scalar or refererence passed in + + PARSE: while($#a>=0) { + $date=join(" ",@a); + $date=&ParseDateString($date); + last if ($date); + pop(@a); + } # PARSE + + splice(@args,0,$#a + 1); + @$args= @args if (defined $ref and $ref eq "ARRAY"); + $date; +} + +sub Date_Cmp { + my($D1,$D2)=@_; + my($date1)=&ParseDateString($D1); + my($date2)=&ParseDateString($D2); + return $date1 cmp $date2; +} + +# **NOTE** +# The calc routines all call parse routines, so it is never necessary to +# call Date_Init in the calc routines. +sub DateCalc { + print "DEBUG: DateCalc\n" if ($Curr{"Debug"} =~ /trace/); + my($D1,$D2,@arg)=@_; + my($ref,$err,$errref,$mode)=(); + + $errref=shift(@arg); + $ref=0; + if (defined $errref) { + if (ref $errref) { + $mode=shift(@arg); + $ref=1; + } else { + $mode=$errref; + $errref=""; + } + } + + my(@date,@delta,$ret,$tmp,$old)=(); + + if (defined $mode and $mode>=0 and $mode<=3) { + $Curr{"Mode"}=$mode; + } else { + $Curr{"Mode"}=0; + } + + $old=$Curr{"InCalc"}; + $Curr{"InCalc"}=1; + + if ($tmp=&ParseDateString($D1)) { + # If we've already parsed the date, we don't want to do it a second + # time (so we don't convert timezones twice). + if (&Date_Split($D1)) { + push(@date,$D1); + } else { + push(@date,$tmp); + } + } elsif ($tmp=&ParseDateDelta($D1)) { + push(@delta,$tmp); + } else { + $$errref=1 if ($ref); + return; + } + + if ($tmp=&ParseDateString($D2)) { + if (&Date_Split($D2)) { + push(@date,$D2); + } else { + push(@date,$tmp); + } + } elsif ($tmp=&ParseDateDelta($D2)) { + push(@delta,$tmp); + } else { + $$errref=2 if ($ref); + return; + } + $mode=$Curr{"Mode"}; + $Curr{"InCalc"}=$old; + + if ($#date==1) { + $ret=&DateCalc_DateDate(@date,$mode); + } elsif ($#date==0) { + $ret=&DateCalc_DateDelta(@date,@delta,\$err,$mode); + $$errref=$err if ($ref); + } else { + $ret=&DateCalc_DeltaDelta(@delta,$mode); + } + $ret; +} + +sub ParseDateDelta { + print "DEBUG: ParseDateDelta\n" if ($Curr{"Debug"} =~ /trace/); + my($args,@args,@a,$ref)=(); + local($_)=(); + @a=@_; + + # @a : is the list of args to ParseDateDelta. Currently, only one argument + # is allowed and it must be a scalar (or a reference to a scalar) + # or a reference to an array. 
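+ #
+ # Illustrative calls covering the accepted argument forms (an editorial
+ # sketch, not part of the original source):
+ #   &ParseDateDelta("+ 3 days");    # plain scalar
+ #   &ParseDateDelta(\$string);      # reference to a scalar
+ #   &ParseDateDelta(\@words);       # reference to an array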
+ + if ($#a!=0) { + print "ERROR: Invalid number of arguments to ParseDateDelta.\n"; + return ""; + } + $args=$a[0]; + $ref=ref $args; + if (! $ref) { + @args=($args); + } elsif ($ref eq "ARRAY") { + @args=@$args; + } elsif ($ref eq "SCALAR") { + @args=($$args); + } else { + print "ERROR: Invalid arguments to ParseDateDelta.\n"; + return ""; + } + @a=@args; + + # @args : a list containing all the arguments (dereferenced if appropriate) + # @a : a list containing all the arguments currently being examined + # $ref : nil, "SCALAR", or "ARRAY" depending on whether a scalar, a + # reference to a scalar, or a reference to an array was passed in + # $args : the scalar or refererence passed in + + my(@colon,@delta,$delta,$dir,$colon,$sign,$val)=(); + my($len,$tmp,$tmp2,$tmpl)=(); + my($from,$to)=(); + my($workweek)=$Cnf{"WorkWeekEnd"}-$Cnf{"WorkWeekBeg"}+1; + + &Date_Init() if (! $Curr{"InitDone"}); + # A sign can be a sequence of zero or more + and - signs, this + # allows for deltas like '+ -2 days'. + my($signexp)='((?:[+-]\s*)*)'; + my($numexp)='(\d+)'; + my($exp1)="(?: \\s* $signexp \\s* $numexp \\s*)"; + my($yexp,$mexp,$wexp,$dexp,$hexp,$mnexp,$sexp,$i)=(); + $yexp=$mexp=$wexp=$dexp=$hexp=$mnexp=$sexp="()()"; + $yexp ="(?: $exp1 ". $Lang{$Cnf{"Language"}}{"Yabb"} .")?"; + $mexp ="(?: $exp1 ". $Lang{$Cnf{"Language"}}{"Mabb"} .")?"; + $wexp ="(?: $exp1 ". $Lang{$Cnf{"Language"}}{"Wabb"} .")?"; + $dexp ="(?: $exp1 ". $Lang{$Cnf{"Language"}}{"Dabb"} .")?"; + $hexp ="(?: $exp1 ". $Lang{$Cnf{"Language"}}{"Habb"} .")?"; + $mnexp="(?: $exp1 ". $Lang{$Cnf{"Language"}}{"MNabb"}.")?"; + $sexp ="(?: $exp1 ". $Lang{$Cnf{"Language"}}{"Sabb"} ."?)?"; + my($future)=$Lang{$Cnf{"Language"}}{"Future"}; + my($later)=$Lang{$Cnf{"Language"}}{"Later"}; + my($past)=$Lang{$Cnf{"Language"}}{"Past"}; + + $delta=""; + PARSE: while (@a) { + $_ = join(" ", grep {defined;} @a); + s/\s+$//; + last if ($_ eq ""); + + # Mode is set in DateCalc. ParseDateDelta only overrides it if the + # string contains a mode. + if ($Lang{$Cnf{"Language"}}{"Exact"} && + s/$Lang{$Cnf{"Language"}}{"Exact"}//) { + $Curr{"Mode"}=0; + } elsif ($Lang{$Cnf{"Language"}}{"Approx"} && + s/$Lang{$Cnf{"Language"}}{"Approx"}//) { + $Curr{"Mode"}=1; + } elsif ($Lang{$Cnf{"Language"}}{"Business"} && + s/$Lang{$Cnf{"Language"}}{"Business"}//) { + $Curr{"Mode"}=2; + } elsif (! exists $Curr{"Mode"}) { + $Curr{"Mode"}=0; + } + $workweek=7 if ($Curr{"Mode"} != 2); + + foreach $from (keys %{ $Lang{$Cnf{"Language"}}{"Repl"} }) { + $to=$Lang{$Cnf{"Language"}}{"Repl"}{$from}; + s/(^|[^a-z])$from($|[^a-z])/$1$to$2/i; + } + + # in or ago + # + # We need to make sure that $later, $future, and $past don't contain each + # other... Romanian pointed this out where $past is "in urma" and $future + # is "in". When they do, we have to take this into account. 
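+ # (In that case the longer candidate must win: "in urma" should be taken
+ # as $past even though the shorter $future string "in" also matches,
+ # which is why the lengths of the competing matches are compared below.)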
+ # $len length of best match (greatest wins) + # $tmp string after best match + # $dir direction (prior, after) of best match + # + # $tmp2 string before/after current match + # $tmpl length of current match + + $len=0; + $tmp=$_; + $dir=1; + + $tmp2=$_; + if ($tmp2 =~ s/(^|[^a-z])($future)($|[^a-z])/$1 $3/i) { + $tmpl=length($2); + if ($tmpl>$len) { + $tmp=$tmp2; + $dir=1; + $len=$tmpl; + } + } + + $tmp2=$_; + if ($tmp2 =~ s/(^|[^a-z])($later)($|[^a-z])/$1 $3/i) { + $tmpl=length($2); + if ($tmpl>$len) { + $tmp=$tmp2; + $dir=1; + $len=$tmpl; + } + } + + $tmp2=$_; + if ($tmp2 =~ s/(^|[^a-z])($past)($|[^a-z])/$1 $3/i) { + $tmpl=length($2); + if ($tmpl>$len) { + $tmp=$tmp2; + $dir=-1; + $len=$tmpl; + } + } + + $_ = $tmp; + s/\s*$//; + + # the colon part of the delta + $colon=""; + if (s/($signexp?$numexp?(:($signexp?$numexp)?){1,6})$//) { + $colon=$1; + s/\s+$//; + } + @colon=split(/:/,$colon); + + # the non-colon part of the delta + $sign="+"; + @delta=(); + $i=6; + foreach $exp1 ($yexp,$mexp,$wexp,$dexp,$hexp,$mnexp,$sexp) { + last if ($#colon>=$i--); + $val=0; + if (s/^$exp1//ix) { + $val=$2 if ($2); + $sign=$1 if ($1); + } + + # Collapse a sign like '+ -' into a single character like '-', + # by counting the occurrences of '-'. + # + $sign =~ s/\s+//g; + $sign =~ tr/+//d; + my $count = ($sign =~ tr/-//d); + die "bad characters in sign: $sign" if length $sign; + $sign = $count % 2 ? '-' : '+'; + + push(@delta,"$sign$val"); + } + if (! /^\s*$/) { + pop(@a); + next PARSE; + } + + # make sure that the colon part has a sign + for ($i=0; $i<=$#colon; $i++) { + $val=0; + if ($colon[$i] =~ /^$signexp$numexp?/) { + $val=$2 if ($2); + $sign=$1 if ($1); + } + $colon[$i] = "$sign$val"; + } + + # combine the two + push(@delta,@colon); + if ($dir<0) { + for ($i=0; $i<=$#delta; $i++) { + $delta[$i] =~ tr/-+/+-/; + } + } + + # form the delta and shift off the valid part + $delta=join(":",@delta); + splice(@args,0,$#a+1); + @$args=@args if (defined $ref and $ref eq "ARRAY"); + last PARSE; + } + + $delta=&Delta_Normalize($delta,$Curr{"Mode"}); + return $delta; +} + +sub UnixDate { + print "DEBUG: UnixDate\n" if ($Curr{"Debug"} =~ /trace/); + my($date,@format)=@_; + local($_)=(); + my($format,%f,$out,@out,$c,$date1,$date2,$tmp)=(); + my($scalar)=(); + $date=&ParseDateString($date); + return if (! $date); + + my($y,$m,$d,$h,$mn,$s)=($f{"Y"},$f{"m"},$f{"d"},$f{"H"},$f{"M"},$f{"S"})= + &Date_Split($date, 1); + $f{"y"}=substr $f{"Y"},2; + &Date_Init() if (! $Curr{"InitDone"}); + + if (! 
wantarray) { + $format=join(" ",@format); + @format=($format); + $scalar=1; + } + + # month, week + $_=$m; + s/^0//; + $f{"b"}=$f{"h"}=$Lang{$Cnf{"Language"}}{"MonL"}[$_-1]; + $f{"B"}=$Lang{$Cnf{"Language"}}{"MonthL"}[$_-1]; + $_=$m; + s/^0/ /; + $f{"f"}=$_; + $f{"U"}=&Date_WeekOfYear($m,$d,$y,7); + $f{"W"}=&Date_WeekOfYear($m,$d,$y,1); + + # check week 52,53 and 0 + $f{"G"}=$f{"L"}=$y; + if ($f{"W"}>=52 || $f{"U"}>=52) { + my($dd,$mm,$yy)=($d,$m,$y); + $dd+=7; + if ($dd>31) { + $dd-=31; + $mm=1; + $yy++; + if (&Date_WeekOfYear($mm,$dd,$yy,1)==2) { + $f{"G"}=$yy; + $f{"W"}=1; + } + if (&Date_WeekOfYear($mm,$dd,$yy,7)==2) { + $f{"L"}=$yy; + $f{"U"}=1; + } + } + } + if ($f{"W"}==0) { + my($dd,$mm,$yy)=($d,$m,$y); + $dd-=7; + $dd+=31 if ($dd<1); + $yy--; + $mm=12; + $f{"G"}=$yy; + $f{"W"}=&Date_WeekOfYear($mm,$dd,$yy,1)+1; + } + if ($f{"U"}==0) { + my($dd,$mm,$yy)=($d,$m,$y); + $dd-=7; + $dd+=31 if ($dd<1); + $yy--; + $mm=12; + $f{"L"}=$yy; + $f{"U"}=&Date_WeekOfYear($mm,$dd,$yy,7)+1; + } + + $f{"U"}="0".$f{"U"} if (length $f{"U"} < 2); + $f{"W"}="0".$f{"W"} if (length $f{"W"} < 2); + + # day + $f{"j"}=&Date_DayOfYear($m,$d,$y); + $f{"j"} = "0" . $f{"j"} while (length($f{"j"})<3); + $_=$d; + s/^0/ /; + $f{"e"}=$_; + $f{"w"}=&Date_DayOfWeek($m,$d,$y); + $f{"v"}=$Lang{$Cnf{"Language"}}{"WL"}[$f{"w"}-1]; + $f{"v"}=" ".$f{"v"} if (length $f{"v"} < 2); + $f{"a"}=$Lang{$Cnf{"Language"}}{"WkL"}[$f{"w"}-1]; + $f{"A"}=$Lang{$Cnf{"Language"}}{"WeekL"}[$f{"w"}-1]; + $f{"E"}=&Date_DaySuffix($f{"e"}); + + # hour + $_=$h; + s/^0/ /; + $f{"k"}=$_; + $f{"i"}=$f{"k"}+1; + $f{"i"}=$f{"k"}; + $f{"i"}=12 if ($f{"k"}==0); + $f{"i"}=$f{"k"}-12 if ($f{"k"}>12); + $f{"i"}=$f{"i"}-12 if ($f{"i"}>12); + $f{"i"}=" ".$f{"i"} if (length($f{"i"})<2); + $f{"I"}=$f{"i"}; + $f{"I"}=~ s/^ /0/; + $f{"p"}=$Lang{$Cnf{"Language"}}{"AMstr"}; + $f{"p"}=$Lang{$Cnf{"Language"}}{"PMstr"} if ($f{"k"}>11); + + # minute, second, timezone + $f{"o"}=&Date_SecsSince1970($m,$d,$y,$h,$mn,$s); + $f{"s"}=&Date_SecsSince1970GMT($m,$d,$y,$h,$mn,$s); + $f{"Z"}=($Cnf{"ConvTZ"} eq "IGNORE" or $Cnf{"ConvTZ"} eq "") ? + $Cnf{"TZ"} : $Cnf{"ConvTZ"}; + $f{"z"}=($f{"Z"}=~/^[+-]\d{4}/) ? $f{"Z"} : ($Zone{"n2o"}{lc $f{"Z"}} || ""); + + # date, time + $f{"c"}=qq|$f{"a"} $f{"b"} $f{"e"} $h:$mn:$s $y|; + $f{"C"}=$f{"u"}= + qq|$f{"a"} $f{"b"} $f{"e"} $h:$mn:$s $f{"z"} $y|; + $f{"g"}=qq|$f{"a"}, $d $f{"b"} $y $h:$mn:$s $f{"z"}|; + $f{"D"}=$f{"x"}=qq|$m/$d/$f{"y"}|; + $f{"r"}=qq|$f{"I"}:$mn:$s $f{"p"}|; + $f{"R"}=qq|$h:$mn|; + $f{"T"}=$f{"X"}=qq|$h:$mn:$s|; + $f{"V"}=qq|$m$d$h$mn$f{"y"}|; + $f{"Q"}="$y$m$d"; + $f{"q"}=qq|$y$m$d$h$mn$s|; + $f{"P"}=qq|$y$m$d$h:$mn:$s|; + $f{"F"}=qq|$f{"A"}, $f{"B"} $f{"e"}, $f{"Y"}|; + if ($f{"W"}==0) { + $y--; + $tmp=&Date_WeekOfYear(12,31,$y,1); + $tmp="0$tmp" if (length($tmp) < 2); + $f{"J"}=qq|$y-W$tmp-$f{"w"}|; + } else { + $f{"J"}=qq|$f{"G"}-W$f{"W"}-$f{"w"}|; + } + $f{"K"}=qq|$y-$f{"j"}|; + # %l is a special case. Since it requires the use of the calculator + # which requires this routine, an infinite recursion results. To get + # around this, %l is NOT determined every time this is called so the + # recursion breaks. 
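+ # (As the %l branch below shows, dates within roughly six months of the
+ # current date are formatted as abbreviated month, day and hh:mn, while
+ # anything further away is formatted as abbreviated month, day and year.)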
+ + # other formats + $f{"n"}="\n"; + $f{"t"}="\t"; + $f{"%"}="%"; + $f{"+"}="+"; + + foreach $format (@format) { + $format=reverse($format); + $out=""; + while ($format ne "") { + $c=chop($format); + if ($c eq "%") { + $c=chop($format); + if ($c eq "l") { + &Date_Init(); + $date1=&DateCalc_DateDelta($Curr{"Now"},"-0:6:0:0:0:0:0"); + $date2=&DateCalc_DateDelta($Curr{"Now"},"+0:6:0:0:0:0:0"); + if (&Date_Cmp($date,$date1)>=0 && &Date_Cmp($date,$date2)<=0) { + $f{"l"}=qq|$f{"b"} $f{"e"} $h:$mn|; + } else { + $f{"l"}=qq|$f{"b"} $f{"e"} $f{"Y"}|; + } + $out .= $f{"$c"}; + } elsif (exists $f{"$c"}) { + $out .= $f{"$c"}; + } else { + $out .= $c; + } + } else { + $out .= $c; + } + } + push(@out,$out); + } + if ($scalar) { + return $out[0]; + } else { + return (@out); + } +} + +# Can't be in "use integer" because we're doing decimal arithmatic +no integer; +sub Delta_Format { + print "DEBUG: Delta_Format\n" if ($Curr{"Debug"} =~ /trace/); + my($delta,$dec,@format)=@_; + $delta=&ParseDateDelta($delta); + return "" if (! $delta); + my(@out,%f,$out,$c1,$c2,$scalar,$format)=(); + local($_)=$delta; + my($y,$M,$w,$d,$h,$m,$s)=&Delta_Split($delta); + # Get rid of positive signs. + ($y,$M,$w,$d,$h,$m,$s)=map { 1*$_; }($y,$M,$w,$d,$h,$m,$s); + + if (defined $dec && $dec>0) { + $dec="%." . ($dec*1) . "f"; + } else { + $dec="%f"; + } + + if (! wantarray) { + $format=join(" ",@format); + @format=($format); + $scalar=1; + } + + # Length of each unit in seconds + my($sl,$ml,$hl,$dl,$wl,$yl)=(); + $sl = 1; + $ml = $sl*60; + $hl = $ml*60; + $dl = $hl*24; + $wl = $dl*7; + $yl = $dl*365.25; + + # The decimal amount of each unit contained in all smaller units + my($yd,$Md,$sd,$md,$hd,$dd,$wd)=(); + if ($M) { + $yd = $M/12; + $Md = 0; + } else { + $yd = ($w*$wl + $d*$dl + $h*$hl + $m*$ml + $s*$sl)/$yl; + $Md = 0; + } + + $wd = ($d*$dl + $h*$hl + $m*$ml + $s*$sl)/$wl; + $dd = ($h*$hl + $m*$ml + $s*$sl)/$dl; + $hd = ($m*$ml + $s*$sl)/$hl; + $md = ($s*$sl)/$ml; + $sd = 0; + + # The amount of each unit contained in higher units. 
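+ # (Rough editorial illustration: for an exact delta of 2 days 12 hours,
+ # "%dv" yields the bare value 2, "%dd" yields roughly 2.5 since the 12
+ # hours are folded in as a fraction of a day, and "%dt" additionally
+ # folds in any days implied by weeks, months and years, as computed below.)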
+ my($yh,$Mh,$sh,$mh,$hh,$dh,$wh)=(); + $yh = 0; + + if ($M) { + $Mh = ($yh+$y)*12; + $wh = 0; + $dh = ($wh+$w)*7; + } else { + $Mh = 0; + $wh = ($yh+$y)*365.25/7; + $dh = ($yh+$y)*365.25 + $w*7; + } + + $hh = ($dh+$d)*24; + $mh = ($hh+$h)*60; + $sh = ($mh+$m)*60; + + # Set up the formats + + $f{"yv"} = $y; + $f{"Mv"} = $M; + $f{"wv"} = $w; + $f{"dv"} = $d; + $f{"hv"} = $h; + $f{"mv"} = $m; + $f{"sv"} = $s; + + $f{"yh"} = $y+$yh; + $f{"Mh"} = $M+$Mh; + $f{"wh"} = $w+$wh; + $f{"dh"} = $d+$dh; + $f{"hh"} = $h+$hh; + $f{"mh"} = $m+$mh; + $f{"sh"} = $s+$sh; + + $f{"yd"} = sprintf($dec,$y+$yd); + $f{"Md"} = sprintf($dec,$M+$Md); + $f{"wd"} = sprintf($dec,$w+$wd); + $f{"dd"} = sprintf($dec,$d+$dd); + $f{"hd"} = sprintf($dec,$h+$hd); + $f{"md"} = sprintf($dec,$m+$md); + $f{"sd"} = sprintf($dec,$s+$sd); + + $f{"yt"} = sprintf($dec,$yh+$y+$yd); + $f{"Mt"} = sprintf($dec,$Mh+$M+$Md); + $f{"wt"} = sprintf($dec,$wh+$w+$wd); + $f{"dt"} = sprintf($dec,$dh+$d+$dd); + $f{"ht"} = sprintf($dec,$hh+$h+$hd); + $f{"mt"} = sprintf($dec,$mh+$m+$md); + $f{"st"} = sprintf($dec,$sh+$s+$sd); + + $f{"%"} = "%"; + + foreach $format (@format) { + $format=reverse($format); + $out=""; + PARSE: while ($format) { + $c1=chop($format); + if ($c1 eq "%") { + $c1=chop($format); + if (exists($f{$c1})) { + $out .= $f{$c1}; + next PARSE; + } + $c2=chop($format); + if (exists($f{"$c1$c2"})) { + $out .= $f{"$c1$c2"}; + next PARSE; + } + $out .= $c1; + $format .= $c2; + } else { + $out .= $c1; + } + } + push(@out,$out); + } + if ($scalar) { + return $out[0]; + } else { + return (@out); + } +} +use integer; + +sub ParseRecur { + print "DEBUG: ParseRecur\n" if ($Curr{"Debug"} =~ /trace/); + &Date_Init() if (! $Curr{"InitDone"}); + + my($recur,$dateb,$date0,$date1,$flag)=@_; + local($_)=$recur; + + my($recur_0,$recur_1,@recur0,@recur1)=(); + my(@tmp,$tmp,$each,$num,$y,$m,$d,$w,$h,$mn,$s,$delta,$y0,$y1,$yb)=(); + my($yy,$n,$dd,@d,@tmp2,$date,@date,@w,@tmp3,@m,@y,$tmp2,$d2,@flags)=(); + + # $date0, $date1, $dateb, $flag : passed in (these are always the final say + # in determining whether a date matches a + # recurrence IF they are present. + # $date_b, $date_0, $date_1 : if a value can be determined from the + # $flag_t recurrence, they are stored here. + # + # If values can be determined from the recurrence AND are passed in, the + # following are used: + # max($date0,$date_0) i.e. the later of the two dates + # min($date1,$date_1) i.e. the earlier of the two dates + # + # The base date that is used is the first one defined from + # $dateb $date_b + # The base date is only used if necessary (as determined by the recur). + # For example, "every other friday" requires a base date, but "2nd + # friday of every month" doesn't. + + my($date_b,$date_0,$date_1,$flag_t); + + # + # Check the arguments passed in. + # + + $date0="" if (! defined $date0); + $date1="" if (! defined $date1); + $dateb="" if (! defined $dateb); + $flag ="" if (! defined $flag); + + if ($dateb) { + $dateb=&ParseDateString($dateb); + return "" if (! $dateb); + } + if ($date0) { + $date0=&ParseDateString($date0); + return "" if (! $date0); + } + if ($date1) { + $date1=&ParseDateString($date1); + return "" if (! $date1); + } + + # + # Parse the recur. $date_b, $date_0, and $date_e are values obtained + # from the recur. + # + + @tmp=&Recur_Split($_); + + if (@tmp) { + ($recur_0,$recur_1,$flag_t,$date_b,$date_0,$date_1)=@tmp; + $recur_0 = "" if (! defined $recur_0); + $recur_1 = "" if (! defined $recur_1); + $flag_t = "" if (! defined $flag_t); + $date_b = "" if (! 
defined $date_b); + $date_0 = "" if (! defined $date_0); + $date_1 = "" if (! defined $date_1); + + @recur0 = split(/:/,$recur_0); + @recur1 = split(/:/,$recur_1); + return "" if ($#recur0 + $#recur1 + 2 != 7); + + if ($date_b) { + $date_b=&ParseDateString($date_b); + return "" if (! $date_b); + } + if ($date_0) { + $date_0=&ParseDateString($date_0); + return "" if (! $date_0); + } + if ($date_1) { + $date_1=&ParseDateString($date_1); + return "" if (! $date_1); + } + + } else { + + my($mmm)='\s*'.$Lang{$Cnf{"Language"}}{"Month"}; # \s*(jan|january|...) + my(%mmm)=%{ $Lang{$Cnf{"Language"}}{"MonthH"} }; # { jan=>1, ... } + my($wkexp)='\s*'.$Lang{$Cnf{"Language"}}{"Week"}; # \s*(mon|monday|...) + my(%week)=%{ $Lang{$Cnf{"Language"}}{"WeekH"} }; # { monday=>1, ... } + my($day)='\s*'.$Lang{$Cnf{"Language"}}{"Dabb"}; # \s*(?:d|day|days) + my($month)='\s*'.$Lang{$Cnf{"Language"}}{"Mabb"}; # \s*(?:mon|month|months) + my($week)='\s*'.$Lang{$Cnf{"Language"}}{"Wabb"}; # \s*(?:w|wk|week|weeks) + my($daysexp)=$Lang{$Cnf{"Language"}}{"DoM"}; # (1st|first|...31st) + my(%dayshash)=%{ $Lang{$Cnf{"Language"}}{"DoMH"} }; + # { 1st=>1,first=>1,...} + my($of)='\s*'.$Lang{$Cnf{"Language"}}{"Of"}; # \s*(?:in|of) + my($lastexp)=$Lang{$Cnf{"Language"}}{"Last"}; # (?:last) + my($each)=$Lang{$Cnf{"Language"}}{"Each"}; # (?:each|every) + + my($D)='\s*(\d+)'; + my($Y)='\s*(\d{4}|\d{2})'; + + # Change 1st to 1 + if (/(^|[^a-z])$daysexp($|[^a-z])/i) { + $tmp=lc($2); + $tmp=$dayshash{"$tmp"}; + s/(^|[^a-z])$daysexp($|[^a-z])/$1 $tmp $3/i; + } + s/\s*$//; + + # Get rid of "each" + if (/(^|[^a-z])$each($|[^a-z])/i) { + s/(^|[^a-z])$each($|[^a-z])/$1 $2/i; + $each=1; + } else { + $each=0; + } + + if ($each) { + + if (/^$D?$day(?:$of$mmm?$Y)?$/i || + /^$D?$day(?:$of$mmm())?$/i) { + # every [2nd] day in [june] 1997 + # every [2nd] day [in june] + ($num,$m,$y)=($1,$2,$3); + $num=1 if (! defined $num); + $m="" if (! defined $m); + $y="" if (! defined $y); + + $y=$Curr{"Y"} if (! $y); + if ($m) { + $m=$mmm{lc($m)}; + $date_0=&Date_Join($y,$m,1,0,0,0); + $date_1=&DateCalc_DateDelta($date_0,"+0:1:0:0:0:0:0",0); + } else { + $date_0=&Date_Join($y, 1,1,0,0,0); + $date_1=&Date_Join($y+1,1,1,0,0,0); + } + $date_b=&DateCalc($date_0,"-0:0:0:1:0:0:0",0); + @recur0=(0,0,0,$num,0,0,0); + @recur1=(); + + } elsif (/^$D$day?$of$month(?:$of?$Y)?$/) { + # 2nd [day] of every month [in 1997] + ($num,$y)=($1,$2); + $y=$Curr{"Y"} if (! $y); + + $date_0=&Date_Join($y, 1,1,0,0,0); + $date_1=&Date_Join($y+1,1,1,0,0,0); + $date_b=$date_0; + + @recur0=(0,1,0); + @recur1=($num,0,0,0); + + } elsif (/^$D$wkexp$of$month(?:$of?$Y)?$/ || + /^($lastexp)$wkexp$of$month(?:$of?$Y)?$/) { + # 2nd tuesday of every month [in 1997] + # last tuesday of every month [in 1997] + ($num,$d,$y)=($1,$2,$3); + $y=$Curr{"Y"} if (! $y); + $d=$week{lc($d)}; + $num=-1 if ($num !~ /^$D$/); + + $date_0=&Date_Join($y,1,1,0,0,0); + $date_1=&Date_Join($y+1,1,1,0,0,0); + $date_b=$date_0; + + @recur0=(0,1); + @recur1=($num,$d,0,0,0); + + } elsif (/^$D?$wkexp(?:$of$mmm?$Y)?$/i || + /^$D?$wkexp(?:$of$mmm())?$/i) { + # every tuesday in june 1997 + # every 2nd tuesday in june 1997 + ($num,$d,$m,$y)=($1,$2,$3,$4); + $y=$Curr{"Y"} if (! $y); + $num=1 if (! defined $num); + $m="" if (! 
defined $m); + $d=$week{lc($d)}; + + if ($m) { + $m=$mmm{lc($m)}; + $date_0=&Date_Join($y,$m,1,0,0,0); + $date_1=&DateCalc_DateDelta($date_0,"+0:1:0:0:0:0:0",0); + } else { + $date_0=&Date_Join($y,1,1,0,0,0); + $date_1=&Date_Join($y+1,1,1,0,0,0); + } + $date_b=&DateCalc($date_0,"-0:0:0:1:0:0:0",0); + + @recur0=(0,0,$num); + @recur1=($d,0,0,0); + + } else { + return ""; + } + + $date_0="" if ($date0); + $date_1="" if ($date1); + } else { + return ""; + } + } + + # + # Override with any values passed in + # + + if ($date0 && $date_0) { + $date0=( &Date_Cmp($date0,$date_0) > 1 ? $date0 : $date_0); + } elsif ($date_0) { + $date0 = $date_0; + } + + if ($date1 && $date_1) { + $date1=( &Date_Cmp($date1,$date_1) > 1 ? $date_1 : $date1); + } elsif ($date_1) { + $date1 = $date_1; + } + + $dateb=$date_b if (! $dateb); + + if ($flag =~ s/^\+//) { + if ($flag_t) { + $flag="$flag_t,$flag"; + } + } + $flag =$flag_t if (! $flag && $flag_t); + + if (! wantarray) { + $tmp = join(":",@recur0); + $tmp .= "*" . join(":",@recur1) if (@recur1); + $tmp .= "*$flag*$dateb*$date0*$date1"; + return $tmp; + } + if (@recur0) { + return () if (! $date0 || ! $date1); # dateb is NOT required in all case + } + + # + # Some flags affect parsing. + # + + @flags = split(/,/,$flag); + my($MDn) = 0; + my($MWn) = 7; + my($f); + foreach $f (@flags) { + if ($f =~ /^MW([1-7])$/i) { + $MWn=$1; + $MDn=0; + + } elsif ($f =~ /^MD([1-7])$/i) { + $MDn=$1; + $MWn=0; + + } elsif ($f =~ /^EASTER$/i) { + ($y,$m,$w,$d,$h,$mn,$s)=(@recur0,@recur1); + # We want something that will return Jan 1 for the given years. + if ($#recur0==-1) { + @recur1=($y,1,0,1,$h,$mn,$s); + } elsif ($#recur0<=3) { + @recur0=($y,0,0,0); + @recur1=($h,$mn,$s); + } elsif ($#recur0==4) { + @recur0=($y,0,0,0,0); + @recur1=($mn,$s); + } elsif ($#recur0==5) { + @recur0=($y,0,0,0,0,0); + @recur1=($s); + } else { + @recur0=($y,0,0,0,0,0,0); + } + } + } + + # + # Determine the dates referenced by the recur. Also, fix the base date + # as necessary for the recurrences which require it. + # + + ($y,$m,$w,$d,$h,$mn,$s)=(@recur0,@recur1); + @y=@m=@w=@d=(); + my(@time)=($h,$mn,$s); + + RECUR: while (1) { + + if ($#recur0==-1) { + # * Y-M-W-D-H-MN-S + if ($y eq "0") { + push(@recur0,0); + shift(@recur1); + + } else { + @y=&ReturnList($y); + foreach $y (@y) { + $y=&Date_FixYear($y) if (length($y)==2); + return () if (length($y)!=4 || ! &IsInt($y)); + } + @y=sort { $a<=>$b } @y; + + $date0=&ParseDate("0000-01-01") if (! $date0); + $date1=&ParseDate("9999-12-31 23:59:59") if (! $date1); + + if ($m eq "0" and $w eq "0") { + # * Y-0-0-0-H-MN-S + # * Y-0-0-DOY-H-MN-S + if ($d eq "0") { + @d=(1); + } else { + @d=&ReturnList($d); + return () if (! @d); + foreach $d (@d) { + return () if (! &IsInt($d,1,366)); + } + @d=sort { $a<=>$b } (@d); + } + + @date=(); + foreach $yy (@y) { + foreach $d (@d) { + ($y,$m,$dd)=&Date_NthDayOfYear($yy,$d); + push(@date, &Date_Join($y,$m,$dd,0,0,0)); + } + } + last RECUR; + + } elsif ($w eq "0") { + # * Y-M-0-0-H-MN-S + # * Y-M-0-DOM-H-MN-S + + @m=&ReturnList($m); + return () if (! @m); + foreach $m (@m) { + return () if (! &IsInt($m,1,12)); + } + @m=sort { $a<=>$b } (@m); + + if ($d eq "0") { + @d=(1); + } else { + @d=&ReturnList($d); + return () if (! @d); + foreach $d (@d) { + return () if (! 
&IsInt($d,1,31)); + } + @d=sort { $a<=>$b } (@d); + } + + @date=(); + foreach $y (@y) { + foreach $m (@m) { + foreach $d (@d) { + $date=&Date_Join($y,$m,$d,0,0,0); + push(@date,$date) if ($d<29 || &Date_Split($date)); + } + } + } + last RECUR; + + } elsif ($m eq "0") { + # * Y-0-WOY-DOW-H-MN-S + # * Y-0-WOY-0-H-MN-S + @w=&ReturnList($w); + return () if (! @w); + foreach $w (@w) { + return () if (! &IsInt($w,1,53)); + } + + if ($d eq "0") { + @d=($Cnf{"FirstDay"}); + } else { + @d=&ReturnList($d); + return () if (! @d); + foreach $d (@d) { + return () if (! &IsInt($d,1,7)); + } + @d=sort { $a<=>$b } (@d); + } + + @date=(); + foreach $y (@y) { + foreach $w (@w) { + $w="0$w" if (length($w)==1); + foreach $d (@d) { + $date=&ParseDateString("$y-W$w-$d"); + push(@date,$date); + } + } + } + last RECUR; + + } else { + # * Y-M-WOM-DOW-H-MN-S + # * Y-M-WOM-0-H-MN-S + + @m=&ReturnList($m); + return () if (! @m); + foreach $m (@m) { + return () if (! &IsInt($m,1,12)); + } + @m=sort { $a<=>$b } (@m); + + @w=&ReturnList($w); + + if ($d eq "0") { + @d=(); + } else { + @d=&ReturnList($d); + } + + @date=&Date_Recur_WoM(\@y,\@m,\@w,\@d,$MWn,$MDn); + last RECUR; + } + } + } + + if ($#recur0==0) { + # Y * M-W-D-H-MN-S + $n=$y; + $n=1 if ($n==0); + + @m=&ReturnList($m); + return () if (! @m); + foreach $m (@m) { + return () if (! &IsInt($m,1,12)); + } + @m=sort { $a<=>$b } (@m); + + if ($m eq "0") { + # Y * 0-W-D-H-MN-S (equiv to Y-0 * W-D-H-MN-S) + push(@recur0,0); + shift(@recur1); + + } elsif ($w eq "0") { + # Y * M-0-DOM-H-MN-S + return () if (! $dateb); + $d=1 if ($d eq "0"); + + @d=&ReturnList($d); + return () if (! @d); + foreach $d (@d) { + return () if (! &IsInt($d,1,31)); + } + @d=sort { $a<=>$b } (@d); + + # We need to find years that are a multiple of $n from $y(base) + ($y0)=( &Date_Split($date0, 1) )[0]; + ($y1)=( &Date_Split($date1, 1) )[0]; + ($yb)=( &Date_Split($dateb, 1) )[0]; + @date=(); + for ($yy=$y0; $yy<=$y1; $yy++) { + if (($yy-$yb)%$n == 0) { + foreach $m (@m) { + foreach $d (@d) { + $date=&Date_Join($yy,$m,$d,0,0,0); + push(@date,$date) if ($d<29 || &Date_Split($date)); + } + } + } + } + last RECUR; + + } else { + # Y * M-WOM-DOW-H-MN-S + # Y * M-WOM-0-H-MN-S + return () if (! $dateb); + @m=&ReturnList($m); + @w=&ReturnList($w); + if ($d eq "0") { + @d=(); + } else { + @d=&ReturnList($d); + } + + ($y0)=( &Date_Split($date0, 1) )[0]; + ($y1)=( &Date_Split($date1, 1) )[0]; + ($yb)=( &Date_Split($dateb, 1) )[0]; + @y=(); + for ($yy=$y0; $yy<=$y1; $yy++) { + if (($yy-$yb)%$n == 0) { + push(@y,$yy); + } + } + + @date=&Date_Recur_WoM(\@y,\@m,\@w,\@d,$MWn,$MDn); + last RECUR; + } + } + + if ($#recur0==1) { + # Y-M * W-D-H-MN-S + + if ($w eq "0") { + # Y-M * 0-D-H-MN-S (equiv to Y-M-0 * D-H-MN-S) + push(@recur0,0); + shift(@recur1); + + } elsif ($m==0) { + # Y-0 * WOY-0-H-MN-S + # Y-0 * WOY-DOW-H-MN-S + return () if (! $dateb); + $n=$y; + $n=1 if ($n==0); + + @w=&ReturnList($w); + return () if (! @w); + foreach $w (@w) { + return () if (! &IsInt($w,1,53)); + } + + if ($d eq "0") { + @d=($Cnf{"FirstDay"}); + } else { + @d=&ReturnList($d); + return () if (! @d); + foreach $d (@d) { + return () if (! 
&IsInt($d,1,7)); + } + @d=sort { $a<=>$b } (@d); + } + + # We need to find years that are a multiple of $n from $y(base) + ($y0)=( &Date_Split($date0, 1) )[0]; + ($y1)=( &Date_Split($date1, 1) )[0]; + ($yb)=( &Date_Split($dateb, 1) )[0]; + @date=(); + for ($yy=$y0; $yy<=$y1; $yy++) { + if (($yy-$yb)%$n == 0) { + foreach $w (@w) { + $w="0$w" if (length($w)==1); + foreach $tmp (@d) { + $date=&ParseDateString("$yy-W$w-$tmp"); + push(@date,$date); + } + } + } + } + last RECUR; + + } else { + # Y-M * WOM-0-H-MN-S + # Y-M * WOM-DOW-H-MN-S + return () if (! $dateb); + @tmp=(@recur0); + push(@tmp,0) while ($#tmp<6); + $delta=join(":",@tmp); + @tmp=&Date_Recur($date0,$date1,$dateb,$delta); + + @w=&ReturnList($w); + @m=(); + if ($d eq "0") { + @d=(); + } else { + @d=&ReturnList($d); + } + + @date=&Date_Recur_WoM(\@tmp,\@m,\@w,\@d,$MWn,$MDn); + last RECUR; + } + } + + if ($#recur0==2) { + # Y-M-W * D-H-MN-S + + if ($d eq "0") { + # Y-M-W * 0-H-MN-S + return () if (! $dateb); + $y=1 if ($y==0 && $m==0 && $w==0); + $delta="$y:$m:$w:0:0:0:0"; + @date=&Date_Recur($date0,$date1,$dateb,$delta); + last RECUR; + + } elsif ($m==0 && $w==0) { + # Y-0-0 * DOY-H-MN-S + $y=1 if ($y==0); + $n=$y; + return () if (! $dateb && $y!=1); + + @d=&ReturnList($d); + return () if (! @d); + foreach $d (@d) { + return () if (! &IsInt($d,1,366)); + } + @d=sort { $a<=>$b } (@d); + + # We need to find years that are a multiple of $n from $y(base) + ($y0)=( &Date_Split($date0, 1) )[0]; + ($y1)=( &Date_Split($date1, 1) )[0]; + ($yb)=( &Date_Split($dateb, 1) )[0]; + @date=(); + for ($yy=$y0; $yy<=$y1; $yy++) { + if (($yy-$yb)%$n == 0) { + foreach $d (@d) { + ($y,$m,$dd)=&Date_NthDayOfYear($yy,$d); + push(@date, &Date_Join($y,$m,$dd,0,0,0)); + } + } + } + last RECUR; + + } elsif ($w>0) { + # Y-M-W * DOW-H-MN-S + return () if (! $dateb); + @tmp=(@recur0); + push(@tmp,0) while ($#tmp<6); + $delta=join(":",@tmp); + + @d=&ReturnList($d); + return () if (! @d); + foreach $d (@d) { + return () if (! &IsInt($d,1,7)); + } + + # Find out what DofW the basedate is. + @tmp2=&Date_Split($dateb, 1); + $tmp=&Date_DayOfWeek($tmp2[1],$tmp2[2],$tmp2[0]); + + @date=(); + foreach $d (@d) { + $date_b=$dateb; + # Move basedate to DOW + if ($d != $tmp) { + if (($tmp>=$Cnf{"FirstDay"} && $d<$Cnf{"FirstDay"}) || + ($tmp>=$Cnf{"FirstDay"} && $d>$tmp) || + ($tmp<$d && $d<$Cnf{"FirstDay"})) { + $date_b=&Date_GetNext($date_b,$d); + } else { + $date_b=&Date_GetPrev($date_b,$d); + } + } + push(@date,&Date_Recur($date0,$date1,$date_b,$delta)); + } + @date=sort(@date); + last RECUR; + + } elsif ($m>0) { + # Y-M-0 * DOM-H-MN-S + return () if (! $dateb); + @tmp=(@recur0); + push(@tmp,0) while ($#tmp<6); + $delta=join(":",@tmp); + + @d=&ReturnList($d); + return () if (! @d); + foreach $d (@d) { + return () if (! &IsInt($d,-31,31) || $d==0); + } + @d=sort { $a<=>$b } (@d); + + @tmp2=&Date_Recur($date0,$date1,$dateb,$delta); + @date=(); + foreach $date (@tmp2) { + ($y,$m)=( &Date_Split($date, 1) )[0..1]; + $tmp2=&Date_DaysInMonth($m,$y); + foreach $d (@d) { + $d2=$d; + $d2=$tmp2+1+$d if ($d<0); + push(@date,&Date_Join($y,$m,$d2,0,0,0)) if ($d2<=$tmp2); + } + } + @date=sort (@date); + last RECUR; + + } else { + return (); + } + } + + if ($#recur0>2) { + # Y-M-W-D * H-MN-S + # Y-M-W-D-H * MN-S + # Y-M-W-D-H-MN * S + # Y-M-W-D-H-S + return () if (! 
$dateb); + @tmp=(@recur0); + push(@tmp,0) while ($#tmp<6); + $delta=join(":",@tmp); + return () if ($delta !~ /[1-9]/); # return if "0:0:0:0:0:0:0" + @date=&Date_Recur($date0,$date1,$dateb,$delta); + if (@recur1) { + unshift(@recur1,-1) while ($#recur1<2); + @time=@recur1; + } else { + shift(@date); + pop(@date); + @time=(); + } + } + + last RECUR; + } + @date=&Date_RecurSetTime($date0,$date1,\@date,@time) if (@time); + + # + # We've got a list of dates. Operate on them with the flags. + # + + my($sign,$forw,$today,$df,$db,$work,$i); + if (@flags) { + FLAG: foreach $f (@flags) { + $f = uc($f); + + if ($f =~ /^(P|N)(D|T)([1-7])$/) { + @tmp=($1,$2,$3); + $forw =($tmp[0] eq "P" ? 0 : 1); + $today=($tmp[1] eq "D" ? 0 : 1); + $d=$tmp[2]; + @tmp=(); + foreach $date (@date) { + if ($forw) { + push(@tmp, &Date_GetNext($date,$d,$today)); + } else { + push(@tmp, &Date_GetPrev($date,$d,$today)); + } + } + @date=@tmp; + next FLAG; + } + + # We want to go forward exact amounts of time instead of + # business mode calculations so that we don't change the time + # (which may have been set in the recur). + if ($f =~ /^(F|B)(D|W)(\d+)$/) { + @tmp=($1,$2,$3); + $sign="+"; + $sign="-" if ($tmp[0] eq "B"); + $work=0; + $work=1 if ($tmp[1] eq "W"); + $n=$tmp[2]; + @tmp=(); + foreach $date (@date) { + for ($i=1; $i<=$n; $i++) { + while (1) { + $date=&DateCalc($date,"${sign}0:0:0:1:0:0:0"); + last if (! $work || &Date_IsWorkDay($date,0)); + } + } + push(@tmp,$date); + } + @date=@tmp; + next FLAG; + } + + if ($f =~ /^CW(N|P|D)$/ || $f =~ /^(N|P|D)W(D)$/) { + $tmp=$1; + my $noalt = $2 ? 1 : 0; + if ($tmp eq "N" || ($tmp eq "D" && $Cnf{"TomorrowFirst"})) { + $forw=1; + } else { + $forw=0; + } + + @tmp=(); + DATE: foreach $date (@date) { + $df=$db=$date; + if (&Date_IsWorkDay($date)) { + push(@tmp,$date); + next DATE; + } + while (1) { + if ($forw) { + $d=$df=&DateCalc($df,"+0:0:0:1:0:0:0"); + } else { + $d=$db=&DateCalc($db,"-0:0:0:1:0:0:0"); + } + if (&Date_IsWorkDay($d)) { + push(@tmp,$d); + next DATE; + } + $forw=1-$forw if (! $noalt); + } + } + @date=@tmp; + next FLAG; + } + + if ($f eq "EASTER") { + @tmp=(); + foreach $date (@date) { + ($y,$m,$d,$h,$mn,$s)=&Date_Split($date, 1); + ($m,$d)=&Date_Easter($y); + $date=&Date_Join($y,$m,$d,$h,$mn,$s); + next if (&Date_Cmp($date,$date0)<0 || + &Date_Cmp($date,$date1)>0); + push(@tmp,$date); + } + @date=@tmp; + } + } + @date = sort(@date); + } + @date; +} + +sub Date_GetPrev { + print "DEBUG: Date_GetPrev\n" if ($Curr{"Debug"} =~ /trace/); + my($date,$dow,$today,$hr,$min,$sec)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + my($y,$m,$d,$h,$mn,$s,$err,$curr_dow,%dow,$num,$delta,$th,$tm,$ts, + $adjust,$curr)=(); + $hr="00" if (defined $hr && $hr eq "0"); + $min="00" if (defined $min && $min eq "0"); + $sec="00" if (defined $sec && $sec eq "0"); + + if (! &Date_Split($date)) { + $date=&ParseDateString($date); + return "" if (! $date); + } + $curr=$date; + ($y,$m,$d)=( &Date_Split($date, 1) )[0..2]; + + if ($dow) { + $curr_dow=&Date_DayOfWeek($m,$d,$y); + %dow=%{ $Lang{$Cnf{"Language"}}{"WeekH"} }; + if (&IsInt($dow)) { + return "" if ($dow<1 || $dow>7); + } else { + return "" if (! exists $dow{lc($dow)}); + $dow=$dow{lc($dow)}; + } + if ($dow == $curr_dow) { + $date=&DateCalc_DateDelta($date,"-0:0:1:0:0:0:0",\$err,0) if (! 
$today); + $adjust=1 if ($today==2); + } else { + $dow -= 7 if ($dow>$curr_dow); # make sure previous day is less + $num = $curr_dow - $dow; + $date=&DateCalc_DateDelta($date,"-0:0:0:$num:0:0:0",\$err,0); + } + $date=&Date_SetTime($date,$hr,$min,$sec) if (defined $hr); + $date=&DateCalc_DateDelta($date,"-0:0:1:0:0:0:0",\$err,0) + if ($adjust && &Date_Cmp($date,$curr)>0); + + } else { + ($h,$mn,$s)=( &Date_Split($date, 1) )[3..5]; + ($th,$tm,$ts)=&Date_ParseTime($hr,$min,$sec); + if ($hr) { + ($hr,$min,$sec)=($th,$tm,$ts); + $delta="-0:0:0:1:0:0:0"; + } elsif ($min) { + ($hr,$min,$sec)=($h,$tm,$ts); + $delta="-0:0:0:0:1:0:0"; + } elsif ($sec) { + ($hr,$min,$sec)=($h,$mn,$ts); + $delta="-0:0:0:0:0:1:0"; + } else { + confess "ERROR: invalid arguments in Date_GetPrev.\n"; + } + + $d=&Date_SetTime($date,$hr,$min,$sec); + if ($today) { + $d=&DateCalc_DateDelta($d,$delta,\$err,0) if (&Date_Cmp($d,$date)>0); + } else { + $d=&DateCalc_DateDelta($d,$delta,\$err,0) if (&Date_Cmp($d,$date)>=0); + } + $date=$d; + } + return $date; +} + +sub Date_GetNext { + print "DEBUG: Date_GetNext\n" if ($Curr{"Debug"} =~ /trace/); + my($date,$dow,$today,$hr,$min,$sec)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + my($y,$m,$d,$h,$mn,$s,$err,$curr_dow,%dow,$num,$delta,$th,$tm,$ts, + $adjust,$curr)=(); + $hr="00" if (defined $hr && $hr eq "0"); + $min="00" if (defined $min && $min eq "0"); + $sec="00" if (defined $sec && $sec eq "0"); + + if (! &Date_Split($date)) { + $date=&ParseDateString($date); + return "" if (! $date); + } + $curr=$date; + ($y,$m,$d)=( &Date_Split($date, 1) )[0..2]; + + if ($dow) { + $curr_dow=&Date_DayOfWeek($m,$d,$y); + %dow=%{ $Lang{$Cnf{"Language"}}{"WeekH"} }; + if (&IsInt($dow)) { + return "" if ($dow<1 || $dow>7); + } else { + return "" if (! exists $dow{lc($dow)}); + $dow=$dow{lc($dow)}; + } + if ($dow == $curr_dow) { + $date=&DateCalc_DateDelta($date,"+0:0:1:0:0:0:0",\$err,0) if (! $today); + $adjust=1 if ($today==2); + } else { + $curr_dow -= 7 if ($curr_dow>$dow); # make sure next date is greater + $num = $dow - $curr_dow; + $date=&DateCalc_DateDelta($date,"+0:0:0:$num:0:0:0",\$err,0); + } + $date=&Date_SetTime($date,$hr,$min,$sec) if (defined $hr); + $date=&DateCalc_DateDelta($date,"+0:0:1:0:0:0:0",\$err,0) + if ($adjust && &Date_Cmp($date,$curr)<0); + + } else { + ($h,$mn,$s)=( &Date_Split($date, 1) )[3..5]; + ($th,$tm,$ts)=&Date_ParseTime($hr,$min,$sec); + if ($hr) { + ($hr,$min,$sec)=($th,$tm,$ts); + $delta="+0:0:0:1:0:0:0"; + } elsif ($min) { + ($hr,$min,$sec)=($h,$tm,$ts); + $delta="+0:0:0:0:1:0:0"; + } elsif ($sec) { + ($hr,$min,$sec)=($h,$mn,$ts); + $delta="+0:0:0:0:0:1:0"; + } else { + confess "ERROR: invalid arguments in Date_GetNext.\n"; + } + + $d=&Date_SetTime($date,$hr,$min,$sec); + if ($today) { + $d=&DateCalc_DateDelta($d,$delta,\$err,0) if (&Date_Cmp($d,$date)<0); + } else { + $d=&DateCalc_DateDelta($d,$delta,\$err,0) if (&Date_Cmp($d,$date)<1); + } + $date=$d; + } + + return $date; +} + +sub Date_IsHoliday { + print "DEBUG: Date_IsHoliday\n" if ($Curr{"Debug"} =~ /trace/); + my($date)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + $date=&ParseDateString($date); + return undef if (! $date); + $date=&Date_SetTime($date,0,0,0); + my($y)=(&Date_Split($date, 1))[0]; + &Date_UpdateHolidays($y) if (! exists $Holiday{"dates"}{$y}); + return undef if (! exists $Holiday{"dates"}{$y}{$date}); + my($name)=$Holiday{"dates"}{$y}{$date}; + return "" if (! 
$name); + $name; +} + +sub Events_List { + print "DEBUG: Events_List\n" if ($Curr{"Debug"} =~ /trace/); + my(@args)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + &Events_ParseRaw(); + + my($tmp,$date0,$date1,$flag); + $date0=&ParseDateString($args[0]); + warn "Invalid date $args[0]", return undef if (! $date0); + + if ($#args == 0) { + return &Events_Calc($date0); + } + + if ($args[1]) { + $date1=&ParseDateString($args[1]); + warn "Invalid date $args[1]\n", return undef if (! $date1); + if (&Date_Cmp($date0,$date1)>0) { + $tmp=$date1; + $date1=$date0; + $date0=$tmp; + } + } else { + $date0=&Date_SetTime($date0,"00:00:00"); + $date1=&DateCalc_DateDelta($date0,"+0:0:0:1:0:0:0"); + } + + $tmp=&Events_Calc($date0,$date1); + + $flag=$args[2]; + return $tmp if (! $flag); + + my(@tmp,%ret,$delta)=(); + @tmp=@$tmp; + push(@tmp,$date1); + + if ($flag==1) { + while ($#tmp>0) { + ($date0,$tmp)=splice(@tmp,0,2); + $date1=$tmp[0]; + $delta=&DateCalc_DateDate($date0,$date1); + foreach $flag (@$tmp) { + if (exists $ret{$flag}) { + $ret{$flag}=&DateCalc_DeltaDelta($ret{$flag},$delta); + } else { + $ret{$flag}=$delta; + } + } + } + return \%ret; + + } elsif ($flag==2) { + while ($#tmp>0) { + ($date0,$tmp)=splice(@tmp,0,2); + $date1=$tmp[0]; + $delta=&DateCalc_DateDate($date0,$date1); + $flag=join("+",sort @$tmp); + next if (! $flag); + if (exists $ret{$flag}) { + $ret{$flag}=&DateCalc_DeltaDelta($ret{$flag},$delta); + } else { + $ret{$flag}=$delta; + } + } + return \%ret; + } + + warn "Invalid flag $flag\n"; + return undef; +} + +### +# NOTE: The following routines may be called in the routines below with very +# little time penalty. +### +sub Date_SetTime { + print "DEBUG: Date_SetTime\n" if ($Curr{"Debug"} =~ /trace/); + my($date,$h,$mn,$s)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + my($y,$m,$d)=(); + + if (! &Date_Split($date)) { + $date=&ParseDateString($date); + return "" if (! $date); + } + + ($y,$m,$d)=( &Date_Split($date, 1) )[0..2]; + ($h,$mn,$s)=&Date_ParseTime($h,$mn,$s); + + my($ampm,$wk); + return "" if (&Date_DateCheck(\$y,\$m,\$d,\$h,\$mn,\$s,\$ampm,\$wk)); + &Date_Join($y,$m,$d,$h,$mn,$s); +} + +sub Date_SetDateField { + print "DEBUG: Date_SetDateField\n" if ($Curr{"Debug"} =~ /trace/); + my($date,$field,$val,$nocheck)=@_; + my($y,$m,$d,$h,$mn,$s)=(); + $nocheck=0 if (! defined $nocheck); + + ($y,$m,$d,$h,$mn,$s)=&Date_Split($date); + + if (! $y) { + $date=&ParseDateString($date); + return "" if (! $date); + ($y,$m,$d,$h,$mn,$s)=&Date_Split($date, 1); + } + + if (lc($field) eq "y") { + $y=$val; + } elsif (lc($field) eq "m") { + $m=$val; + } elsif (lc($field) eq "d") { + $d=$val; + } elsif (lc($field) eq "h") { + $h=$val; + } elsif (lc($field) eq "mn") { + $mn=$val; + } elsif (lc($field) eq "s") { + $s=$val; + } else { + confess "ERROR: Date_SetDateField: invalid field: $field\n"; + } + + $date=&Date_Join($y,$m,$d,$h,$mn,$s); + return $date if ($nocheck || &Date_Split($date)); + return ""; +} + +######################################################################## +# OTHER SUBROUTINES +######################################################################## +# NOTE: These routines should not call any of the routines above as +# there will be a severe time penalty (and the possibility of +# infinite recursion). The last couple routines above are +# exceptions. +# NOTE: Date_Init is a special case. It should be called (conditionally) +# in every routine that uses any variable from the Date::Manip +# namespace. 
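+ # The conditional idiom used for this throughout the file is
+ # &Date_Init() if (! $Curr{"InitDone"});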
+######################################################################## + +sub Date_DaysInMonth { + print "DEBUG: Date_DaysInMonth\n" if ($Curr{"Debug"} =~ /trace/); + my($m,$y)=@_; + $y=&Date_FixYear($y) if (length($y)!=4); + my(@d_in_m)=(0,31,28,31,30,31,30,31,31,30,31,30,31); + $d_in_m[2]=29 if (&Date_LeapYear($y)); + return $d_in_m[$m]; +} + +sub Date_DayOfWeek { + print "DEBUG: Date_DayOfWeek\n" if ($Curr{"Debug"} =~ /trace/); + my($m,$d,$y)=@_; + $y=&Date_FixYear($y) if (length($y)!=4); + my($dayofweek,$dec31)=(); + + $dec31=5; # Dec 31, 1BC was Friday + $dayofweek=(&Date_DaysSince1BC($m,$d,$y)+$dec31) % 7; + $dayofweek=7 if ($dayofweek==0); + return $dayofweek; +} + +# Can't be in "use integer" because the numbers are too big. +no integer; +sub Date_SecsSince1970 { + print "DEBUG: Date_SecsSince1970\n" if ($Curr{"Debug"} =~ /trace/); + my($m,$d,$y,$h,$mn,$s)=@_; + $y=&Date_FixYear($y) if (length($y)!=4); + my($sec_now,$sec_70)=(); + $sec_now=(&Date_DaysSince1BC($m,$d,$y)-1)*24*3600 + $h*3600 + $mn*60 + $s; +# $sec_70 =(&Date_DaysSince1BC(1,1,1970)-1)*24*3600; + $sec_70 =62167219200; + return ($sec_now-$sec_70); +} + +sub Date_SecsSince1970GMT { + print "DEBUG: Date_SecsSince1970GMT\n" if ($Curr{"Debug"} =~ /trace/); + my($m,$d,$y,$h,$mn,$s)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + $y=&Date_FixYear($y) if (length($y)!=4); + + my($sec)=&Date_SecsSince1970($m,$d,$y,$h,$mn,$s); + return $sec if ($Cnf{"ConvTZ"} eq "IGNORE"); + + my($tz)=$Cnf{"ConvTZ"}; + $tz=$Cnf{"TZ"} if (! $tz); + $tz=$Zone{"n2o"}{lc($tz)} if ($tz !~ /^[+-]\d{4}$/); + + my($tzs)=1; + $tzs=-1 if ($tz<0); + $tz=~/.(..)(..)/; + my($tzh,$tzm)=($1,$2); + $sec - $tzs*($tzh*3600+$tzm*60); +} +use integer; + +sub Date_DaysSince1BC { + print "DEBUG: Date_DaysSince1BC\n" if ($Curr{"Debug"} =~ /trace/); + my($m,$d,$y)=@_; + $y=&Date_FixYear($y) if (length($y)!=4); + my($Ny,$N4,$N100,$N400,$dayofyear,$days)=(); + my($cc,$yy)=(); + + $y=~ /(\d{2})(\d{2})/; + ($cc,$yy)=($1,$2); + + # Number of full years since Dec 31, 1BC (counting the year 0000). + $Ny=$y; + + # Number of full 4th years (incl. 0000) since Dec 31, 1BC + $N4=($Ny-1)/4 + 1; + $N4=0 if ($y==0); + + # Number of full 100th years (incl. 0000) + $N100=$cc + 1; + $N100-- if ($yy==0); + $N100=0 if ($y==0); + + # Number of full 400th years (incl. 0000) + $N400=($N100-1)/4 + 1; + $N400=0 if ($y==0); + + $dayofyear=&Date_DayOfYear($m,$d,$y); + $days= $Ny*365 + $N4 - $N100 + $N400 + $dayofyear; + + return $days; +} + +sub Date_DayOfYear { + print "DEBUG: Date_DayOfYear\n" if ($Curr{"Debug"} =~ /trace/); + my($m,$d,$y)=@_; + $y=&Date_FixYear($y) if (length($y)!=4); + # DinM = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + my(@days) = ( 0, 31, 59, 90,120,151,181,212,243,273,304,334,365); + my($ly)=0; + $ly=1 if ($m>2 && &Date_LeapYear($y)); + return ($days[$m-1]+$d+$ly); +} + +sub Date_DaysInYear { + print "DEBUG: Date_DaysInYear\n" if ($Curr{"Debug"} =~ /trace/); + my($y)=@_; + $y=&Date_FixYear($y) if (length($y)!=4); + return 366 if (&Date_LeapYear($y)); + return 365; +} + +sub Date_WeekOfYear { + print "DEBUG: Date_WeekOfYear\n" if ($Curr{"Debug"} =~ /trace/); + my($m,$d,$y,$f)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + $y=&Date_FixYear($y) if (length($y)!=4); + + my($day,$dow,$doy)=(); + $doy=&Date_DayOfYear($m,$d,$y); + + # The current DayOfYear and DayOfWeek + if ($Cnf{"Jan1Week1"}) { + $day=1; + } else { + $day=4; + } + $dow=&Date_DayOfWeek(1,$day,$y); + + # Move back to the first day of week 1. 
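+ # ($f is the weekday on which weeks begin -- the callers above pass 1
+ # for Monday-based %W weeks and 7 for Sunday-based %U weeks -- so $day
+ # is pulled back to the nearest such weekday at or before it.)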
+ $f-=7 if ($f>$dow); + $day-= ($dow-$f); + + return 0 if ($day>$doy); # Day is in last week of previous year + return (($doy-$day)/7 + 1); +} + +sub Date_LeapYear { + print "DEBUG: Date_LeapYear\n" if ($Curr{"Debug"} =~ /trace/); + my($y)=@_; + $y=&Date_FixYear($y) if (length($y)!=4); + return 0 unless $y % 4 == 0; + return 1 unless $y % 100 == 0; + return 0 unless $y % 400 == 0; + return 1; +} + +sub Date_DaySuffix { + print "DEBUG: Date_DaySuffix\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + return $Lang{$Cnf{"Language"}}{"DoML"}[$d-1]; +} + +sub Date_ConvTZ { + print "DEBUG: Date_ConvTZ\n" if ($Curr{"Debug"} =~ /trace/); + my($date,$from,$to)=@_; + if (not Date_Split($date)) { + croak "date passed in ('$date') is not a Date::Manip object"; + } + + &Date_Init() if (! $Curr{"InitDone"}); + my($gmt)=(); + + if (! $from) { + + if (! $to) { + # TZ -> ConvTZ + return $date if ($Cnf{"ConvTZ"} eq "IGNORE" or ! $Cnf{"ConvTZ"}); + $from=$Cnf{"TZ"}; + $to=$Cnf{"ConvTZ"}; + + } else { + # ConvTZ,TZ -> $to + $from=$Cnf{"ConvTZ"}; + $from=$Cnf{"TZ"} if (! $from); + } + + } else { + + if (! $to) { + # $from -> ConvTZ,TZ + return $date if ($Cnf{"ConvTZ"} eq "IGNORE"); + $to=$Cnf{"ConvTZ"}; + $to=$Cnf{"TZ"} if (! $to); + + } else { + # $from -> $to + } + } + + $to=$Zone{"n2o"}{lc($to)} + if (exists $Zone{"n2o"}{lc($to)}); + $from=$Zone{"n2o"}{lc($from)} + if (exists $Zone{"n2o"}{lc($from)}); + $gmt=$Zone{"n2o"}{"gmt"}; + + return $date if ($from !~ /^[+-]\d{4}$/ or $to !~ /^[+-]\d{4}$/); + return $date if ($from eq $to); + + my($s1,$h1,$m1,$s2,$h2,$m2,$d,$h,$m,$sign,$delta,$err,$yr,$mon,$sec)=(); + # We're going to try to do the calculation without calling DateCalc. + ($yr,$mon,$d,$h,$m,$sec)=&Date_Split($date, 1); + + # Convert $date from $from to GMT + $from=~/([+-])(\d{2})(\d{2})/; + ($s1,$h1,$m1)=($1,$2,$3); + $s1= ($s1 eq "-" ? "+" : "-"); # switch sign + $sign=$s1 . "1"; # + or - 1 + + # and from GMT to $to + $to=~/([+-])(\d{2})(\d{2})/; + ($s2,$h2,$m2)=($1,$2,$3); + + if ($s1 eq $s2) { + # Both the same sign + $m+= $sign*($m1+$m2); + $h+= $sign*($h1+$h2); + } else { + $sign=($s2 eq "-" ? +1 : -1) if ($h1<$h2 || ($h1==$h2 && $m1<$m2)); + $m+= $sign*($m1-$m2); + $h+= $sign*($h1-$h2); + } + + if ($m>59) { + $h+= $m/60; + $m-= ($m/60)*60; + } elsif ($m<0) { + $h+= ($m/60 - 1); + $m-= ($m/60 - 1)*60; + } + + if ($h>23) { + $delta=$h/24; + $h -= $delta*24; + if (($d + $delta) > 28) { + $date=&Date_Join($yr,$mon,$d,$h,$m,$sec); + return &DateCalc_DateDelta($date,"+0:0:0:$delta:0:0:0",\$err,0); + } + $d+= $delta; + } elsif ($h<0) { + $delta=-$h/24 + 1; + $h += $delta*24; + if (($d - $delta) < 1) { + $date=&Date_Join($yr,$mon,$d,$h,$m,$sec); + return &DateCalc_DateDelta($date,"-0:0:0:$delta:0:0:0",\$err,0); + } + $d-= $delta; + } + return &Date_Join($yr,$mon,$d,$h,$m,$sec); +} + +sub Date_TimeZone { + print "DEBUG: Date_TimeZone\n" if ($Curr{"Debug"} =~ /trace/); + my($null,$tz,@tz,$std,$dst,$time,$isdst,$tmp,$in)=(); + &Date_Init() if (! 
$Curr{"InitDone"}); + + # Get timezones from all of the relevant places + + push(@tz,$Cnf{"TZ"}) if (defined $Cnf{"TZ"}); # TZ config var + push(@tz,$ENV{"TZ"}) if (defined $ENV{"TZ"}); # TZ environ var + push(@tz,$ENV{'SYS$TIMEZONE_RULE'}) + if defined $ENV{'SYS$TIMEZONE_RULE'}; # VMS TZ environ var + push(@tz,$ENV{'SYS$TIMEZONE_NAME'}) + if defined $ENV{'SYS$TIMEZONE_NAME'}; # VMS TZ name environ var + push(@tz,$ENV{'UCX$TZ'}) + if defined $ENV{'UCX$TZ'}; # VMS TZ environ var + push(@tz,$ENV{'TCPIP$TZ'}) + if defined $ENV{'TCPIP$TZ'}; # VMS TZ environ var + + # The `date` command... if we're doing taint checking, we need to + # always call it with a full path... otherwise, use the user's path. + # + # Microsoft operating systems don't have a date command built in. Try + # to trap all the various ways of knowing we are on one of these systems. + # + # We'll try `date +%Z` first, and if that fails, we'll take just the + # `date` program and assume the output is of the format: + # Thu Aug 31 14:57:46 EDT 2000 + + unless (($^X =~ /perl\.exe$/i) or + ($OS eq "Windows") or + ($OS eq "Netware") or + ($OS eq "VMS")) { + if ($Date::Manip::NoTaint) { + if ($OS eq "VMS") { + $tz=$ENV{'SYS$TIMEZONE_NAME'}; + if (! $tz) { + $tz=$ENV{'MULTINET_TIMEZONE'}; + if (! $tz) { + $tz=$ENV{'SYS$TIMEZONE_DIFFERENTIAL'}/3600.; # e.g. '-4' for EDT + } + } + } else { + $tz=`date +%Z 2> /dev/null`; + chomp($tz); + if (! $tz) { + $tz=`date 2> /dev/null`; + chomp($tz); + $tz=(split(/\s+/,$tz))[4]; + } + } + push(@tz,$tz); + } else { + # We need to satisfy taint checking, but also look in all the + # directories in @DatePath. + # + local $ENV{PATH} = join(':', @Date::Manip::DatePath); + local $ENV{BASH_ENV} = ''; + $tz=`date +%Z 2> /dev/null`; + chomp($tz); + if (! $tz) { + $tz=`date 2> /dev/null`; + chomp($tz); + $tz=(split(/\s+/,$tz))[4]; + } + push(@tz,$tz); + } + } + + push(@tz,$main::TZ) if (defined $main::TZ); # $main::TZ + + if (-s "/etc/TIMEZONE") { # /etc/TIMEZONE + $in=new IO::File; + $in->open("/etc/TIMEZONE","r"); + while (! eof($in)) { + $tmp=<$in>; + if ($tmp =~ /^TZ\s*=\s*(.*?)\s*$/) { + push(@tz,$1); + last; + } + } + $in->close; + } + + if (-s "/etc/timezone") { # /etc/timezone + $in=new IO::File; + $in->open("/etc/timezone","r"); + while (! eof($in)) { + $tmp=<$in>; + next if ($tmp =~ /^\s*\043/); + chomp($tmp); + if ($tmp =~ /^\s*(.*?)\s*$/) { + push(@tz,$1); + last; + } + } + $in->close; + } + + # Now parse each one to find the first valid one. + foreach $tz (@tz) { + $tz =~ s/\s*$//; + $tz =~ s/^\s*//; + next if (! $tz); + + return uc($tz) + if (defined $Zone{"n2o"}{lc($tz)}); + + if ($tz =~ /^[+-]\d{4}$/) { + return $tz; + } elsif ($tz =~ /^([+-]\d{2})(?::(\d{2}))?$/) { + my($h,$m)=($1,$2); + $m="00" if (! $m); + return "$h$m"; + } + + # Handle US/Eastern format + if ($tz =~ /^$Zone{"tzones"}$/i) { + $tmp=lc $1; + $tz=$Zone{"tz2z"}{$tmp}; + } + + # Handle STD#DST# format (and STD-#DST-# formats) + if ($tz =~ /^([a-z]+)-?\d([a-z]+)-?\d?$/i) { + ($std,$dst)=($1,$2); + next if (! defined $Zone{"n2o"}{lc($std)} or + ! defined $Zone{"n2o"}{lc($dst)}); + $time = time(); + ($null,$null,$null,$null,$null,$null,$null,$null,$isdst) = + localtime($time); + return uc($dst) if ($isdst); + return uc($std); + } + } + + confess "ERROR: Date::Manip unable to determine TimeZone.\n"; +} + +# Returns 1 if $date is a work day. If $time is non-zero, the time is +# also checked to see if it falls within work hours. Returns "" if +# an invalid date is passed in. 
+sub Date_IsWorkDay { + print "DEBUG: Date_IsWorkDay\n" if ($Curr{"Debug"} =~ /trace/); + my($date,$time)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + $date=&ParseDateString($date); + return "" if (! $date); + my($d)=$date; + $d=&Date_SetTime($date,$Cnf{"WorkDayBeg"}) if (! $time); + + my($y,$mon,$day,$tmp,$h,$m,$dow)=(); + ($y,$mon,$day,$h,$m,$tmp)=&Date_Split($d, 1); + $dow=&Date_DayOfWeek($mon,$day,$y); + + return 0 if ($dow<$Cnf{"WorkWeekBeg"} or + $dow>$Cnf{"WorkWeekEnd"} or + "$h:$m" lt $Cnf{"WorkDayBeg"} or + "$h:$m" gt $Cnf{"WorkDayEnd"}); + + if (! exists $Holiday{"dates"}{$y}) { + # There will be recursion problems if we ever end up here twice. + $Holiday{"dates"}{$y}={}; + &Date_UpdateHolidays($y) + } + $d=&Date_SetTime($date,"00:00:00"); + return 0 if (exists $Holiday{"dates"}{$y}{$d}); + 1; +} + +# Finds the day $off work days from now. If $time is passed in, we must +# also take into account the time of day. +# +# If $time is not passed in, day 0 is today (if today is a workday) or the +# next work day if it isn't. In any case, the time of day is unaffected. +# +# If $time is passed in, day 0 is now (if now is part of a workday) or the +# start of the very next work day. +sub Date_NextWorkDay { + print "DEBUG: Date_NextWorkDay\n" if ($Curr{"Debug"} =~ /trace/); + my($date,$off,$time)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + $date=&ParseDateString($date); + my($err)=(); + + if (! &Date_IsWorkDay($date,$time)) { + if ($time) { + while (1) { + $date=&Date_GetNext($date,undef,0,$Cnf{"WorkDayBeg"}); + last if (&Date_IsWorkDay($date,$time)); + } + } else { + while (1) { + $date=&DateCalc_DateDelta($date,"+0:0:0:1:0:0:0",\$err,0); + last if (&Date_IsWorkDay($date,$time)); + } + } + } + + while ($off>0) { + while (1) { + $date=&DateCalc_DateDelta($date,"+0:0:0:1:0:0:0",\$err,0); + last if (&Date_IsWorkDay($date,$time)); + } + $off--; + } + + return $date; +} + +# Finds the day $off work days before now. If $time is passed in, we must +# also take into account the time of day. +# +# If $time is not passed in, day 0 is today (if today is a workday) or the +# previous work day if it isn't. In any case, the time of day is unaffected. +# +# If $time is passed in, day 0 is now (if now is part of a workday) or the +# end of the previous work period. Note that since the end of a work day +# will automatically be turned into the start of the next one, this time +# may actually be treated as AFTER the current time. +sub Date_PrevWorkDay { + print "DEBUG: Date_PrevWorkDay\n" if ($Curr{"Debug"} =~ /trace/); + my($date,$off,$time)=@_; + &Date_Init() if (! $Curr{"InitDone"}); + $date=&ParseDateString($date); + my($err)=(); + + if (! &Date_IsWorkDay($date,$time)) { + if ($time) { + while (1) { + $date=&Date_GetPrev($date,undef,0,$Cnf{"WorkDayEnd"}); + last if (&Date_IsWorkDay($date,$time)); + } + while (1) { + $date=&Date_GetNext($date,undef,0,$Cnf{"WorkDayBeg"}); + last if (&Date_IsWorkDay($date,$time)); + } + } else { + while (1) { + $date=&DateCalc_DateDelta($date,"-0:0:0:1:0:0:0",\$err,0); + last if (&Date_IsWorkDay($date,$time)); + } + } + } + + while ($off>0) { + while (1) { + $date=&DateCalc_DateDelta($date,"-0:0:0:1:0:0:0",\$err,0); + last if (&Date_IsWorkDay($date,$time)); + } + $off--; + } + + return $date; +} + +# This finds the nearest workday to $date. If $date is a workday, it +# is returned. +sub Date_NearestWorkDay { + print "DEBUG: Date_NearestWorkDay\n" if ($Curr{"Debug"} =~ /trace/); + my($date,$tomorrow)=@_; + &Date_Init() if (! 
$Curr{"InitDone"}); + $date=&ParseDateString($date); + my($a,$b,$dela,$delb,$err)=(); + $tomorrow=$Cnf{"TomorrowFirst"} if (! defined $tomorrow); + + return $date if (&Date_IsWorkDay($date)); + + # Find the nearest one. + if ($tomorrow) { + $dela="+0:0:0:1:0:0:0"; + $delb="-0:0:0:1:0:0:0"; + } else { + $dela="-0:0:0:1:0:0:0"; + $delb="+0:0:0:1:0:0:0"; + } + $a=$b=$date; + + while (1) { + $a=&DateCalc_DateDelta($a,$dela,\$err); + return $a if (&Date_IsWorkDay($a)); + $b=&DateCalc_DateDelta($b,$delb,\$err); + return $b if (&Date_IsWorkDay($b)); + } +} + +# &Date_NthDayOfYear($y,$n); +# Returns a list of (YYYY,MM,DD,HH,MM,SS) for the Nth day of the year. +sub Date_NthDayOfYear { + no integer; + print "DEBUG: Date_NthDayOfYear\n" if ($Curr{"Debug"} =~ /trace/); + my($y,$n)=@_; + $y=$Curr{"Y"} if (! $y); + $n=1 if (! defined $n or $n eq ""); + $n+=0; # to turn 023 into 23 + $y=&Date_FixYear($y) if (length($y)<4); + my $leap=&Date_LeapYear($y); + return () if ($n<1); + return () if ($n >= ($leap ? 367 : 366)); + + my(@d_in_m)=(31,28,31,30,31,30,31,31,30,31,30,31); + $d_in_m[1]=29 if ($leap); + + # Calculate the hours, minutes, and seconds into the day. + my $remain=($n - int($n))*24; + my $h=int($remain); + $remain=($remain - $h)*60; + my $mn=int($remain); + $remain=($remain - $mn)*60; + my $s=$remain; + + # Calculate the month and the day. + my($m,$d)=(0,0); + $n=int($n); + while ($n>0) { + $m++; + if ($n<=$d_in_m[0]) { + $d=int($n); + $n=0; + } else { + $n-= $d_in_m[0]; + shift(@d_in_m); + } + } + + ($y,$m,$d,$h,$mn,$s); +} + +######################################################################## +# NOT FOR EXPORT +######################################################################## + +# This is used in Date_Init to fill in a hash based on international +# data. It takes a list of keys and values and returns both a hash +# with these values and a regular expression of keys. +# +# IN: +# $data = [ key1 val1 key2 val2 ... ] +# $opts = lc : lowercase the keys in the regexp +# sort : sort (by length) the keys in the regexp +# back : create a regexp with a back reference +# escape : escape all strings in the regexp +# +# OUT: +# $regexp = '(?:key1|key2|...)' +# $hash = { key1=>val1 key2=>val2 ... } + +sub Date_InitHash { + print "DEBUG: Date_InitHash\n" if ($Curr{"Debug"} =~ /trace/); + my($data,$regexp,$opts,$hash)=@_; + my(@data)=@$data; + my($key,$val,@list)=(); + + # Parse the options + my($lc,$sort,$back,$escape)=(0,0,0,0); + $lc=1 if ($opts =~ /lc/i); + $sort=1 if ($opts =~ /sort/i); + $back=1 if ($opts =~ /back/i); + $escape=1 if ($opts =~ /escape/i); + + # Create the hash + while (@data) { + ($key,$val,@data)=@data; + $key=lc($key) if ($lc); + $$hash{$key}=$val; + } + + # Create the regular expression + if ($regexp) { + @list=keys(%$hash); + @list=sort sortByLength(@list) if ($sort); + if ($escape) { + foreach $val (@list) { + $val="\Q$val\E"; + } + } + if ($back) { + $$regexp="(" . join("|",@list) . ")"; + } else { + $$regexp="(?:" . join("|",@list) . ")"; + } + } +} + +# This is used in Date_Init to fill in regular expressions, lists, and +# hashes based on international data. It takes a list of lists which have +# to be stored as regular expressions (to find any element in the list), +# lists, and hashes (indicating the location in the lists). +# +# IN: +# $data = [ [ [ valA1 valA2 ... ][ valA1' valA2' ... ] ... ] +# [ [ valB1 valB2 ... ][ valB1' valB2' ... ] ... ] +# ... +# [ [ valZ1 valZ2 ... ] [valZ1' valZ1' ... ] ... ] ] +# $lists = [ \@listA \@listB ... 
\@listZ ] +# $opts = lc : lowercase the values in the regexp +# sort : sort (by length) the values in the regexp +# back : create a regexp with a back reference +# escape : escape all strings in the regexp +# $hash = [ \%hash, TYPE ] +# TYPE 0 : $hash{ valBn=>n-1 } +# TYPE 1 : $hash{ valBn=>n } +# +# OUT: +# $regexp = '(?:valA1|valA2|...|valB1|...)' +# $lists = [ [ valA1 valA2 ... ] # only the 1st list (or +# [ valB1 valB2 ... ] ... ] # 2nd for int. characters) +# $hash + +sub Date_InitLists { + print "DEBUG: Date_InitLists\n" if ($Curr{"Debug"} =~ /trace/); + my($data,$regexp,$opts,$lists,$hash)=@_; + my(@data)=@$data; + my(@lists)=@$lists; + my($i,@ele,$ele,@list,$j,$tmp)=(); + + # Parse the options + my($lc,$sort,$back,$escape)=(0,0,0,0); + $lc=1 if ($opts =~ /lc/i); + $sort=1 if ($opts =~ /sort/i); + $back=1 if ($opts =~ /back/i); + $escape=1 if ($opts =~ /escape/i); + + # Set each of the lists + if (@lists) { + confess "ERROR: Date_InitLists: lists must be 1 per data\n" + if ($#lists != $#data); + for ($i=0; $i<=$#data; $i++) { + @ele=@{ $data[$i] }; + if ($Cnf{"IntCharSet"} && $#ele>0) { + @{ $lists[$i] } = @{ $ele[1] }; + } else { + @{ $lists[$i] } = @{ $ele[0] }; + } + } + } + + # Create the hash + my($hashtype,$hashsave,%hash)=(); + if (@$hash) { + ($hash,$hashtype)=@$hash; + $hashsave=1; + } else { + $hashtype=0; + $hashsave=0; + } + for ($i=0; $i<=$#data; $i++) { + @ele=@{ $data[$i] }; + foreach $ele (@ele) { + @list = @{ $ele }; + for ($j=0; $j<=$#list; $j++) { + $tmp=$list[$j]; + next if (! $tmp); + $tmp=lc($tmp) if ($lc); + $hash{$tmp}= $j+$hashtype; + } + } + } + %$hash = %hash if ($hashsave); + + # Create the regular expression + if ($regexp) { + @list=keys(%hash); + @list=sort sortByLength(@list) if ($sort); + if ($escape) { + foreach $ele (@list) { + $ele="\Q$ele\E"; + } + } + if ($back) { + $$regexp="(" . join("|",@list) . ")"; + } else { + $$regexp="(?:" . join("|",@list) . ")"; + } + } +} + +# This is used in Date_Init to fill in regular expressions and lists based +# on international data. This takes a list of strings and returns a regular +# expression (to find any one of them). +# +# IN: +# $data = [ string1 string2 ... ] +# $opts = lc : lowercase the values in the regexp +# sort : sort (by length) the values in the regexp +# back : create a regexp with a back reference +# escape : escape all strings in the regexp +# +# OUT: +# $regexp = '(string1|string2|...)' + +sub Date_InitStrings { + print "DEBUG: Date_InitStrings\n" if ($Curr{"Debug"} =~ /trace/); + my($data,$regexp,$opts)=@_; + my(@list)=@{ $data }; + + # Parse the options + my($lc,$sort,$back,$escape)=(0,0,0,0); + $lc=1 if ($opts =~ /lc/i); + $sort=1 if ($opts =~ /sort/i); + $back=1 if ($opts =~ /back/i); + $escape=1 if ($opts =~ /escape/i); + + # Create the regular expression + my($ele)=(); + @list=sort sortByLength(@list) if ($sort); + if ($escape) { + foreach $ele (@list) { + $ele="\Q$ele\E"; + } + } + if ($back) { + $$regexp="(" . join("|",@list) . ")"; + } else { + $$regexp="(?:" . join("|",@list) . ")"; + } + $$regexp=lc($$regexp) if ($lc); +} + +# items is passed in (either as a space separated string, or a reference to +# a list) and a regular expression which matches any one of the items is +# prepared. 
The regular expression will be of one of the forms: +# "(a|b)" @list not empty, back option included +# "(?:a|b)" @list not empty +# "()" @list empty, back option included +# "" @list empty +# $options is a string which contains any of the following strings: +# back : the regular expression has a backreference +# opt : the regular expression is optional and a "?" is appended in +# the first two forms +# optws : the regular expression is optional and may be replaced by +# whitespace +# optWs : the regular expression is optional, but if not present, must +# be replaced by whitespace +# sort : the items in the list are sorted by length (longest first) +# lc : the string is lowercased +# under : any underscores are converted to spaces +# pre : it may be preceded by whitespace +# Pre : it must be preceded by whitespace +# PRE : it must be preceded by whitespace or the start +# post : it may be followed by whitespace +# Post : it must be followed by whitespace +# POST : it must be followed by whitespace or the end +# Spaces due to pre/post options will not be included in the back reference. +# +# If $array is included, then the elements will also be returned as a list. +# $array is a string which may contain any of the following: +# keys : treat the list as a hash and only the keys go into the regexp +# key0 : treat the list as the values of a hash with keys 0 .. N-1 +# key1 : treat the list as the values of a hash with keys 1 .. N +# val0 : treat the list as the keys of a hash with values 0 .. N-1 +# val1 : treat the list as the keys of a hash with values 1 .. N + +# &Date_InitLists([$lang{"month_name"},$lang{"month_abb"}], +# [\$Month,"lc,sort,back"], +# [\@Month,\@Mon], +# [\%Month,1]); + +# This is used in Date_Init to prepare regular expressions. A list of +# items is passed in (either as a space separated string, or a reference to +# a list) and a regular expression which matches any one of the items is +# prepared. The regular expression will be of one of the forms: +# "(a|b)" @list not empty, back option included +# "(?:a|b)" @list not empty +# "()" @list empty, back option included +# "" @list empty +# $options is a string which contains any of the following strings: +# back : the regular expression has a backreference +# opt : the regular expression is optional and a "?" is appended in +# the first two forms +# optws : the regular expression is optional and may be replaced by +# whitespace +# optWs : the regular expression is optional, but if not present, must +# be replaced by whitespace +# sort : the items in the list are sorted by length (longest first) +# lc : the string is lowercased +# under : any underscores are converted to spaces +# pre : it may be preceded by whitespace +# Pre : it must be preceded by whitespace +# PRE : it must be preceded by whitespace or the start +# post : it may be followed by whitespace +# Post : it must be followed by whitespace +# POST : it must be followed by whitespace or the end +# Spaces due to pre/post options will not be included in the back reference. +# +# If $array is included, then the elements will also be returned as a list. +# $array is a string which may contain any of the following: +# keys : treat the list as a hash and only the keys go into the regexp +# key0 : treat the list as the values of a hash with keys 0 .. N-1 +# key1 : treat the list as the values of a hash with keys 1 .. N +# val0 : treat the list as the keys of a hash with values 0 .. N-1 +# val1 : treat the list as the keys of a hash with values 1 .. 
N +sub Date_Regexp { + print "DEBUG: Date_Regexp\n" if ($Curr{"Debug"} =~ /trace/); + my($list,$options,$array)=@_; + my(@list,$ret,%hash,$i)=(); + local($_)=(); + $options="" if (! defined $options); + $array="" if (! defined $array); + + my($sort,$lc,$under)=(0,0,0); + $sort =1 if ($options =~ /sort/i); + $lc =1 if ($options =~ /lc/i); + $under=1 if ($options =~ /under/i); + my($back,$opt,$pre,$post,$ws)=("?:","","","",""); + $back ="" if ($options =~ /back/i); + $opt ="?" if ($options =~ /opt/i); + $pre ='\s*' if ($options =~ /pre/); + $pre ='\s+' if ($options =~ /Pre/); + $pre ='(?:\s+|^)' if ($options =~ /PRE/); + $post ='\s*' if ($options =~ /post/); + $post ='\s+' if ($options =~ /Post/); + $post ='(?:$|\s+)' if ($options =~ /POST/); + $ws ='\s*' if ($options =~ /optws/); + $ws ='\s+' if ($options =~ /optws/); + + my($hash,$keys,$key0,$key1,$val0,$val1)=(0,0,0,0,0,0); + $keys =1 if ($array =~ /keys/i); + $key0 =1 if ($array =~ /key0/i); + $key1 =1 if ($array =~ /key1/i); + $val0 =1 if ($array =~ /val0/i); + $val1 =1 if ($array =~ /val1/i); + $hash =1 if ($keys or $key0 or $key1 or $val0 or $val1); + + my($ref)=ref $list; + if (! $ref) { + $list =~ s/\s*$//; + $list =~ s/^\s*//; + $list =~ s/\s+/&&&/g; + } elsif ($ref eq "ARRAY") { + $list = join("&&&",@$list); + } else { + confess "ERROR: Date_Regexp.\n"; + } + + if (! $list) { + if ($back eq "") { + return "()"; + } else { + return ""; + } + } + + $list=lc($list) if ($lc); + $list=~ s/_/ /g if ($under); + @list=split(/&&&/,$list); + if ($keys) { + %hash=@list; + @list=keys %hash; + } elsif ($key0 or $key1 or $val0 or $val1) { + $i=0; + $i=1 if ($key1 or $val1); + if ($key0 or $key1) { + %hash= map { $_,$i++ } @list; + } else { + %hash= map { $i++,$_ } @list; + } + } + @list=sort sortByLength(@list) if ($sort); + + $ret="($back" . join("|",@list) . ")"; + $ret="(?:$pre$ret$post)" if ($pre or $post); + $ret.=$opt; + $ret="(?:$ret|$ws)" if ($ws); + + if ($array and $hash) { + return ($ret,%hash); + } elsif ($array) { + return ($ret,@list); + } else { + return $ret; + } +} + +# This will produce a delta with the correct number of signs. At most two +# signs will be in it normally (one before the year, and one in front of +# the day), but if appropriate, signs will be in front of all elements. +# Also, as many of the signs will be equivalent as possible. +sub Delta_Normalize { + print "DEBUG: Delta_Normalize\n" if ($Curr{"Debug"} =~ /trace/); + my($delta,$mode)=@_; + return "" if (! $delta); + return "+0:+0:+0:+0:+0:+0:+0" + if ($delta =~ /^([+-]?0+:){6}[+-]?0+$/ and $Cnf{"DeltaSigns"}); + return "+0:0:0:0:0:0:0" if ($delta =~ /^([+-]?0+:){6}[+-]?0+$/); + + my($tmp,$sign1,$sign2,$len)=(); + + # Calculate the length of the day in minutes + $len=24*60; + $len=$Curr{"WDlen"} if ($mode==2 || $mode==3); + + # We have to get the sign of every component explicitely so that a "-0" + # or "+0" doesn't get lost by treating it numerically (i.e. "-0:0:2" must + # be a negative delta). + + my($y,$mon,$w,$d,$h,$m,$s)=&Delta_Split($delta); + + # We need to make sure that the signs of all parts of a delta are the + # same. The easiest way to do this is to convert all of the large + # components to the smallest ones, then convert the smaller components + # back to the larger ones. 
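+  # For instance (an illustrative trace in exact mode): "+0:0:0:1:-2:0:0"
+  # (one day minus two hours) collapses to 79200 seconds and is rebuilt
+  # as "+0:0:0:0:22:0:0", so every component ends up with the same sign.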
+ + # Do the year/month part + + $mon += $y*12; # convert y to m + $sign1="+"; + if ($mon<0) { + $mon *= -1; + $sign1="-"; + } + + $y = $mon/12; # convert m to y + $mon -= $y*12; + + $y=0 if ($y eq "-0"); # get around silly -0 problem + $mon=0 if ($mon eq "-0"); + + # Do the wk/day/hour/min/sec part + + { + # Unfortunately, $s is overflowing for dates more than ~70 years + # apart. + no integer; + + if ($mode==3 || $mode==2) { + $s += $d*$len*60 + $h*3600 + $m*60; # convert d/h/m to s + } else { + $s += ($d+7*$w)*$len*60 + $h*3600 + $m*60; # convert w/d/h/m to s + } + $sign2="+"; + if ($s<0) { + $s*=-1; + $sign2="-"; + } + + $m = int($s/60); # convert s to m + $s -= $m*60; + $d = int($m/$len); # convert m to d + $m -= $d*$len; + + # The rest should be fine. + } + $h = $m/60; # convert m to h + $m -= $h*60; + if ($mode == 3 || $mode == 2) { + $w = $w*1; # get around +0 problem + } else { + $w = $d/7; # convert d to w + $d -= $w*7; + } + + $w=0 if ($w eq "-0"); # get around silly -0 problem + $d=0 if ($d eq "-0"); + $h=0 if ($h eq "-0"); + $m=0 if ($m eq "-0"); + $s=0 if ($s eq "-0"); + + # Only include two signs if necessary + $sign1=$sign2 if ($y==0 and $mon==0); + $sign2=$sign1 if ($w==0 and $d==0 and $h==0 and $m==0 and $s==0); + $sign2="" if ($sign1 eq $sign2 and ! $Cnf{"DeltaSigns"}); + + if ($Cnf{"DeltaSigns"}) { + return "$sign1$y:$sign1$mon:$sign2$w:$sign2$d:$sign2$h:$sign2$m:$sign2$s"; + } else { + return "$sign1$y:$mon:$sign2$w:$d:$h:$m:$s"; + } +} + +# This checks a delta to make sure it is valid. If it is, it splits +# it and returns the elements with a sign on each. The 2nd argument +# specifies the default sign. Blank elements are set to 0. If the +# third element is non-nil, exactly 7 elements must be included. +sub Delta_Split { + print "DEBUG: Delta_Split\n" if ($Curr{"Debug"} =~ /trace/); + my($delta,$sign,$exact)=@_; + my(@delta)=split(/:/,$delta); + return () if ($exact and $#delta != 6); + my($i)=(); + $sign="+" if (! defined $sign); + for ($i=0; $i<=$#delta; $i++) { + $delta[$i]="0" if (! $delta[$i]); + return () if ($delta[$i] !~ /^[+-]?\d+$/); + $sign = ($delta[$i] =~ s/^([+-])// ? $1 : $sign); + $delta[$i] = $sign.$delta[$i]; + } + @delta; +} + +# Reads up to 3 arguments. $h may contain the time in any international +# format. Any empty elements are set to 0. +sub Date_ParseTime { + print "DEBUG: Date_ParseTime\n" if ($Curr{"Debug"} =~ /trace/); + my($h,$m,$s)=@_; + my($t)=&CheckTime("one"); + + if (defined $h and $h =~ /$t/) { + $h=$1; + $m=$2; + $s=$3 if (defined $3); + } + $h="00" if (! defined $h); + $m="00" if (! defined $m); + $s="00" if (! defined $s); + + ($h,$m,$s); +} + +# Forms a date with the 6 elements passed in (all of which must be defined). +# No check as to validity is made. +sub Date_Join { + print "DEBUG: Date_Join\n" if ($Curr{"Debug"} =~ /trace/); + foreach (0 .. $#_) { + croak "undefined arg $_ to Date_Join()" if not defined $_[$_]; + } + my($y,$m,$d,$h,$mn,$s)=@_; + my($ym,$md,$dh,$hmn,$mns)=(); + + if ($Cnf{"Internal"} == 0) { + $ym=$md=$dh=""; + $hmn=$mns=":"; + + } elsif ($Cnf{"Internal"} == 1) { + $ym=$md=$dh=$hmn=$mns=""; + + } elsif ($Cnf{"Internal"} == 2) { + $ym=$md="-"; + $dh=" "; + $hmn=$mns=":"; + + } else { + confess "ERROR: Invalid internal format in Date_Join.\n"; + } + $m="0$m" if (length($m)==1); + $d="0$d" if (length($d)==1); + $h="0$h" if (length($h)==1); + $mn="0$mn" if (length($mn)==1); + $s="0$s" if (length($s)==1); + "$y$ym$m$md$d$dh$h$hmn$mn$mns$s"; +} + +# This checks a time. 
If it is valid, it splits it and returns 3 elements. +# If "one" or "two" is passed in, a regexp with 1/2 or 2 digit hours is +# returned. +sub CheckTime { + print "DEBUG: CheckTime\n" if ($Curr{"Debug"} =~ /trace/); + my($time)=@_; + my($h)='(?:0?[0-9]|1[0-9]|2[0-3])'; + my($h2)='(?:0[0-9]|1[0-9]|2[0-3])'; + my($m)='[0-5][0-9]'; + my($s)=$m; + my($hm)="(?:". $Lang{$Cnf{"Language"}}{"SepHM"} ."|:)"; + my($ms)="(?:". $Lang{$Cnf{"Language"}}{"SepMS"} ."|:)"; + my($ss)=$Lang{$Cnf{"Language"}}{"SepSS"}; + my($t)="^($h)$hm($m)(?:$ms($s)(?:$ss\\d+)?)?\$"; + if ($time eq "one") { + return $t; + } elsif ($time eq "two") { + $t="^($h2)$hm($m)(?:$ms($s)(?:$ss\\d+)?)?\$"; + return $t; + } + + if ($time =~ /$t/i) { + ($h,$m,$s)=($1,$2,$3); + $h="0$h" if (length($h)<2); + $m="0$m" if (length($m)<2); + $s="00" if (! defined $s); + return ($h,$m,$s); + } else { + return (); + } +} + +# This checks a recurrence. If it is valid, it splits it and returns the +# elements. Otherwise, it returns an empty list. +# ($recur0,$recur1,$flags,$dateb,$date0,$date1)=&Recur_Split($recur); +sub Recur_Split { + print "DEBUG: Recur_Split\n" if ($Curr{"Debug"} =~ /trace/); + my($recur)=@_; + my(@ret,@tmp); + + my($R) = '(\*?(?:[-,0-9]+[:\*]){6}[-,0-9]+)'; + my($F) = '(?:\*([^*]*))'; + my($DB,$D0,$D1); + $DB=$D0=$D1=$F; + + if ($recur =~ /^$R$F?$DB?$D0?$D1?$/) { + @ret=($1,$2,$3,$4,$5); + @tmp=split(/\*/,shift(@ret)); + return () if ($#tmp>1); + return (@tmp,"",@ret) if ($#tmp==0); + return (@tmp,@ret); + } + return (); +} + +# This checks a date. If it is valid, it splits it and returns the elements. +# If no date is passed in, it returns a regular expression for the date. +# +# The optional second argument says 'I really expect this to be a +# valid Date::Manip object, please throw an exception if it is +# not'. Otherwise, errors are signalled by returning (). +# +sub Date_Split { + print "DEBUG: Date_Split\n" if ($Curr{"Debug"} =~ /trace/); + my($date, $definitely_valid)=@_; + $definitely_valid = 0 if not defined $definitely_valid; + my($ym,$md,$dh,$hmn,$mns)=(); + my($y)='(\d{4})'; + my($m)='(0[1-9]|1[0-2])'; + my($d)='(0[1-9]|[1-2][0-9]|3[0-1])'; + my($h)='([0-1][0-9]|2[0-3])'; + my($mn)='([0-5][0-9])'; + my($s)=$mn; + + if ($Cnf{"Internal"} == 0) { + $ym=$md=$dh=""; + $hmn=$mns=":"; + + } elsif ($Cnf{"Internal"} == 1) { + $ym=$md=$dh=$hmn=$mns=""; + + } elsif ($Cnf{"Internal"} == 2) { + $ym=$md="-"; + $dh=" "; + $hmn=$mns=":"; + + } else { + confess "ERROR: Invalid internal format in Date_Split.\n"; + } + + my($t)="^$y$ym$m$md$d$dh$h$hmn$mn$mns$s\$"; + + if (not defined $date or $date eq '') { + if ($definitely_valid) { + die "bad date '$date'"; + } else { + return $t; + } + } + + if ($date =~ /$t/) { + ($y,$m,$d,$h,$mn,$s)=($1,$2,$3,$4,$5,$6); + my(@d_in_m)=(0,31,28,31,30,31,30,31,31,30,31,30,31); + $d_in_m[2]=29 if (&Date_LeapYear($y)); + if ($d>$d_in_m[$m]) { + my $msg = "invalid date $date: day $d of month $m, but only $d_in_m[$m] days in that month"; + if ($definitely_valid) { + die $msg; + } + else { + warn $msg; + return (); + } + } + return ($y,$m,$d,$h,$mn,$s); + } + + if ($definitely_valid) { + die "invalid date $date: doesn't match regexp $t"; + } + return (); +} + +# This returns the date easter occurs on for a given year as ($month,$day). +# This is from the Calendar FAQ. 
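+#
+# For example, &Date_Easter(2009) returns (4,12); Easter Sunday 2009 fell
+# on April 12 (the divisions below are integer divisions).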
+sub Date_Easter { + my($y)=@_; + $y=&Date_FixYear($y) if (length($y)==2); + + my($c) = $y/100; + my($g) = $y % 19; + my($k) = ($c-17)/25; + my($i) = ($c - $c/4 - ($c-$k)/3 + 19*$g + 15) % 30; + $i = $i - ($i/28)*(1 - ($i/28)*(29/($i+1))*((21-$g)/11)); + my($j) = ($y + $y/4 + $i + 2 - $c + $c/4) % 7; + my($l) = $i-$j; + my($m) = 3 + ($l+40)/44; + my($d) = $l + 28 - 31*($m/4); + return ($m,$d); +} + +# This takes a list of years, months, WeekOfMonth's, and optionally +# DayOfWeek's, and returns a list of dates. Optionally, a list of dates +# can be passed in as the 1st argument (with the 2nd argument the null list) +# and the year/month of these will be used. +# +# If $FDn is non-zero, the first week of the month contains the first +# occurence of this day (1=Monday). If $FIn is non-zero, the first week of +# the month contains the date (i.e. $FIn'th day of the month). +sub Date_Recur_WoM { + my($y,$m,$w,$d,$FDn,$FIn)=@_; + my(@y)=@$y; + my(@m)=@$m; + my(@w)=@$w; + my(@d)=@$d; + my($date0,$date1,@tmp,@date,$d0,$d1,@tmp2)=(); + + if (@m) { + @tmp=(); + foreach $y (@y) { + return () if (length($y)==1 || length($y)==3 || ! &IsInt($y,0,9999)); + $y=&Date_FixYear($y) if (length($y)==2); + push(@tmp,$y); + } + @y=sort { $a<=>$b } (@tmp); + + return () if (! @m); + foreach $m (@m) { + return () if (! &IsInt($m,1,12)); + } + @m=sort { $a<=>$b } (@m); + + @tmp=@tmp2=(); + foreach $y (@y) { + foreach $m (@m) { + push(@tmp,$y); + push(@tmp2,$m); + } + } + + @y=@tmp; + @m=@tmp2; + + } else { + foreach $d0 (@y) { + @tmp=&Date_Split($d0); + return () if (! @tmp); + push(@tmp2,$tmp[0]); + push(@m,$tmp[1]); + } + @y=@tmp2; + } + + return () if (! @w); + foreach $w (@w) { + return () if ($w==0 || ! &IsInt($w,-5,5)); + } + + if (@d) { + foreach $d (@d) { + return () if (! &IsInt($d,1,7)); + } + @d=sort { $a<=>$b } (@d); + } + + @date=(); + foreach $y (@y) { + $m=shift(@m); + + # Find 1st day of this month and next month + $date0=&Date_Join($y,$m,1,0,0,0); + $date1=&DateCalc($date0,"+0:1:0:0:0:0:0"); + + if (@d) { + foreach $d (@d) { + # Find 1st occurence of DOW (in both months) + $d0=&Date_GetNext($date0,$d,1); + $d1=&Date_GetNext($date1,$d,1); + + @tmp=(); + while (&Date_Cmp($d0,$d1)<0) { + push(@tmp,$d0); + $d0=&DateCalc($d0,"+0:0:1:0:0:0:0"); + } + + @tmp2=(); + foreach $w (@w) { + if ($w>0) { + push(@tmp2,$tmp[$w-1]); + } else { + push(@tmp2,$tmp[$#tmp+1+$w]); + } + } + @tmp2=sort(@tmp2); + push(@date,@tmp2); + } + + } else { + # Find 1st day of 1st week + if ($FDn != 0) { + $date0=&Date_GetNext($date0,$FDn,1); + } else { + $date0=&Date_Join($y,$m,$FIn,0,0,0); + } + $date0=&Date_GetPrev($date0,$Cnf{"FirstDay"},1); + + # Find 1st day of 1st week of next month + if ($FDn != 0) { + $date1=&Date_GetNext($date1,$FDn,1); + } else { + $date1=&DateCalc($date1,"+0:0:0:".($FIn-1).":0:0:0") if ($FIn>1); + } + $date1=&Date_GetPrev($date1,$Cnf{"FirstDay"},1); + + @tmp=(); + while (&Date_Cmp($date0,$date1)<0) { + push(@tmp,$date0); + $date0=&DateCalc($date0,"+0:0:1:0:0:0:0"); + } + + @tmp2=(); + foreach $w (@w) { + if ($w>0) { + push(@tmp2,$tmp[$w-1]); + } else { + push(@tmp2,$tmp[$#tmp+1+$w]); + } + } + @tmp2=sort(@tmp2); + push(@date,@tmp2); + } + } + + @date; +} + +# This returns a sorted list of dates formed by adding/subtracting +# $delta to $dateb in the range $date0<=$d<$dateb. The first date int +# the list is actually the first date<$date0 and the last date in the +# list is the first date>=$date1 (because sometimes the set part will +# move the date back into the range). 
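+#
+# Illustrative example: with $dateb = Jan 1 2009, $delta = one week
+# ("+0:0:1:0:0:0:0"), $date0 = Jan 10 2009 and $date1 = Jan 25 2009, the
+# base date generates Jan 8, 15, 22, 29, ... and the returned list is
+# (Jan 8, Jan 15, Jan 22, Jan 29): the dates inside the range plus one
+# just before it and one at or after its end.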
+sub Date_Recur { + my($date0,$date1,$dateb,$delta)=@_; + my(@ret,$d)=(); + + while (&Date_Cmp($dateb,$date0)<0) { + $dateb=&DateCalc_DateDelta($dateb,$delta); + } + while (&Date_Cmp($dateb,$date1)>=0) { + $dateb=&DateCalc_DateDelta($dateb,"-$delta"); + } + + # Add the dates $date0..$dateb + $d=$dateb; + while (&Date_Cmp($d,$date0)>=0) { + unshift(@ret,$d); + $d=&DateCalc_DateDelta($d,"-$delta"); + } + # Add the first date earler than the range + unshift(@ret,$d); + + # Add the dates $dateb..$date1 + $d=&DateCalc_DateDelta($dateb,$delta); + while (&Date_Cmp($d,$date1)<0) { + push(@ret,$d); + $d=&DateCalc_DateDelta($d,$delta); + } + # Add the first date later than the range + push(@ret,$d); + + @ret; +} + +# This sets the values in each date of a recurrence. +# +# $h,$m,$s can each be values or lists "1-2,4". If any are equal to "-1", +# they are not set (and none of the larger elements are set). +sub Date_RecurSetTime { + my($date0,$date1,$dates,$h,$m,$s)=@_; + my(@dates)=@$dates; + my(@h,@m,@s,$date,@tmp)=(); + + $m="-1" if ($s eq "-1"); + $h="-1" if ($m eq "-1"); + + if ($h ne "-1") { + @h=&ReturnList($h); + return () if ! (@h); + @h=sort { $a<=>$b } (@h); + + @tmp=(); + foreach $date (@dates) { + foreach $h (@h) { + push(@tmp,&Date_SetDateField($date,"h",$h,1)); + } + } + @dates=@tmp; + } + + if ($m ne "-1") { + @m=&ReturnList($m); + return () if ! (@m); + @m=sort { $a<=>$b } (@m); + + @tmp=(); + foreach $date (@dates) { + foreach $m (@m) { + push(@tmp,&Date_SetDateField($date,"mn",$m,1)); + } + } + @dates=@tmp; + } + + if ($s ne "-1") { + @s=&ReturnList($s); + return () if ! (@s); + @s=sort { $a<=>$b } (@s); + + @tmp=(); + foreach $date (@dates) { + foreach $s (@s) { + push(@tmp,&Date_SetDateField($date,"s",$s,1)); + } + } + @dates=@tmp; + } + + @tmp=(); + foreach $date (@dates) { + push(@tmp,$date) if (&Date_Cmp($date,$date0)>=0 && + &Date_Cmp($date,$date1)<0 && + &Date_Split($date)); + } + + @tmp; +} + +sub DateCalc_DateDate { + print "DEBUG: DateCalc_DateDate\n" if ($Curr{"Debug"} =~ /trace/); + my($D1,$D2,$mode)=@_; + my(@d_in_m)=(0,31,28,31,30,31,30,31,31,30,31,30,31); + $mode=0 if (! 
defined $mode); + + # Exact mode + if ($mode==0) { + my($y1,$m1,$d1,$h1,$mn1,$s1)=&Date_Split($D1, 1); + my($y2,$m2,$d2,$h2,$mn2,$s2)=&Date_Split($D2, 1); + my($i,@delta,$d,$delta,$y)=(); + + # form the delta for hour/min/sec + $delta[4]=$h2-$h1; + $delta[5]=$mn2-$mn1; + $delta[6]=$s2-$s1; + + # form the delta for yr/mon/day + $delta[0]=$delta[1]=0; + $d=0; + if ($y2>$y1) { + $d=&Date_DaysInYear($y1) - &Date_DayOfYear($m1,$d1,$y1); + $d+=&Date_DayOfYear($m2,$d2,$y2); + for ($y=$y1+1; $y<$y2; $y++) { + $d+= &Date_DaysInYear($y); + } + } elsif ($y2<$y1) { + $d=&Date_DaysInYear($y2) - &Date_DayOfYear($m2,$d2,$y2); + $d+=&Date_DayOfYear($m1,$d1,$y1); + for ($y=$y2+1; $y<$y1; $y++) { + $d+= &Date_DaysInYear($y); + } + $d *= -1; + } else { + $d=&Date_DayOfYear($m2,$d2,$y2) - &Date_DayOfYear($m1,$d1,$y1); + } + $delta[2]=0; + $delta[3]=$d; + + for ($i=0; $i<7; $i++) { + $delta[$i]="+".$delta[$i] if ($delta[$i]>=0); + } + + $delta=join(":",@delta); + $delta=&Delta_Normalize($delta,0); + return $delta; + } + + my($date1,$date2)=($D1,$D2); + my($tmp,$sign,$err,@tmp)=(); + + # make sure both are work days + if ($mode==2 || $mode==3) { + $date1=&Date_NextWorkDay($date1,0,1); + $date2=&Date_NextWorkDay($date2,0,1); + } + + # make sure date1 comes before date2 + if (&Date_Cmp($date1,$date2)>0) { + $sign="-"; + $tmp=$date1; + $date1=$date2; + $date2=$tmp; + } else { + $sign="+"; + } + if (&Date_Cmp($date1,$date2)==0) { + return "+0:+0:+0:+0:+0:+0:+0" if ($Cnf{"DeltaSigns"}); + return "+0:0:0:0:0:0:0"; + } + + my($y1,$m1,$d1,$h1,$mn1,$s1)=&Date_Split($date1, 1); + my($y2,$m2,$d2,$h2,$mn2,$s2)=&Date_Split($date2, 1); + my($dy,$dm,$dw,$dd,$dh,$dmn,$ds,$ddd)=(0,0,0,0,0,0,0,0); + + if ($mode != 3) { + + # Do years + $dy=$y2-$y1; + $dm=0; + if ($dy>0) { + $tmp=&DateCalc_DateDelta($date1,"+$dy:0:0:0:0:0:0",\$err,0); + if (&Date_Cmp($tmp,$date2)>0) { + $dy--; + $tmp=$date1; + $tmp=&DateCalc_DateDelta($date1,"+$dy:0:0:0:0:0:0",\$err,0) + if ($dy>0); + $dm=12; + } + $date1=$tmp; + } + + # Do months + $dm+=$m2-$m1; + if ($dm>0) { + $tmp=&DateCalc_DateDelta($date1,"+0:$dm:0:0:0:0:0",\$err,0); + if (&Date_Cmp($tmp,$date2)>0) { + $dm--; + $tmp=$date1; + $tmp=&DateCalc_DateDelta($date1,"+0:$dm:0:0:0:0:0",\$err,0) + if ($dm>0); + } + $date1=$tmp; + } + + # At this point, check to see that we're on a business day again so that + # Aug 3 (Monday) -> Sep 3 (Sunday) -> Sep 4 (Monday) = 1 month + if ($mode==2) { + if (! 
&Date_IsWorkDay($date1,0)) { + $date1=&Date_NextWorkDay($date1,0,1); + } + } + } + + # Do days + if ($mode==2 || $mode==3) { + $dd=0; + while (1) { + $tmp=&Date_NextWorkDay($date1,1,1); + if (&Date_Cmp($tmp,$date2)<=0) { + $dd++; + $date1=$tmp; + } else { + last; + } + } + + } else { + ($y1,$m1,$d1)=( &Date_Split($date1, 1) )[0..2]; + $dd=0; + # If we're jumping across months, set $d1 to the first of the next month + # (or possibly the 0th of next month which is equivalent to the last day + # of this month) + if ($m1!=$m2) { + $d_in_m[2]=29 if (&Date_LeapYear($y1)); + $dd=$d_in_m[$m1]-$d1+1; + $d1=1; + $tmp=&DateCalc_DateDelta($date1,"+0:0:0:$dd:0:0:0",\$err,0); + if (&Date_Cmp($tmp,$date2)>0) { + $dd--; + $d1--; + $tmp=&DateCalc_DateDelta($date1,"+0:0:0:$dd:0:0:0",\$err,0); + } + $date1=$tmp; + } + + $ddd=0; + if ($d1<$d2) { + $ddd=$d2-$d1; + $tmp=&DateCalc_DateDelta($date1,"+0:0:0:$ddd:0:0:0",\$err,0); + if (&Date_Cmp($tmp,$date2)>0) { + $ddd--; + $tmp=&DateCalc_DateDelta($date1,"+0:0:0:$ddd:0:0:0",\$err,0); + } + $date1=$tmp; + } + $dd+=$ddd; + } + + # in business mode, make sure h1 comes before h2 (if not find delta between + # now and end of day and move to start of next business day) + $d1=( &Date_Split($date1, 1) )[2]; + $dh=$dmn=$ds=0; + if ($mode==2 || $mode==3 and $d1 != $d2) { + $tmp=&Date_SetTime($date1,$Cnf{"WorkDayEnd"}); + $tmp=&DateCalc_DateDelta($tmp,"+0:0:0:0:0:1:0") + if ($Cnf{"WorkDay24Hr"}); + $tmp=&DateCalc_DateDate($date1,$tmp,0); + ($tmp,$tmp,$tmp,$tmp,$dh,$dmn,$ds)=&Delta_Split($tmp); + $date1=&Date_NextWorkDay($date1,1,0); + $date1=&Date_SetTime($date1,$Cnf{"WorkDayBeg"}); + $d1=( &Date_Split($date1, 1) )[2]; + confess "ERROR: DateCalc DateDate Business.\n" if ($d1 != $d2); + } + + # Hours, minutes, seconds + $tmp=&DateCalc_DateDate($date1,$date2,0); + @tmp=&Delta_Split($tmp); + $dh += $tmp[4]; + $dmn += $tmp[5]; + $ds += $tmp[6]; + + $tmp="$sign$dy:$dm:0:$dd:$dh:$dmn:$ds"; + &Delta_Normalize($tmp,$mode); +} + +sub DateCalc_DeltaDelta { + print "DEBUG: DateCalc_DeltaDelta\n" if ($Curr{"Debug"} =~ /trace/); + my($D1,$D2,$mode)=@_; + my(@delta1,@delta2,$i,$delta,@delta)=(); + $mode=0 if (! defined $mode); + + @delta1=&Delta_Split($D1); + @delta2=&Delta_Split($D2); + for ($i=0; $i<7; $i++) { + $delta[$i]=$delta1[$i]+$delta2[$i]; + $delta[$i]="+".$delta[$i] if ($delta[$i]>=0); + } + + $delta=join(":",@delta); + $delta=&Delta_Normalize($delta,$mode); + return $delta; +} + +sub DateCalc_DateDelta { + print "DEBUG: DateCalc_DateDelta\n" if ($Curr{"Debug"} =~ /trace/); + my($D1,$D2,$errref,$mode)=@_; + my($date)=(); + my(@d_in_m)=(0,31,28,31,30,31,30,31,31,30,31,30,31); + my($h1,$m1,$h2,$m2,$len,$hh,$mm)=(); + $mode=0 if (! 
defined $mode); + + if ($mode==2 || $mode==3) { + $h1=$Curr{"WDBh"}; + $m1=$Curr{"WDBm"}; + $h2=$Curr{"WDEh"}; + $m2=$Curr{"WDEm"}; + $hh=$h2-$h1; + $mm=$m2-$m1; + if ($mm<0) { + $hh--; + $mm+=60; + } + } + + # Date, delta + my($y,$m,$d,$h,$mn,$s)=&Date_Split($D1, 1); + my($dy,$dm,$dw,$dd,$dh,$dmn,$ds)=&Delta_Split($D2); + + # do the month/year part + $y+=$dy; + while (length($y)<4) { + $y = "0$y"; + } + &ModuloAddition(-12,$dm,\$m,\$y); # -12 means 1-12 instead of 0-11 + $d_in_m[2]=29 if (&Date_LeapYear($y)); + + # if we have gone past the last day of a month, move the date back to + # the last day of the month + if ($d>$d_in_m[$m]) { + $d=$d_in_m[$m]; + } + + # do the week part + if ($mode==0 || $mode==1) { + $dd += $dw*7; + } else { + $date=&DateCalc_DateDelta(&Date_Join($y,$m,$d,$h,$mn,$s), + "+0:0:$dw:0:0:0:0",0); + ($y,$m,$d,$h,$mn,$s)=&Date_Split($date, 1); + } + + # in business mode, set the day to a work day at this point so the h/mn/s + # stuff will work out + if ($mode==2 || $mode==3) { + $d=$d_in_m[$m] if ($d>$d_in_m[$m]); + $date=&Date_NextWorkDay(&Date_Join($y,$m,$d,$h,$mn,$s),0,1); + ($y,$m,$d,$h,$mn,$s)=&Date_Split($date, 1); + } + + # seconds, minutes, hours + &ModuloAddition(60,$ds,\$s,\$mn); + if ($mode==2 || $mode==3) { + while (1) { + &ModuloAddition(60,$dmn,\$mn,\$h); + $h+= $dh; + + if ($h>$h2 or $h==$h2 && $mn>$m2) { + $dh=$h-$h2; + $dmn=$mn-$m2; + $h=$h1; + $mn=$m1; + $dd++; + + } elsif ($h<$h1 or $h==$h1 && $mn<$m1) { + $dh=$h-$h1; + $dmn=$m1-$mn; + $h=$h2; + $mn=$m2; + $dd--; + + } elsif ($h==$h2 && $mn==$m2) { + $dd++; + $dh=-$hh; + $dmn=-$mm; + + } else { + last; + } + } + + } else { + &ModuloAddition(60,$dmn,\$mn,\$h); + &ModuloAddition(24,$dh,\$h,\$d); + } + + # If we have just gone past the last day of the month, we need to make + # up for this: + if ($d>$d_in_m[$m]) { + $dd+= $d-$d_in_m[$m]; + $d=$d_in_m[$m]; + } + + # days + if ($mode==2 || $mode==3) { + if ($dd>=0) { + $date=&Date_NextWorkDay(&Date_Join($y,$m,$d,$h,$mn,$s),$dd,1); + } else { + $date=&Date_PrevWorkDay(&Date_Join($y,$m,$d,$h,$mn,$s),-$dd,1); + } + ($y,$m,$d,$h,$mn,$s)=&Date_Split($date, 1); + + } else { + $d_in_m[2]=29 if (&Date_LeapYear($y)); + $d=$d_in_m[$m] if ($d>$d_in_m[$m]); + $d += $dd; + while ($d<1) { + $m--; + if ($m==0) { + $m=12; + $y--; + if (&Date_LeapYear($y)) { + $d_in_m[2]=29; + } else { + $d_in_m[2]=28; + } + } + $d += $d_in_m[$m]; + } + while ($d>$d_in_m[$m]) { + $d -= $d_in_m[$m]; + $m++; + if ($m==13) { + $m=1; + $y++; + if (&Date_LeapYear($y)) { + $d_in_m[2]=29; + } else { + $d_in_m[2]=28; + } + } + } + } + + if ($y<0 or $y>9999) { + $$errref=3; + return; + } + &Date_Join($y,$m,$d,$h,$mn,$s); +} + +sub Date_UpdateHolidays { + print "DEBUG: Date_UpdateHolidays\n" if ($Curr{"Debug"} =~ /trace/); + my($year)=@_; + $Holiday{"year"}=$year; + $Holiday{"dates"}{$year}={}; + + my($date,$delta,$err)=(); + my($key,@tmp,$tmp); + + foreach $key (keys %{ $Holiday{"desc"} }) { + @tmp=&Recur_Split($key); + if (@tmp) { + $tmp=&ParseDateString("${year}010100:00:00"); + ($date)=&ParseRecur($key,$tmp,$tmp,($year+1)."-01-01"); + next if (! 
$date); + + } elsif ($key =~ /^(.*)([+-].*)$/) { + # Date +/- Delta + ($date,$delta)=($1,$2); + $tmp=&ParseDateString("$date $year"); + if ($tmp) { + $date=$tmp; + } else { + $date=&ParseDateString($date); + next if ($date !~ /^$year/); + } + $date=&DateCalc($date,$delta,\$err,0); + + } else { + # Date + $date=$key; + $tmp=&ParseDateString("$date $year"); + if ($tmp) { + $date=$tmp; + } else { + $date=&ParseDateString($date); + next if ($date !~ /^$year/); + } + } + $Holiday{"dates"}{$year}{$date}=$Holiday{"desc"}{$key}; + } +} + +# This sets a Date::Manip config variable. +sub Date_SetConfigVariable { + print "DEBUG: Date_SetConfigVariable\n" if ($Curr{"Debug"} =~ /trace/); + my($var,$val)=@_; + + # These are most appropriate for command line options instead of in files. + $Cnf{"PathSep"}=$val, return if ($var =~ /^PathSep$/i); + $Cnf{"PersonalCnf"}=$val, return if ($var =~ /^PersonalCnf$/i); + $Cnf{"PersonalCnfPath"}=$val, return if ($var =~ /^PersonalCnfPath$/i); + &EraseHolidays(), return if ($var =~ /^EraseHolidays$/i); + $Cnf{"IgnoreGlobalCnf"}=1, return if ($var =~ /^IgnoreGlobalCnf$/i); + $Cnf{"GlobalCnf"}=$val, return if ($var =~ /^GlobalCnf$/i); + + $Curr{"InitLang"}=1, + $Cnf{"Language"}=$val, return if ($var =~ /^Language$/i); + $Cnf{"DateFormat"}=$val, return if ($var =~ /^DateFormat$/i); + $Cnf{"TZ"}=$val, return if ($var =~ /^TZ$/i); + $Cnf{"ConvTZ"}=$val, return if ($var =~ /^ConvTZ$/i); + $Cnf{"Internal"}=$val, return if ($var =~ /^Internal$/i); + $Cnf{"FirstDay"}=$val, return if ($var =~ /^FirstDay$/i); + $Cnf{"WorkWeekBeg"}=$val, return if ($var =~ /^WorkWeekBeg$/i); + $Cnf{"WorkWeekEnd"}=$val, return if ($var =~ /^WorkWeekEnd$/i); + $Cnf{"WorkDayBeg"}=$val, + $Curr{"ResetWorkDay"}=1, return if ($var =~ /^WorkDayBeg$/i); + $Cnf{"WorkDayEnd"}=$val, + $Curr{"ResetWorkDay"}=1, return if ($var =~ /^WorkDayEnd$/i); + $Cnf{"WorkDay24Hr"}=$val, + $Curr{"ResetWorkDay"}=1, return if ($var =~ /^WorkDay24Hr$/i); + $Cnf{"DeltaSigns"}=$val, return if ($var =~ /^DeltaSigns$/i); + $Cnf{"Jan1Week1"}=$val, return if ($var =~ /^Jan1Week1$/i); + $Cnf{"YYtoYYYY"}=$val, return if ($var =~ /^YYtoYYYY$/i); + $Cnf{"UpdateCurrTZ"}=$val, return if ($var =~ /^UpdateCurrTZ$/i); + $Cnf{"IntCharSet"}=$val, return if ($var =~ /^IntCharSet$/i); + $Curr{"DebugVal"}=$val, return if ($var =~ /^Debug$/i); + $Cnf{"TomorrowFirst"}=$val, return if ($var =~ /^TomorrowFirst$/i); + $Cnf{"ForceDate"}=$val, return if ($var =~ /^ForceDate$/i); + + confess "ERROR: Unknown configuration variable $var in Date::Manip.\n"; +} + +sub EraseHolidays { + print "DEBUG: EraseHolidays\n" if ($Curr{"Debug"} =~ /trace/); + + $Cnf{"EraseHolidays"}=0; + delete $Holiday{"list"}; + $Holiday{"list"}={}; + delete $Holiday{"desc"}; + $Holiday{"desc"}={}; + $Holiday{"dates"}={}; +} + +# This returns a pointer to a list of times and events in the format +# [ date, [ events ], date, [ events ], ... ] +# where each list of events are events that are in effect at the date +# immediately preceding the list. +# +# This takes either one date or two dates as arguments. +sub Events_Calc { + print "DEBUG: Events_Calc\n" if ($Curr{"Debug"} =~ /trace/); + + my($date0,$date1)=@_; + + my($tmp); + $date0=&ParseDateString($date0); + return undef if (! 
$date0); + if ($date1) { + $date1=&ParseDateString($date1); + if (&Date_Cmp($date0,$date1)>0) { + $tmp=$date1; + $date1=$date0; + $date0=$tmp; + } + } else { + $date1=&DateCalc_DateDelta($date0,"+0:0:0:0:0:0:1"); + } + + # + # [ d0,d1,del,name ] => [ d0, d1+del ) + # [ d0,0,del,name ] => [ d0, d0+del ) + # + my(%ret,$d0,$d1,$del,$name,$c0,$c1); + my(@tmp)=@{ $Events{"dates"} }; + DATE: while (@tmp) { + ($d0,$d1,$del,$name)=splice(@tmp,0,4); + $d0=&ParseDateString($d0); + $d1=&ParseDateString($d1) if ($d1); + $del=&ParseDateDelta($del) if ($del); + if ($d1) { + if ($del) { + $d1=&DateCalc_DateDelta($d1,$del); + } + } else { + $d1=&DateCalc_DateDelta($d0,$del); + } + if (&Date_Cmp($d0,$d1)>0) { + $tmp=$d1; + $d1=$d0; + $d0=$tmp; + } + # [ date0,date1 ) + # [ d0,d1 ) OR [ d0,d1 ) + next DATE if (&Date_Cmp($d1,$date0)<=0 || + &Date_Cmp($d0,$date1)>=0); + # [ date0,date1 ) + # [ d0,d1 ) + # [ d0, d1 ) + if (&Date_Cmp($d0,$date0)<=0) { + push @{ $ret{$date0} },$name; + push @{ $ret{$d1} },"!$name" if (&Date_Cmp($d1,$date1)<0); + next DATE; + } + # [ date0,date1 ) + # [ d0,d1 ) + if (&Date_Cmp($d1,$date1)>=0) { + push @{ $ret{$d0} },$name; + next DATE; + } + # [ date0,date1 ) + # [ d0,d1 ) + push @{ $ret{$d0} },$name; + push @{ $ret{$d1} },"!$name"; + } + + # + # [ recur,delta0,delta1,name ] => [ {date-delta0},{date+delta1} ) + # + my($rec,$del0,$del1,@d); + @tmp=@{ $Events{"recur"} }; + RECUR: while (@tmp) { + ($rec,$del0,$del1,$name)=splice(@tmp,0,4); + @d=(); + + } + + # Sort them AND take into account the "!$name" entries. + my(%tmp,$date,@tmp2,@ret); + @d=sort { &Date_Cmp($a,$b) } keys %ret; + foreach $date (@d) { + @tmp=@{ $ret{$date} }; + @tmp2=(); + foreach $tmp (@tmp) { + push(@tmp2,$tmp), next if ($tmp =~ /^!/); + $tmp{$tmp}=1; + } + foreach $tmp (@tmp2) { + $tmp =~ s/^!//; + delete $tmp{$tmp}; + } + push(@ret,$date,[ keys %tmp ]); + } + + return \@ret; +} + +# This parses the raw events list +sub Events_ParseRaw { + print "DEBUG: Events_ParseRaw\n" if ($Curr{"Debug"} =~ /trace/); + + # Only need to be parsed once + my($force)=@_; + $Events{"parsed"}=0 if ($force); + return if ($Events{"parsed"}); + $Events{"parsed"}=1; + + my(@events)=@{ $Events{"raw"} }; + my($event,$name,@event,$date0,$date1,$tmp,$delta,$recur0,$recur1,@recur,$r, + $recur); + EVENT: while (@events) { + ($event,$name)=splice(@events,0,2); + @event=split(/\s*;\s*/,$event); + + if ($#event == 0) { + + if ($date0=&ParseDateString($event[0])) { + # + # date = event + # + $tmp=&ParseDateString("$event[0] 00:00:00"); + if ($tmp && $tmp eq $date0) { + $delta="+0:0:0:1:0:0:0"; + } else { + $delta="+0:0:0:0:1:0:0"; + } + push @{ $Events{"dates"} },($date0,0,$delta,$name); + + } elsif ($recur=&ParseRecur($event[0])) { + # + # recur = event + # + ($recur0,$recur1)=&Recur_Split($recur); + if ($recur0) { + if ($recur1) { + $r="$recur0:$recur1"; + } else { + $r=$recur0; + } + } else { + $r=$recur1; + } + (@recur)=split(/:/,$r); + if (pop(@recur)==0 && pop(@recur)==0 && pop(@recur)==0) { + $delta="+0:0:0:1:0:0:0"; + } else { + $delta="+0:0:0:0:1:0:0"; + } + push @{ $Events{"recur"} },($recur,0,$delta,$name); + + } else { + # ??? 
= event + warn "WARNING: illegal event ignored [ @event ]\n"; + next EVENT; + } + + } elsif ($#event == 1) { + + if ($date0=&ParseDateString($event[0])) { + + if ($date1=&ParseDateString($event[1])) { + # + # date ; date = event + # + $tmp=&ParseDateString("$event[1] 00:00:00"); + if ($tmp && $tmp eq $date1) { + $date1=&DateCalc_DateDelta($date1,"+0:0:0:1:0:0:0"); + } + push @{ $Events{"dates"} },($date0,$date1,0,$name); + + } elsif ($delta=&ParseDateDelta($event[1])) { + # + # date ; delta = event + # + push @{ $Events{"dates"} },($date0,0,$delta,$name); + + } else { + # date ; ??? = event + warn "WARNING: illegal event ignored [ @event ]\n"; + next EVENT; + } + + } elsif ($recur=&ParseRecur($event[0])) { + + if ($delta=&ParseDateDelta($event[1])) { + # + # recur ; delta = event + # + push @{ $Events{"recur"} },($recur,0,$delta,$name); + + } else { + # recur ; ??? = event + warn "WARNING: illegal event ignored [ @event ]\n"; + next EVENT; + } + + } else { + # ??? ; ??? = event + warn "WARNING: illegal event ignored [ @event ]\n"; + next EVENT; + } + + } else { + # date ; delta0 ; delta1 = event + # recur ; delta0 ; delta1 = event + # ??? ; ??? ; ??? ... = event + warn "WARNING: illegal event ignored [ @event ]\n"; + next EVENT; + } + } +} + +# This reads an init file. +sub Date_InitFile { + print "DEBUG: Date_InitFile\n" if ($Curr{"Debug"} =~ /trace/); + my($file)=@_; + my($in)=new IO::File; + local($_)=(); + my($section)="vars"; + my($var,$val,$recur,$name)=(); + + $in->open($file) || return; + while(defined ($_=<$in>)) { + chomp; + s/^\s+//; + s/\s+$//; + next if (! $_ or /^\#/); + + if (/^\*holiday/i) { + $section="holiday"; + &EraseHolidays() if ($section =~ /holiday/i && $Cnf{"EraseHolidays"}); + next; + } elsif (/^\*events/i) { + $section="events"; + next; + } + + if ($section =~ /var/i) { + confess "ERROR: invalid Date::Manip config file line.\n $_\n" + if (! /(.*\S)\s*=\s*(.*)$/); + ($var,$val)=($1,$2); + &Date_SetConfigVariable($var,$val); + + } elsif ($section =~ /holiday/i) { + confess "ERROR: invalid Date::Manip config file line.\n $_\n" + if (! /(.*\S)\s*=\s*(.*)$/); + ($recur,$name)=($1,$2); + $name="" if (! defined $name); + $Holiday{"desc"}{$recur}=$name; + + } elsif ($section =~ /events/i) { + confess "ERROR: invalid Date::Manip config file line.\n $_\n" + if (! /(.*\S)\s*=\s*(.*)$/); + ($val,$var)=($1,$2); + push @{ $Events{"raw"} },($val,$var); + + } else { + # A section not currently used by Date::Manip (but may be + # used by some extension to it). + next; + } + } + close($in); +} + +# $flag=&Date_TimeCheck(\$h,\$mn,\$s,\$ampm); +# Returns 1 if any of the fields are bad. All fields are optional, and +# all possible checks are done on the data. If a field is not passed in, +# it is set to default values. If data is missing, appropriate defaults +# are supplied. +sub Date_TimeCheck { + print "DEBUG: Date_TimeCheck\n" if ($Curr{"Debug"} =~ /trace/); + my($h,$mn,$s,$ampm)=@_; + my($tmp1,$tmp2,$tmp3)=(); + + $$h="" if (! defined $$h); + $$mn="" if (! defined $$mn); + $$s="" if (! defined $$s); + $$ampm="" if (! 
defined $$ampm); + $$ampm=uc($$ampm) if ($$ampm); + + # Check hour + $tmp1=$Lang{$Cnf{"Language"}}{"AmPm"}; + $tmp2=""; + if ($$ampm =~ /^$tmp1$/i) { + $tmp3=$Lang{$Cnf{"Language"}}{"AM"}; + $tmp2="AM" if ($$ampm =~ /^$tmp3$/i); + $tmp3=$Lang{$Cnf{"Language"}}{"PM"}; + $tmp2="PM" if ($$ampm =~ /^$tmp3$/i); + } elsif ($$ampm) { + return 1; + } + if ($tmp2 eq "AM" || $tmp2 eq "PM") { + $$h="0$$h" if (length($$h)==1); + return 1 if ($$h<1 || $$h>12); + $$h="00" if ($tmp2 eq "AM" and $$h==12); + $$h += 12 if ($tmp2 eq "PM" and $$h!=12); + } else { + $$h="00" if ($$h eq ""); + $$h="0$$h" if (length($$h)==1); + return 1 if (! &IsInt($$h,0,23)); + $tmp2="AM" if ($$h<12); + $tmp2="PM" if ($$h>=12); + } + $$ampm=$Lang{$Cnf{"Language"}}{"AMstr"}; + $$ampm=$Lang{$Cnf{"Language"}}{"PMstr"} if ($tmp2 eq "PM"); + + # Check minutes + $$mn="00" if ($$mn eq ""); + $$mn="0$$mn" if (length($$mn)==1); + return 1 if (! &IsInt($$mn,0,59)); + + # Check seconds + $$s="00" if ($$s eq ""); + $$s="0$$s" if (length($$s)==1); + return 1 if (! &IsInt($$s,0,59)); + + return 0; +} + +# $flag=&Date_DateCheck(\$y,\$m,\$d,\$h,\$mn,\$s,\$ampm,\$wk); +# Returns 1 if any of the fields are bad. All fields are optional, and +# all possible checks are done on the data. If a field is not passed in, +# it is set to default values. If data is missing, appropriate defaults +# are supplied. +# +# If the flag UpdateHolidays is set, the year is set to +# CurrHolidayYear. +sub Date_DateCheck { + print "DEBUG: Date_DateCheck\n" if ($Curr{"Debug"} =~ /trace/); + my($y,$m,$d,$h,$mn,$s,$ampm,$wk)=@_; + my($tmp1,$tmp2,$tmp3)=(); + + my(@d_in_m)=(0,31,28,31,30,31,30,31,31,30,31,30,31); + my($curr_y)=$Curr{"Y"}; + my($curr_m)=$Curr{"M"}; + my($curr_d)=$Curr{"D"}; + $$m=1, $$d=1 if (defined $$y and ! defined $$m and ! defined $$d); + $$y="" if (! defined $$y); + $$m="" if (! defined $$m); + $$d="" if (! defined $$d); + $$wk="" if (! defined $$wk); + $$d=$curr_d if ($$y eq "" and $$m eq "" and $$d eq ""); + + # Check year. + $$y=$curr_y if ($$y eq ""); + $$y=&Date_FixYear($$y) if (length($$y)<4); + return 1 if (! &IsInt($$y,0,9999)); + $d_in_m[2]=29 if (&Date_LeapYear($$y)); + + # Check month + $$m=$curr_m if ($$m eq ""); + $$m=$Lang{$Cnf{"Language"}}{"MonthH"}{lc($$m)} + if (exists $Lang{$Cnf{"Language"}}{"MonthH"}{lc($$m)}); + $$m="0$$m" if (length($$m)==1); + return 1 if (! &IsInt($$m,1,12)); + + # Check day + $$d="01" if ($$d eq ""); + $$d="0$$d" if (length($$d)==1); + return 1 if (! &IsInt($$d,1,$d_in_m[$$m])); + if ($$wk) { + $tmp1=&Date_DayOfWeek($$m,$$d,$$y); + $tmp2=$Lang{$Cnf{"Language"}}{"WeekH"}{lc($$wk)} + if (exists $Lang{$Cnf{"Language"}}{"WeekH"}{lc($$wk)}); + return 1 if ($tmp1 != $tmp2); + } + + return &Date_TimeCheck($h,$mn,$s,$ampm); +} + +# Takes a year in 2 digit form and returns it in 4 digit form +sub Date_FixYear { + print "DEBUG: Date_FixYear\n" if ($Curr{"Debug"} =~ /trace/); + my($y)=@_; + my($curr_y)=$Curr{"Y"}; + $y=$curr_y if (! defined $y or ! 
$y); + return $y if (length($y)==4); + confess "ERROR: Invalid year ($y)\n" if (length($y)!=2); + my($y1,$y2)=(); + + if (lc($Cnf{"YYtoYYYY"}) eq "c") { + $y1=substring($y,0,2); + $y="$y1$y"; + + } elsif ($Cnf{"YYtoYYYY"} =~ /^c(\d{2})$/i) { + $y1=$1; + $y="$y1$y"; + + } elsif ($Cnf{"YYtoYYYY"} =~ /^c(\d{2})(\d{2})$/i) { + $y1="$1$2"; + $y ="$1$y"; + $y += 100 if ($y<$y1); + + } else { + $y1=$curr_y-$Cnf{"YYtoYYYY"}; + $y2=$y1+99; + $y="19$y"; + while ($y<$y1) { + $y+=100; + } + while ($y>$y2) { + $y-=100; + } + } + $y; +} + +# &Date_NthWeekOfYear($y,$n); +# Returns a list of (YYYY,MM,DD) for the 1st day of the Nth week of the +# year. +# &Date_NthWeekOfYear($y,$n,$dow,$flag); +# Returns a list of (YYYY,MM,DD) for the Nth DoW of the year. If flag +# is nil, the first DoW of the year may actually be in the previous +# year (since the 1st week may include days from the previous year). +# If flag is non-nil, the 1st DoW of the year refers to the 1st one +# actually in the year +sub Date_NthWeekOfYear { + print "DEBUG: Date_NthWeekOfYear\n" if ($Curr{"Debug"} =~ /trace/); + my($y,$n,$dow,$flag)=@_; + my($m,$d,$err,$tmp,$date,%dow)=(); + $y=$Curr{"Y"} if (! defined $y or ! $y); + $n=1 if (! defined $n or $n eq ""); + return () if ($n<0 || $n>53); + if (defined $dow) { + $dow=lc($dow); + %dow=%{ $Lang{$Cnf{"Language"}}{"WeekH"} }; + $dow=$dow{$dow} if (exists $dow{$dow}); + return () if ($dow<1 || $dow>7); + $flag="" if (! defined $flag); + } else { + $dow=""; + $flag=""; + } + + $y=&Date_FixYear($y) if (length($y)<4); + if ($Cnf{"Jan1Week1"}) { + $date=&Date_Join($y,1,1,0,0,0); + } else { + $date=&Date_Join($y,1,4,0,0,0); + } + $date=&Date_GetPrev($date,$Cnf{"FirstDay"},1); + $date=&Date_GetNext($date,$dow,1) if ($dow ne ""); + + if ($flag) { + ($tmp)=&Date_Split($date, 1); + $n++ if ($tmp != $y); + } + + if ($n>1) { + $date=&DateCalc_DateDelta($date,"+0:0:". ($n-1) . ":0:0:0:0",\$err,0); + } elsif ($n==0) { + $date=&DateCalc_DateDelta($date,"-0:0:1:0:0:0:0",\$err,0); + } + ($y,$m,$d)=&Date_Split($date, 1); + ($y,$m,$d); +} + +######################################################################## +# LANGUAGE INITIALIZATION +######################################################################## + +# 8-bit international characters can be gotten by "\xXX". I don't know +# how to get 16-bit characters. I've got to read up on perllocale. 
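+#
+# Illustrative use: after &Char_8Bit(\%h), $h{"e'"} holds "\xe9" (Latin-1
+# e-acute), which the language tables below interpolate into strings such
+# as the French "f${e}vrier".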
+sub Char_8Bit { + my($hash)=@_; + + # grave ` + # A` 00c0 a` 00e0 + # E` 00c8 e` 00e8 + # I` 00cc i` 00ec + # O` 00d2 o` 00f2 + # U` 00d9 u` 00f9 + # W` 1e80 w` 1e81 + # Y` 1ef2 y` 1ef3 + + $$hash{"A`"} = "\xc0"; # LATIN CAPITAL LETTER A WITH GRAVE + $$hash{"E`"} = "\xc8"; # LATIN CAPITAL LETTER E WITH GRAVE + $$hash{"I`"} = "\xcc"; # LATIN CAPITAL LETTER I WITH GRAVE + $$hash{"O`"} = "\xd2"; # LATIN CAPITAL LETTER O WITH GRAVE + $$hash{"U`"} = "\xd9"; # LATIN CAPITAL LETTER U WITH GRAVE + $$hash{"a`"} = "\xe0"; # LATIN SMALL LETTER A WITH GRAVE + $$hash{"e`"} = "\xe8"; # LATIN SMALL LETTER E WITH GRAVE + $$hash{"i`"} = "\xec"; # LATIN SMALL LETTER I WITH GRAVE + $$hash{"o`"} = "\xf2"; # LATIN SMALL LETTER O WITH GRAVE + $$hash{"u`"} = "\xf9"; # LATIN SMALL LETTER U WITH GRAVE + + # acute ' + # A' 00c1 a' 00e1 + # C' 0106 c' 0107 + # E' 00c9 e' 00e9 + # I' 00cd i' 00ed + # L' 0139 l' 013a + # N' 0143 n' 0144 + # O' 00d3 o' 00f3 + # R' 0154 r' 0155 + # S' 015a s' 015b + # U' 00da u' 00fa + # W' 1e82 w' 1e83 + # Y' 00dd y' 00fd + # Z' 0179 z' 017a + + $$hash{"A'"} = "\xc1"; # LATIN CAPITAL LETTER A WITH ACUTE + $$hash{"E'"} = "\xc9"; # LATIN CAPITAL LETTER E WITH ACUTE + $$hash{"I'"} = "\xcd"; # LATIN CAPITAL LETTER I WITH ACUTE + $$hash{"O'"} = "\xd3"; # LATIN CAPITAL LETTER O WITH ACUTE + $$hash{"U'"} = "\xda"; # LATIN CAPITAL LETTER U WITH ACUTE + $$hash{"Y'"} = "\xdd"; # LATIN CAPITAL LETTER Y WITH ACUTE + $$hash{"a'"} = "\xe1"; # LATIN SMALL LETTER A WITH ACUTE + $$hash{"e'"} = "\xe9"; # LATIN SMALL LETTER E WITH ACUTE + $$hash{"i'"} = "\xed"; # LATIN SMALL LETTER I WITH ACUTE + $$hash{"o'"} = "\xf3"; # LATIN SMALL LETTER O WITH ACUTE + $$hash{"u'"} = "\xfa"; # LATIN SMALL LETTER U WITH ACUTE + $$hash{"y'"} = "\xfd"; # LATIN SMALL LETTER Y WITH ACUTE + + # double acute " " + # O" 0150 o" 0151 + # U" 0170 u" 0171 + + # circumflex ^ + # A^ 00c2 a^ 00e2 + # C^ 0108 c^ 0109 + # E^ 00ca e^ 00ea + # G^ 011c g^ 011d + # H^ 0124 h^ 0125 + # I^ 00ce i^ 00ee + # J^ 0134 j^ 0135 + # O^ 00d4 o^ 00f4 + # S^ 015c s^ 015d + # U^ 00db u^ 00fb + # W^ 0174 w^ 0175 + # Y^ 0176 y^ 0177 + + $$hash{"A^"} = "\xc2"; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX + $$hash{"E^"} = "\xca"; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX + $$hash{"I^"} = "\xce"; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX + $$hash{"O^"} = "\xd4"; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX + $$hash{"U^"} = "\xdb"; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX + $$hash{"a^"} = "\xe2"; # LATIN SMALL LETTER A WITH CIRCUMFLEX + $$hash{"e^"} = "\xea"; # LATIN SMALL LETTER E WITH CIRCUMFLEX + $$hash{"i^"} = "\xee"; # LATIN SMALL LETTER I WITH CIRCUMFLEX + $$hash{"o^"} = "\xf4"; # LATIN SMALL LETTER O WITH CIRCUMFLEX + $$hash{"u^"} = "\xfb"; # LATIN SMALL LETTER U WITH CIRCUMFLEX + + # tilde ~ + # A~ 00c3 a~ 00e3 + # I~ 0128 i~ 0129 + # N~ 00d1 n~ 00f1 + # O~ 00d5 o~ 00f5 + # U~ 0168 u~ 0169 + + $$hash{"A~"} = "\xc3"; # LATIN CAPITAL LETTER A WITH TILDE + $$hash{"N~"} = "\xd1"; # LATIN CAPITAL LETTER N WITH TILDE + $$hash{"O~"} = "\xd5"; # LATIN CAPITAL LETTER O WITH TILDE + $$hash{"a~"} = "\xe3"; # LATIN SMALL LETTER A WITH TILDE + $$hash{"n~"} = "\xf1"; # LATIN SMALL LETTER N WITH TILDE + $$hash{"o~"} = "\xf5"; # LATIN SMALL LETTER O WITH TILDE + + # macron - + # A- 0100 a- 0101 + # E- 0112 e- 0113 + # I- 012a i- 012b + # O- 014c o- 014d + # U- 016a u- 016b + + # breve ( [half circle up] + # A( 0102 a( 0103 + # G( 011e g( 011f + # U( 016c u( 016d + + # dot . + # C. 010a c. 010b + # E. 0116 e. 0117 + # G. 0120 g. 0121 + # I. 0130 + # Z. 017b z. 
017c + + # diaeresis : [side by side dots] + # A: 00c4 a: 00e4 + # E: 00cb e: 00eb + # I: 00cf i: 00ef + # O: 00d6 o: 00f6 + # U: 00dc u: 00fc + # W: 1e84 w: 1e85 + # Y: 0178 y: 00ff + + $$hash{"A:"} = "\xc4"; # LATIN CAPITAL LETTER A WITH DIAERESIS + $$hash{"E:"} = "\xcb"; # LATIN CAPITAL LETTER E WITH DIAERESIS + $$hash{"I:"} = "\xcf"; # LATIN CAPITAL LETTER I WITH DIAERESIS + $$hash{"O:"} = "\xd6"; # LATIN CAPITAL LETTER O WITH DIAERESIS + $$hash{"U:"} = "\xdc"; # LATIN CAPITAL LETTER U WITH DIAERESIS + $$hash{"a:"} = "\xe4"; # LATIN SMALL LETTER A WITH DIAERESIS + $$hash{"e:"} = "\xeb"; # LATIN SMALL LETTER E WITH DIAERESIS + $$hash{"i:"} = "\xef"; # LATIN SMALL LETTER I WITH DIAERESIS + $$hash{"o:"} = "\xf6"; # LATIN SMALL LETTER O WITH DIAERESIS + $$hash{"u:"} = "\xfc"; # LATIN SMALL LETTER U WITH DIAERESIS + $$hash{"y:"} = "\xff"; # LATIN SMALL LETTER Y WITH DIAERESIS + + # ring o + # U0 016e u0 016f + + # cedilla , [squiggle down and left below the letter] + # ,C 00c7 ,c 00e7 + # ,G 0122 ,g 0123 + # ,K 0136 ,k 0137 + # ,L 013b ,l 013c + # ,N 0145 ,n 0146 + # ,R 0156 ,r 0157 + # ,S 015e ,s 015f + # ,T 0162 ,t 0163 + + $$hash{",C"} = "\xc7"; # LATIN CAPITAL LETTER C WITH CEDILLA + $$hash{",c"} = "\xe7"; # LATIN SMALL LETTER C WITH CEDILLA + + # ogonek ; [squiggle down and right below the letter] + # A; 0104 a; 0105 + # E; 0118 e; 0119 + # I; 012e i; 012f + # U; 0172 u; 0173 + + # caron < [little v on top] + # A< 01cd a< 01ce + # C< 010c c< 010d + # D< 010e d< 010f + # E< 011a e< 011b + # L< 013d l< 013e + # N< 0147 n< 0148 + # R< 0158 r< 0159 + # S< 0160 s< 0161 + # T< 0164 t< 0165 + # Z< 017d z< 017e + + + # Other characters + + # First character is below, 2nd character is above + $$hash{"||"} = "\xa6"; # BROKEN BAR + $$hash{" :"} = "\xa8"; # DIAERESIS + $$hash{"-a"} = "\xaa"; # FEMININE ORDINAL INDICATOR + #$$hash{" -"}= "\xaf"; # MACRON (narrow bar) + $$hash{" -"} = "\xad"; # HYPHEN (wide bar) + $$hash{" o"} = "\xb0"; # DEGREE SIGN + $$hash{"-+"} = "\xb1"; # PLUS\342\200\220MINUS SIGN + $$hash{" 1"} = "\xb9"; # SUPERSCRIPT ONE + $$hash{" 2"} = "\xb2"; # SUPERSCRIPT TWO + $$hash{" 3"} = "\xb3"; # SUPERSCRIPT THREE + $$hash{" '"} = "\xb4"; # ACUTE ACCENT + $$hash{"-o"} = "\xba"; # MASCULINE ORDINAL INDICATOR + $$hash{" ."} = "\xb7"; # MIDDLE DOT + $$hash{", "} = "\xb8"; # CEDILLA + $$hash{"Ao"} = "\xc5"; # LATIN CAPITAL LETTER A WITH RING ABOVE + $$hash{"ao"} = "\xe5"; # LATIN SMALL LETTER A WITH RING ABOVE + $$hash{"ox"} = "\xf0"; # LATIN SMALL LETTER ETH + + # upside down characters + + $$hash{"ud!"} = "\xa1"; # INVERTED EXCLAMATION MARK + $$hash{"ud?"} = "\xbf"; # INVERTED QUESTION MARK + + # overlay characters + + $$hash{"X o"} = "\xa4"; # CURRENCY SIGN + $$hash{"Y ="} = "\xa5"; # YEN SIGN + $$hash{"S o"} = "\xa7"; # SECTION SIGN + $$hash{"O c"} = "\xa9"; # COPYRIGHT SIGN Copyright + $$hash{"O R"} = "\xae"; # REGISTERED SIGN + $$hash{"D -"} = "\xd0"; # LATIN CAPITAL LETTER ETH + $$hash{"O /"} = "\xd8"; # LATIN CAPITAL LETTER O WITH STROKE + $$hash{"o /"} = "\xf8"; # LATIN SMALL LETTER O WITH STROKE + + # special names + + $$hash{"1/4"} = "\xbc"; # VULGAR FRACTION ONE QUARTER + $$hash{"1/2"} = "\xbd"; # VULGAR FRACTION ONE HALF + $$hash{"3/4"} = "\xbe"; # VULGAR FRACTION THREE QUARTERS + $$hash{"<<"} = "\xab"; # LEFT POINTING DOUBLE ANGLE QUOTATION MARK + $$hash{">>"} = "\xbb"; # RIGHT POINTING DOUBLE ANGLE QUOTATION MARK + $$hash{"cent"}= "\xa2"; # CENT SIGN + $$hash{"lb"} = "\xa3"; # POUND SIGN + $$hash{"mu"} = "\xb5"; # MICRO SIGN + $$hash{"beta"}= "\xdf"; # LATIN SMALL LETTER 
SHARP S + $$hash{"para"}= "\xb6"; # PILCROW SIGN + $$hash{"-|"} = "\xac"; # NOT SIGN + $$hash{"AE"} = "\xc6"; # LATIN CAPITAL LETTER AE + $$hash{"ae"} = "\xe6"; # LATIN SMALL LETTER AE + $$hash{"x"} = "\xd7"; # MULTIPLICATION SIGN + $$hash{"P"} = "\xde"; # LATIN CAPITAL LETTER THORN + $$hash{"/"} = "\xf7"; # DIVISION SIGN + $$hash{"p"} = "\xfe"; # LATIN SMALL LETTER THORN +} + +# $hashref = &Date_Init_LANGUAGE; +# This returns a hash containing all of the initialization for a +# specific language. The hash elements are: +# +# @ month_name full month names January February ... +# @ month_abb month abbreviations Jan Feb ... +# @ day_name day names Monday Tuesday ... +# @ day_abb day abbreviations Mon Tue ... +# @ day_char day character abbrevs M T ... +# @ am AM notations +# @ pm PM notations +# +# @ num_suff number with suffix 1st 2nd ... +# @ num_word numbers spelled out first second ... +# +# $ now words which mean now now today ... +# $ last words which mean last last final ... +# $ each words which mean each each every ... +# $ of of (as in a member of) in of ... +# ex. 4th day OF June +# $ at at 4:00 at +# $ on on Sunday on +# $ future in the future in +# $ past in the past ago +# $ next next item next +# $ prev previous item last previous +# $ later 2 hours later +# +# % offset a hash of special dates { tomorrow->0:0:0:1:0:0:0 } +# % times a hash of times { noon->12:00:00 ... } +# +# $ years words for year y yr year ... +# $ months words for month +# $ weeks words for week +# $ days words for day +# $ hours words for hour +# $ minutes words for minute +# $ seconds words for second +# % replace +# The replace element is quite important, but a bit tricky. In +# English (and probably other languages), one of the abbreviations +# for the word month that would be nice is "m". The problem is that +# "m" matches the "m" in "minute" which causes the string to be +# improperly matched in some cases. Hence, the list of abbreviations +# for month is given as: +# "mon month months" +# In order to allow you to enter "m", replacements can be done. +# $replace is a list of pairs of words which are matched and replaced +# AS ENTIRE WORDS. Having $replace equal to "m"->"month" means that +# the entire word "m" will be replaced with "month". This allows the +# desired abbreviation to be used. Make sure that replace contains +# an even number of words (i.e. all must be pairs). Any time a +# desired abbreviation matches the start of any other, it has to go +# here. +# +# $ exact exact mode exactly +# $ approx approximate mode approximately +# $ business business mode business +# +# r sephm hour/minute separator (?::) +# r sepms minute/second separator (?::) +# r sepss second/fraction separator (?:[.:]) +# +# Elements marked with an asterix (@) are returned as a set of lists. +# Each list contains the strings for each element. The first set is used +# when the 7-bit ASCII (US) character set is wanted. The 2nd set is used +# when an international character set is available. Both of the 1st two +# sets should be complete (but the 2nd list can be left empty to force the +# first set to be used always). The 3rd set and later can be partial sets +# if desired. +# +# Elements marked with a dollar ($) are returned as a simple list of words. +# +# Elements marked with a percent (%) are returned as a hash list. +# +# Elements marked with (r) are regular expression elements which must not +# create a back reference. +# +# ***NOTE*** Every hash element (unless otherwise noted) MUST be defined in +# every language. 
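+#
+# For instance (see the &Date_InitLists sample call above), the month_name
+# and month_abb lists of a language are combined into one case-insensitive
+# regexp plus a %Month hash mapping each name or abbreviation to its month
+# number (1-12).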
+ +sub Date_Init_English { + print "DEBUG: Date_Init_English\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + + $$d{"month_name"}= + [["January","February","March","April","May","June", + "July","August","September","October","November","December"]]; + + $$d{"month_abb"}= + [["Jan","Feb","Mar","Apr","May","Jun", + "Jul","Aug","Sep","Oct","Nov","Dec"], + [], + ["","","","","","","","","Sept"]]; + + $$d{"day_name"}= + [["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]]; + $$d{"day_abb"}= + [["Mon","Tue","Wed","Thu","Fri","Sat","Sun"], + ["", "Tues","", "Thur","", "", ""]]; + $$d{"day_char"}= + [["M","T","W","Th","F","Sa","S"]]; + + $$d{"num_suff"}= + [["1st","2nd","3rd","4th","5th","6th","7th","8th","9th","10th", + "11th","12th","13th","14th","15th","16th","17th","18th","19th","20th", + "21st","22nd","23rd","24th","25th","26th","27th","28th","29th","30th", + "31st"]]; + $$d{"num_word"}= + [["first","second","third","fourth","fifth","sixth","seventh","eighth", + "ninth","tenth","eleventh","twelfth","thirteenth","fourteenth", + "fifteenth","sixteenth","seventeenth","eighteenth","nineteenth", + "twentieth","twenty-first","twenty-second","twenty-third", + "twenty-fourth","twenty-fifth","twenty-sixth","twenty-seventh", + "twenty-eighth","twenty-ninth","thirtieth","thirty-first"]]; + + $$d{"now"} =["today","now"]; + $$d{"last"} =["last","final"]; + $$d{"each"} =["each","every"]; + $$d{"of"} =["in","of"]; + $$d{"at"} =["at"]; + $$d{"on"} =["on"]; + $$d{"future"} =["in"]; + $$d{"past"} =["ago"]; + $$d{"next"} =["next"]; + $$d{"prev"} =["previous","last"]; + $$d{"later"} =["later"]; + + $$d{"exact"} =["exactly"]; + $$d{"approx"} =["approximately"]; + $$d{"business"}=["business"]; + + $$d{"offset"} =["yesterday","-0:0:0:1:0:0:0","tomorrow","+0:0:0:1:0:0:0"]; + $$d{"times"} =["noon","12:00:00","midnight","00:00:00"]; + + $$d{"years"} =["y","yr","year","yrs","years"]; + $$d{"months"} =["mon","month","months"]; + $$d{"weeks"} =["w","wk","wks","week","weeks"]; + $$d{"days"} =["d","day","days"]; + $$d{"hours"} =["h","hr","hrs","hour","hours"]; + $$d{"minutes"} =["mn","min","minute","minutes"]; + $$d{"seconds"} =["s","sec","second","seconds"]; + $$d{"replace"} =["m","month"]; + + $$d{"sephm"} =':'; + $$d{"sepms"} =':'; + $$d{"sepss"} ='[.:]'; + + $$d{"am"} = ["AM","A.M."]; + $$d{"pm"} = ["PM","P.M."]; +} + +sub Date_Init_Italian { + print "DEBUG: Date_Init_Italian\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + my(%h)=(); + &Char_8Bit(\%h); + my($i)=$h{"i'"}; + + $$d{"month_name"}= + [[qw(Gennaio Febbraio Marzo Aprile Maggio Giugno + Luglio Agosto Settembre Ottobre Novembre Dicembre)]]; + + $$d{"month_abb"}= + [[qw(Gen Feb Mar Apr Mag Giu Lug Ago Set Ott Nov Dic)]]; + + $$d{"day_name"}= + [[qw(Lunedi Martedi Mercoledi Giovedi Venerdi Sabato Domenica)], + [qw(Luned${i} Marted${i} Mercoled${i} Gioved${i} Venerd${i})]]; + $$d{"day_abb"}= + [[qw(Lun Mar Mer Gio Ven Sab Dom)]]; + $$d{"day_char"}= + [[qw(L Ma Me G V S D)]]; + + $$d{"num_suff"}= + [[qw(1mo 2do 3zo 4to 5to 6to 7mo 8vo 9no 10mo 11mo 12mo 13mo 14mo 15mo + 16mo 17mo 18mo 19mo 20mo 21mo 22mo 23mo 24mo 25mo 26mo 27mo 28mo + 29mo 3mo 31mo)]]; + $$d{"num_word"}= + [[qw(primo secondo terzo quarto quinto sesto settimo ottavo nono decimo + undicesimo dodicesimo tredicesimo quattordicesimo quindicesimo + sedicesimo diciassettesimo diciottesimo diciannovesimo ventesimo + ventunesimo ventiduesimo ventitreesimo ventiquattresimo + venticinquesimo ventiseiesimo ventisettesimo ventottesimo + ventinovesimo trentesimo trentunesimo)]]; + + 
$$d{"now"} =[qw(adesso oggi)]; + $$d{"last"} =[qw(ultimo)]; + $$d{"each"} =[qw(ogni)]; + $$d{"of"} =[qw(della del)]; + $$d{"at"} =[qw(alle)]; + $$d{"on"} =[qw(di)]; + $$d{"future"} =[qw(fra)]; + $$d{"past"} =[qw(fa)]; + $$d{"next"} =[qw(prossimo)]; + $$d{"prev"} =[qw(ultimo)]; + $$d{"later"} =[qw(dopo)]; + + $$d{"exact"} =[qw(esattamente)]; + $$d{"approx"} =[qw(circa)]; + $$d{"business"}=[qw(lavorativi lavorativo)]; + + $$d{"offset"} =[qw(ieri -0:0:0:1:0:0:0 domani +0:0:0:1:0:0:0)]; + $$d{"times"} =[qw(mezzogiorno 12:00:00 mezzanotte 00:00:00)]; + + $$d{"years"} =[qw(anni anno a)]; + $$d{"months"} =[qw(mesi mese mes)]; + $$d{"weeks"} =[qw(settimane settimana sett)]; + $$d{"days"} =[qw(giorni giorno g)]; + $$d{"hours"} =[qw(ore ora h)]; + $$d{"minutes"} =[qw(minuti minuto min)]; + $$d{"seconds"} =[qw(secondi secondo sec)]; + $$d{"replace"} =[qw(s sec m mes)]; + + $$d{"sephm"} =':'; + $$d{"sepms"} =':'; + $$d{"sepss"} ='[.:]'; + + $$d{"am"} = [qw(AM)]; + $$d{"pm"} = [qw(PM)]; +} + +sub Date_Init_French { + print "DEBUG: Date_Init_French\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + my(%h)=(); + &Char_8Bit(\%h); + my($e)=$h{"e'"}; + my($u)=$h{"u^"}; + my($a)=$h{"a'"}; + + $$d{"month_name"}= + [["janvier","fevrier","mars","avril","mai","juin", + "juillet","aout","septembre","octobre","novembre","decembre"], + ["janvier","f${e}vrier","mars","avril","mai","juin", + "juillet","ao${u}t","septembre","octobre","novembre","d${e}cembre"]]; + $$d{"month_abb"}= + [["jan","fev","mar","avr","mai","juin", + "juil","aout","sept","oct","nov","dec"], + ["jan","f${e}v","mar","avr","mai","juin", + "juil","ao${u}t","sept","oct","nov","d${e}c"]]; + + $$d{"day_name"}= + [["lundi","mardi","mercredi","jeudi","vendredi","samedi","dimanche"]]; + $$d{"day_abb"}= + [["lun","mar","mer","jeu","ven","sam","dim"]]; + $$d{"day_char"}= + [["l","ma","me","j","v","s","d"]]; + + $$d{"num_suff"}= + [["1er","2e","3e","4e","5e","6e","7e","8e","9e","10e", + "11e","12e","13e","14e","15e","16e","17e","18e","19e","20e", + "21e","22e","23e","24e","25e","26e","27e","28e","29e","30e", + "31e"]]; + $$d{"num_word"}= + [["premier","deux","trois","quatre","cinq","six","sept","huit","neuf", + "dix","onze","douze","treize","quatorze","quinze","seize","dix-sept", + "dix-huit","dix-neuf","vingt","vingt et un","vingt-deux","vingt-trois", + "vingt-quatre","vingt-cinq","vingt-six","vingt-sept","vingt-huit", + "vingt-neuf","trente","trente et un"], + ["1re"]]; + + $$d{"now"} =["aujourd'hui","maintenant"]; + $$d{"last"} =["dernier"]; + $$d{"each"} =["chaque","tous les","toutes les"]; + $$d{"of"} =["en","de"]; + $$d{"at"} =["a","${a}0"]; + $$d{"on"} =["sur"]; + $$d{"future"} =["en"]; + $$d{"past"} =["il y a"]; + $$d{"next"} =["suivant"]; + $$d{"prev"} =["precedent","pr${e}c${e}dent"]; + $$d{"later"} =["plus tard"]; + + $$d{"exact"} =["exactement"]; + $$d{"approx"} =["approximativement"]; + $$d{"business"}=["professionel"]; + + $$d{"offset"} =["hier","-0:0:0:1:0:0:0","demain","+0:0:0:1:0:0:0"]; + $$d{"times"} =["midi","12:00:00","minuit","00:00:00"]; + + $$d{"years"} =["an","annee","ans","annees","ann${e}e","ann${e}es"]; + $$d{"months"} =["mois"]; + $$d{"weeks"} =["sem","semaine"]; + $$d{"days"} =["j","jour","jours"]; + $$d{"hours"} =["h","heure","heures"]; + $$d{"minutes"} =["mn","min","minute","minutes"]; + $$d{"seconds"} =["s","sec","seconde","secondes"]; + $$d{"replace"} =["m","mois"]; + + $$d{"sephm"} ='[h:]'; + $$d{"sepms"} =':'; + $$d{"sepss"} ='[.:,]'; + + $$d{"am"} = ["du matin"]; + $$d{"pm"} = ["du soir"]; +} + +sub Date_Init_Romanian { + 
print "DEBUG: Date_Init_Romanian\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + my(%h)=(); + &Char_8Bit(\%h); + my($p)=$h{"p"}; + my($i)=$h{"i^"}; + my($a)=$h{"a~"}; + my($o)=$h{"-o"}; + + $$d{"month_name"}= + [["ianuarie","februarie","martie","aprilie","mai","iunie", + "iulie","august","septembrie","octombrie","noiembrie","decembrie"]]; + $$d{"month_abb"}= + [["ian","febr","mart","apr","mai","iun", + "iul","aug","sept","oct","nov","dec"], + ["","feb"]]; + + $$d{"day_name"}= + [["luni","marti","miercuri","joi","vineri","simbata","duminica"], + ["luni","mar${p}i","miercuri","joi","vineri","s${i}mb${a}t${a}", + "duminic${a}"]]; + $$d{"day_abb"}= + [["lun","mar","mie","joi","vin","sim","dum"], + ["lun","mar","mie","joi","vin","s${i}m","dum"]]; + $$d{"day_char"}= + [["L","Ma","Mi","J","V","S","D"]]; + + $$d{"num_suff"}= + [["prima","a doua","a 3-a","a 4-a","a 5-a","a 6-a","a 7-a","a 8-a", + "a 9-a","a 10-a","a 11-a","a 12-a","a 13-a","a 14-a","a 15-a", + "a 16-a","a 17-a","a 18-a","a 19-a","a 20-a","a 21-a","a 22-a", + "a 23-a","a 24-a","a 25-a","a 26-a","a 27-a","a 28-a","a 29-a", + "a 30-a","a 31-a"]]; + + $$d{"num_word"}= + [["prima","a doua","a treia","a patra","a cincea","a sasea","a saptea", + "a opta","a noua","a zecea","a unsprezecea","a doisprezecea", + "a treisprezecea","a patrusprezecea","a cincisprezecea","a saiprezecea", + "a saptesprezecea","a optsprezecea","a nouasprezecea","a douazecea", + "a douazecisiuna","a douazecisidoua","a douazecisitreia", + "a douazecisipatra","a douazecisicincea","a douazecisisasea", + "a douazecisisaptea","a douazecisiopta","a douazecisinoua","a treizecea", + "a treizecisiuna"], + ["prima","a doua","a treia","a patra","a cincea","a ${o}asea", + "a ${o}aptea","a opta","a noua","a zecea","a unsprezecea", + "a doisprezecea","a treisprezecea","a patrusprezecea","a cincisprezecea", + "a ${o}aiprezecea","a ${o}aptesprezecea","a optsprezecea", + "a nou${a}sprezecea","a dou${a}zecea","a dou${a}zeci${o}iuna", + "a dou${a}zeci${o}idoua","a dou${a}zeci${o}itreia", + "a dou${a}zeci${o}ipatra","a dou${a}zeci${o}icincea", + "a dou${a}zeci${o}i${o}asea","a dou${a}zeci${o}i${o}aptea", + "a dou${a}zeci${o}iopta","a dou${a}zeci${o}inoua","a treizecea", + "a treizeci${o}iuna"], + ["intii", "doi", "trei", "patru", "cinci", "sase", "sapte", + "opt","noua","zece","unsprezece","doisprezece", + "treisprezece","patrusprezece","cincisprezece","saiprezece", + "saptesprezece","optsprezece","nouasprezece","douazeci", + "douazecisiunu","douazecisidoi","douazecisitrei", + "douazecisipatru","douazecisicinci","douazecisisase","douazecisisapte", + "douazecisiopt","douazecisinoua","treizeci","treizecisiunu"], + ["${i}nt${i}i", "doi", "trei", "patru", "cinci", "${o}ase", "${o}apte", + "opt","nou${a}","zece","unsprezece","doisprezece", + "treisprezece","patrusprezece","cincisprezece","${o}aiprezece", + "${o}aptesprezece","optsprezece","nou${a}sprezece","dou${a}zeci", + "dou${a}zeci${o}iunu","dou${a}zeci${o}idoi","dou${a}zeci${o}itrei", + "dou${a}zecisipatru","dou${a}zeci${o}icinci","dou${a}zeci${o}i${o}ase", + "dou${a}zeci${o}i${o}apte","dou${a}zeci${o}iopt", + "dou${a}zeci${o}inou${a}","treizeci","treizeci${o}iunu"]]; + + $$d{"now"} =["acum","azi","astazi","ast${a}zi"]; + $$d{"last"} =["ultima"]; + $$d{"each"} =["fiecare"]; + $$d{"of"} =["din","in","n"]; + $$d{"at"} =["la"]; + $$d{"on"} =["on"]; + $$d{"future"} =["in","${i}n"]; + $$d{"past"} =["in urma", "${i}n urm${a}"]; + $$d{"next"} =["urmatoarea","urm${a}toarea"]; + $$d{"prev"} =["precedenta","ultima"]; + $$d{"later"} =["mai 
tirziu", "mai t${i}rziu"]; + + $$d{"exact"} =["exact"]; + $$d{"approx"} =["aproximativ"]; + $$d{"business"}=["de lucru","lucratoare","lucr${a}toare"]; + + $$d{"offset"} =["ieri","-0:0:0:1:0:0:0", + "alaltaieri", "-0:0:0:2:0:0:0", + "alalt${a}ieri","-0:0:0:2:0:0:0", + "miine","+0:0:0:1:0:0:0", + "m${i}ine","+0:0:0:1:0:0:0", + "poimiine","+0:0:0:2:0:0:0", + "poim${i}ine","+0:0:0:2:0:0:0"]; + $$d{"times"} =["amiaza","12:00:00", + "amiaz${a}","12:00:00", + "miezul noptii","00:00:00", + "miezul nop${p}ii","00:00:00"]; + + $$d{"years"} =["ani","an","a"]; + $$d{"months"} =["luni","luna","lun${a}","l"]; + $$d{"weeks"} =["saptamini","s${a}pt${a}m${i}ni","saptamina", + "s${a}pt${a}m${i}na","sapt","s${a}pt"]; + $$d{"days"} =["zile","zi","z"]; + $$d{"hours"} =["ore", "ora", "or${a}", "h"]; + $$d{"minutes"} =["minute","min","m"]; + $$d{"seconds"} =["secunde","sec",]; + $$d{"replace"} =["s","secunde"]; + + $$d{"sephm"} =':'; + $$d{"sepms"} =':'; + $$d{"sepss"} ='[.:,]'; + + $$d{"am"} = ["AM","A.M."]; + $$d{"pm"} = ["PM","P.M."]; +} + +sub Date_Init_Swedish { + print "DEBUG: Date_Init_Swedish\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + my(%h)=(); + &Char_8Bit(\%h); + my($ao)=$h{"ao"}; + my($o) =$h{"o:"}; + my($a) =$h{"a:"}; + + $$d{"month_name"}= + [["Januari","Februari","Mars","April","Maj","Juni", + "Juli","Augusti","September","Oktober","November","December"]]; + $$d{"month_abb"}= + [["Jan","Feb","Mar","Apr","Maj","Jun", + "Jul","Aug","Sep","Okt","Nov","Dec"]]; + + $$d{"day_name"}= + [["Mandag","Tisdag","Onsdag","Torsdag","Fredag","Lordag","Sondag"], + ["M${ao}ndag","Tisdag","Onsdag","Torsdag","Fredag","L${o}rdag", + "S${o}ndag"]]; + $$d{"day_abb"}= + [["Man","Tis","Ons","Tor","Fre","Lor","Son"], + ["M${ao}n","Tis","Ons","Tor","Fre","L${o}r","S${o}n"]]; + $$d{"day_char"}= + [["M","Ti","O","To","F","L","S"]]; + + $$d{"num_suff"}= + [["1:a","2:a","3:e","4:e","5:e","6:e","7:e","8:e","9:e","10:e", + "11:e","12:e","13:e","14:e","15:e","16:e","17:e","18:e","19:e","20:e", + "21:a","22:a","23:e","24:e","25:e","26:e","27:e","28:e","29:e","30:e", + "31:a"]]; + $$d{"num_word"}= + [["forsta","andra","tredje","fjarde","femte","sjatte","sjunde", + "attonde","nionde","tionde","elfte","tolfte","trettonde","fjortonde", + "femtonde","sextonde","sjuttonde","artonde","nittonde","tjugonde", + "tjugoforsta","tjugoandra","tjugotredje","tjugofjarde","tjugofemte", + "tjugosjatte","tjugosjunde","tjugoattonde","tjugonionde", + "trettionde","trettioforsta"], + ["f${o}rsta","andra","tredje","fj${a}rde","femte","sj${a}tte","sjunde", + "${ao}ttonde","nionde","tionde","elfte","tolfte","trettonde","fjortonde", + "femtonde","sextonde","sjuttonde","artonde","nittonde","tjugonde", + "tjugof${o}rsta","tjugoandra","tjugotredje","tjugofj${a}rde","tjugofemte", + "tjugosj${a}tte","tjugosjunde","tjugo${ao}ttonde","tjugonionde", + "trettionde","trettiof${o}rsta"]]; + + $$d{"now"} =["idag","nu"]; + $$d{"last"} =["forra","f${o}rra","senaste"]; + $$d{"each"} =["varje"]; + $$d{"of"} =["om"]; + $$d{"at"} =["kl","kl.","klockan"]; + $$d{"on"} =["pa","p${ao}"]; + $$d{"future"} =["om"]; + $$d{"past"} =["sedan"]; + $$d{"next"} =["nasta","n${a}sta"]; + $$d{"prev"} =["forra","f${o}rra"]; + $$d{"later"} =["senare"]; + + $$d{"exact"} =["exakt"]; + $$d{"approx"} =["ungefar","ungef${a}r"]; + $$d{"business"}=["arbetsdag","arbetsdagar"]; + + $$d{"offset"} =["ig${ao}r","-0:0:0:1:0:0:0","igar","-0:0:0:1:0:0:0", + "imorgon","+0:0:0:1:0:0:0"]; + $$d{"times"} =["mitt pa dagen","12:00:00","mitt p${ao} dagen","12:00:00", + "midnatt","00:00:00"]; + + $$d{"years"} 
=["ar","${ao}r"]; + $$d{"months"} =["man","manad","manader","m${ao}n","m${ao}nad","m${ao}nader"]; + $$d{"weeks"} =["v","vecka","veckor"]; + $$d{"days"} =["d","dag","dagar"]; + $$d{"hours"} =["t","tim","timme","timmar"]; + $$d{"minutes"} =["min","minut","minuter"]; + $$d{"seconds"} =["s","sek","sekund","sekunder"]; + $$d{"replace"} =["m","minut"]; + + $$d{"sephm"} ='[.:]'; + $$d{"sepms"} =':'; + $$d{"sepss"} ='[.:]'; + + $$d{"am"} = ["FM"]; + $$d{"pm"} = ["EM"]; +} + +sub Date_Init_German { + print "DEBUG: Date_Init_German\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + my(%h)=(); + &Char_8Bit(\%h); + my($a)=$h{"a:"}; + my($u)=$h{"u:"}; + my($o)=$h{"o:"}; + my($b)=$h{"beta"}; + + $$d{"month_name"}= + [["Januar","Februar","Maerz","April","Mai","Juni", + "Juli","August","September","Oktober","November","Dezember"], + ["J${a}nner","Februar","M${a}rz","April","Mai","Juni", + "Juli","August","September","Oktober","November","Dezember"]]; + $$d{"month_abb"}= + [["Jan","Feb","Mar","Apr","Mai","Jun", + "Jul","Aug","Sep","Okt","Nov","Dez"], + ["J${a}n","Feb","M${a}r","Apr","Mai","Jun", + "Jul","Aug","Sep","Okt","Nov","Dez"]]; + + $$d{"day_name"}= + [["Montag","Dienstag","Mittwoch","Donnerstag","Freitag","Samstag", + "Sonntag"]]; + $$d{"day_abb"}= + [["Mon","Die","Mit","Don","Fre","Sam","Son"]]; + $$d{"day_char"}= + [["M","Di","Mi","Do","F","Sa","So"]]; + + $$d{"num_suff"}= + [["1.","2.","3.","4.","5.","6.","7.","8.","9.","10.", + "11.","12.","13.","14.","15.","16.","17.","18.","19.","20.", + "21.","22.","23.","24.","25.","26.","27.","28.","29.","30.", + "31."]]; + $$d{"num_word"}= + [ + ["erste","zweite","dritte","vierte","funfte","sechste","siebente", + "achte","neunte","zehnte","elfte","zwolfte","dreizehnte","vierzehnte", + "funfzehnte","sechzehnte","siebzehnte","achtzehnte","neunzehnte", + "zwanzigste","einundzwanzigste","zweiundzwanzigste","dreiundzwanzigste", + "vierundzwanzigste","funfundzwanzigste","sechundzwanzigste", + "siebundzwanzigste","achtundzwanzigste","neunundzwanzigste", + "dreibigste","einunddreibigste"], + ["erste","zweite","dritte","vierte","f${u}nfte","sechste","siebente", + "achte","neunte","zehnte","elfte","zw${o}lfte","dreizehnte", + "vierzehnte","f${u}nfzehnte","sechzehnte","siebzehnte","achtzehnte", + "neunzehnte","zwanzigste","einundzwanzigste","zweiundzwanzigste", + "dreiundzwanzigste","vierundzwanzigste","f${u}nfundzwanzigste", + "sechundzwanzigste","siebundzwanzigste","achtundzwanzigste", + "neunundzwanzigste","drei${b}igste","einunddrei${b}igste"], + ["erster"]]; + + $$d{"now"} =["heute","jetzt"]; + $$d{"last"} =["letzte","letzten"]; + $$d{"each"} =["jeden"]; + $$d{"of"} =["der","im","des"]; + $$d{"at"} =["um"]; + $$d{"on"} =["am"]; + $$d{"future"} =["in"]; + $$d{"past"} =["vor"]; + $$d{"next"} =["nachste","n${a}chste","nachsten","n${a}chsten"]; + $$d{"prev"} =["vorherigen","vorherige","letzte","letzten"]; + $$d{"later"} =["spater","sp${a}ter"]; + + $$d{"exact"} =["genau"]; + $$d{"approx"} =["ungefahr","ungef${a}hr"]; + $$d{"business"}=["Arbeitstag"]; + + $$d{"offset"} =["gestern","-0:0:0:1:0:0:0","morgen","+0:0:0:1:0:0:0"]; + $$d{"times"} =["mittag","12:00:00","mitternacht","00:00:00"]; + + $$d{"years"} =["j","Jahr","Jahre"]; + $$d{"months"} =["Monat","Monate"]; + $$d{"weeks"} =["w","Woche","Wochen"]; + $$d{"days"} =["t","Tag","Tage"]; + $$d{"hours"} =["h","std","Stunde","Stunden"]; + $$d{"minutes"} =["min","Minute","Minuten"]; + $$d{"seconds"} =["s","sek","Sekunde","Sekunden"]; + $$d{"replace"} =["m","Monat"]; + + $$d{"sephm"} =':'; + $$d{"sepms"} ='[: ]'; + 
$$d{"sepss"} ='[.:]'; + + $$d{"am"} = ["FM"]; + $$d{"pm"} = ["EM"]; +} + +sub Date_Init_Dutch { + print "DEBUG: Date_Init_Dutch\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + my(%h)=(); + &Char_8Bit(\%h); + + $$d{"month_name"}= + [["januari","februari","maart","april","mei","juni","juli","augustus", + "september","october","november","december"], + ["","","","","","","","","","oktober"]]; + + $$d{"month_abb"}= + [["jan","feb","maa","apr","mei","jun","jul", + "aug","sep","oct","nov","dec"], + ["","","mrt","","","","","","","okt"]]; + $$d{"day_name"}= + [["maandag","dinsdag","woensdag","donderdag","vrijdag","zaterdag", + "zondag"]]; + $$d{"day_abb"}= + [["ma","di","wo","do","vr","zat","zon"], + ["","","","","","za","zo"]]; + $$d{"day_char"}= + [["M","D","W","D","V","Za","Zo"]]; + + $$d{"num_suff"}= + [["1ste","2de","3de","4de","5de","6de","7de","8ste","9de","10de", + "11de","12de","13de","14de","15de","16de","17de","18de","19de","20ste", + "21ste","22ste","23ste","24ste","25ste","26ste","27ste","28ste","29ste", + "30ste","31ste"]]; + $$d{"num_word"}= + [["eerste","tweede","derde","vierde","vijfde","zesde","zevende","achtste", + "negende","tiende","elfde","twaalfde", + map {"${_}tiende";} qw (der veer vijf zes zeven acht negen), + "twintigste", + map {"${_}entwintigste";} qw (een twee drie vier vijf zes zeven acht + negen), + "dertigste","eenendertigste"], + ["","","","","","","","","","","","","","","","","","","","", + map {"${_}-en-twintigste";} qw (een twee drie vier vijf zes zeven acht + negen), + "dertigste","een-en-dertigste"], + ["een","twee","drie","vier","vijf","zes","zeven","acht","negen","tien", + "elf","twaalf", + map {"${_}tien"} qw (der veer vijf zes zeven acht negen), + "twintig", + map {"${_}entwintig"} qw (een twee drie vier vijf zes zeven acht negen), + "dertig","eenendertig"], + ["","","","","","","","","","","","","","","","","","","","", + map {"${_}-en-twintig"} qw (een twee drie vier vijf zes zeven acht + negen), + "dertig","een-en-dertig"]]; + + $$d{"now"} =["nu","nou","vandaag"]; + $$d{"last"} =["laatste"]; + $$d{"each"} =["elke","elk"]; + $$d{"of"} =["in","van"]; + $$d{"at"} =["om"]; + $$d{"on"} =["op"]; + $$d{"future"} =["over"]; + $$d{"past"} =["geleden","vroeger","eerder"]; + $$d{"next"} =["volgende","volgend"]; + $$d{"prev"} =["voorgaande","voorgaand"]; + $$d{"later"} =["later"]; + + $$d{"exact"} =["exact","precies","nauwkeurig"]; + $$d{"approx"} =["ongeveer","ong",'ong\.',"circa","ca",'ca\.']; + $$d{"business"}=["werk","zakelijke","zakelijk"]; + + $$d{"offset"} =["morgen","+0:0:0:1:0:0:0","overmorgen","+0:0:0:2:0:0:0", + "gisteren","-0:0:0:1:0:0:0","eergisteren","-0::00:2:0:0:0"]; + $$d{"times"} =["noen","12:00:00","middernacht","00:00:00"]; + + $$d{"years"} =["jaar","jaren","ja","j"]; + $$d{"months"} =["maand","maanden","mnd"]; + $$d{"weeks"} =["week","weken","w"]; + $$d{"days"} =["dag","dagen","d"]; + $$d{"hours"} =["uur","uren","u","h"]; + $$d{"minutes"} =["minuut","minuten","min"]; + $$d{"seconds"} =["seconde","seconden","sec","s"]; + $$d{"replace"} =["m","minuten"]; + + $$d{"sephm"} ='[:.uh]'; + $$d{"sepms"} ='[:.m]'; + $$d{"sepss"} ='[.:]'; + + $$d{"am"} = ["am","a.m.","vm","v.m.","voormiddag","'s_ochtends", + "ochtend","'s_nachts","nacht"]; + $$d{"pm"} = ["pm","p.m.","nm","n.m.","namiddag","'s_middags","middag", + "'s_avonds","avond"]; +} + +sub Date_Init_Polish { + print "DEBUG: Date_Init_Polish\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + + $$d{"month_name"}= + [["stycznia","luty","marca","kwietnia","maja","czerwca", + 
"lipca","sierpnia","wrzesnia","pazdziernika","listopada","grudnia"], + ["stycznia","luty","marca","kwietnia","maja","czerwca","lipca", + "sierpnia","wrze\x9cnia","pa\x9fdziernika","listopada","grudnia"]]; + $$d{"month_abb"}= + [["sty.","lut.","mar.","kwi.","maj","cze.", + "lip.","sie.","wrz.","paz.","lis.","gru."], + ["sty.","lut.","mar.","kwi.","maj","cze.", + "lip.","sie.","wrz.","pa\x9f.","lis.","gru."]]; + + $$d{"day_name"}= + [["poniedzialek","wtorek","sroda","czwartek","piatek","sobota", + "niedziela"], + ["poniedzia\x81\xb3ek","wtorek","\x9croda","czwartek","pi\x81\xb9tek", + "sobota","niedziela"]]; + $$d{"day_abb"}= + [["po.","wt.","sr.","cz.","pi.","so.","ni."], + ["po.","wt.","\x9cr.","cz.","pi.","so.","ni."]]; + $$d{"day_char"}= + [["p","w","e","c","p","s","n"], + ["p","w","\x9c.","c","p","s","n"]]; + + $$d{"num_suff"}= + [["1.","2.","3.","4.","5.","6.","7.","8.","9.","10.", + "11.","12.","13.","14.","15.","16.","17.","18.","19.","20.", + "21.","22.","23.","24.","25.","26.","27.","28.","29.","30.", + "31."]]; + $$d{"num_word"}= + [["pierwszego","drugiego","trzeczego","czwartego","piatego","szostego", + "siodmego","osmego","dziewiatego","dziesiatego", + "jedenastego","dwunastego","trzynastego","czternastego","pietnastego", + "szestnastego","siedemnastego","osiemnastego","dziewietnastego", + "dwudziestego", + "dwudziestego pierwszego","dwudziestego drugiego", + "dwudziestego trzeczego","dwudziestego czwartego", + "dwudziestego piatego","dwudziestego szostego", + "dwudziestego siodmego","dwudziestego osmego", + "dwudziestego dziewiatego","trzydziestego","trzydziestego pierwszego"], + ["pierwszego","drugiego","trzeczego","czwartego","pi\x81\xb9tego", + "sz\x81\xf3stego","si\x81\xf3dmego","\x81\xf3smego","dziewi\x81\xb9tego", + "dziesi\x81\xb9tego","jedenastego","dwunastego","trzynastego", + "czternastego","pi\x81\xeatnastego","szestnastego","siedemnastego", + "osiemnastego","dziewietnastego","dwudziestego", + "dwudziestego pierwszego","dwudziestego drugiego", + "dwudziestego trzeczego","dwudziestego czwartego", + "dwudziestego pi\x81\xb9tego","dwudziestego sz\x81\xf3stego", + "dwudziestego si\x81\xf3dmego","dwudziestego \x81\xf3smego", + "dwudziestego dziewi\x81\xb9tego","trzydziestego", + "trzydziestego pierwszego"]]; + + $$d{"now"} =["dzisaj","teraz"]; + $$d{"last"} =["ostatni","ostatna"]; + $$d{"each"} =["kazdy","ka\x81\xbfdy", "kazdym","ka\x81\xbfdym"]; + $$d{"of"} =["w","z"]; + $$d{"at"} =["o","u"]; + $$d{"on"} =["na"]; + $$d{"future"} =["za"]; + $$d{"past"} =["temu"]; + $$d{"next"} =["nastepny","nast\x81\xeapny","nastepnym","nast\x81\xeapnym", + "przyszly","przysz\x81\xb3y","przyszlym", + "przysz\x81\xb3ym"]; + $$d{"prev"} =["zeszly","zesz\x81\xb3y","zeszlym","zesz\x81\xb3ym"]; + $$d{"later"} =["later"]; + + $$d{"exact"} =["doklandnie","dok\x81\xb3andnie"]; + $$d{"approx"} =["w przyblizeniu","w przybli\x81\xbfeniu","mniej wiecej", + "mniej wi\x81\xeacej","okolo","oko\x81\xb3o"]; + $$d{"business"}=["sluzbowy","s\x81\xb3u\x81\xbfbowy","sluzbowym", + "s\x81\xb3u\x81\xbfbowym"]; + + $$d{"times"} =["po\x81\xb3udnie","12:00:00", + "p\x81\xf3\x81\xb3noc","00:00:00", + "poludnie","12:00:00","polnoc","00:00:00"]; + $$d{"offset"} =["wczoraj","-0:0:1:0:0:0","jutro","+0:0:1:0:0:0"]; + + $$d{"years"} =["rok","lat","lata","latach"]; + $$d{"months"} =["m.","miesiac","miesi\x81\xb9c","miesiecy", + "miesi\x81\xeacy","miesiacu","miesi\x81\xb9cu"]; + $$d{"weeks"} =["ty.","tydzien","tydzie\x81\xf1","tygodniu"]; + $$d{"days"} =["d.","dzien","dzie\x81\xf1","dni"]; + $$d{"hours"} 
=["g.","godzina","godziny","godzinie"]; + $$d{"minutes"} =["mn.","min.","minut","minuty"]; + $$d{"seconds"} =["s.","sekund","sekundy"]; + $$d{"replace"} =["m.","miesiac"]; + + $$d{"sephm"} =':'; + $$d{"sepms"} =':'; + $$d{"sepss"} ='[.:]'; + + $$d{"am"} = ["AM","A.M."]; + $$d{"pm"} = ["PM","P.M."]; +} + +sub Date_Init_Spanish { + print "DEBUG: Date_Init_Spanish\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + my(%h)=(); + &Char_8Bit(\%h); + + $$d{"month_name"}= + [["Enero","Febrero","Marzo","Abril","Mayo","Junio","Julio","Agosto", + "Septiembre","Octubre","Noviembre","Diciembre"]]; + + $$d{"month_abb"}= + [["Ene","Feb","Mar","Abr","May","Jun","Jul","Ago","Sep","Oct", + "Nov","Dic"]]; + + $$d{"day_name"}= + [["Lunes","Martes","Miercoles","Jueves","Viernes","Sabado","Domingo"]]; + $$d{"day_abb"}= + [["Lun","Mar","Mie","Jue","Vie","Sab","Dom"]]; + $$d{"day_char"}= + [["L","Ma","Mi","J","V","S","D"]]; + + $$d{"num_suff"}= + [["1o","2o","3o","4o","5o","6o","7o","8o","9o","10o", + "11o","12o","13o","14o","15o","16o","17o","18o","19o","20o", + "21o","22o","23o","24o","25o","26o","27o","28o","29o","30o","31o"], + ["1a","2a","3a","4a","5a","6a","7a","8a","9a","10a", + "11a","12a","13a","14a","15a","16a","17a","18a","19a","20a", + "21a","22a","23a","24a","25a","26a","27a","28a","29a","30a","31a"]]; + $$d{"num_word"}= + [["Primero","Segundo","Tercero","Cuarto","Quinto","Sexto","Septimo", + "Octavo","Noveno","Decimo","Decimo Primero","Decimo Segundo", + "Decimo Tercero","Decimo Cuarto","Decimo Quinto","Decimo Sexto", + "Decimo Septimo","Decimo Octavo","Decimo Noveno","Vigesimo", + "Vigesimo Primero","Vigesimo Segundo","Vigesimo Tercero", + "Vigesimo Cuarto","Vigesimo Quinto","Vigesimo Sexto", + "Vigesimo Septimo","Vigesimo Octavo","Vigesimo Noveno","Trigesimo", + "Trigesimo Primero"], + ["Primera","Segunda","Tercera","Cuarta","Quinta","Sexta","Septima", + "Octava","Novena","Decima","Decimo Primera","Decimo Segunda", + "Decimo Tercera","Decimo Cuarta","Decimo Quinta","Decimo Sexta", + "Decimo Septima","Decimo Octava","Decimo Novena","Vigesima", + "Vigesimo Primera","Vigesimo Segunda","Vigesimo Tercera", + "Vigesimo Cuarta","Vigesimo Quinta","Vigesimo Sexta", + "Vigesimo Septima","Vigesimo Octava","Vigesimo Novena","Trigesima", + "Trigesimo Primera"]]; + + $$d{"now"} =["Hoy","Ahora"]; + $$d{"last"} =["ultimo"]; + $$d{"each"} =["cada"]; + $$d{"of"} =["en","de"]; + $$d{"at"} =["a"]; + $$d{"on"} =["el"]; + $$d{"future"} =["en"]; + $$d{"past"} =["hace"]; + $$d{"next"} =["siguiente"]; + $$d{"prev"} =["anterior"]; + $$d{"later"} =["later"]; + + $$d{"exact"} =["exactamente"]; + $$d{"approx"} =["aproximadamente"]; + $$d{"business"}=["laborales"]; + + $$d{"offset"} =["ayer","-0:0:0:1:0:0:0","manana","+0:0:0:1:0:0:0"]; + $$d{"times"} =["mediodia","12:00:00","medianoche","00:00:00"]; + + $$d{"years"} =["a","ano","ano","anos","anos"]; + $$d{"months"} =["m","mes","mes","meses"]; + $$d{"weeks"} =["sem","semana","semana","semanas"]; + $$d{"days"} =["d","dia","dias"]; + $$d{"hours"} =["hr","hrs","hora","horas"]; + $$d{"minutes"} =["min","min","minuto","minutos"]; + $$d{"seconds"} =["s","seg","segundo","segundos"]; + $$d{"replace"} =["m","mes"]; + + $$d{"sephm"} =':'; + $$d{"sepms"} =':'; + $$d{"sepss"} ='[.:]'; + + $$d{"am"} = ["AM","A.M."]; + $$d{"pm"} = ["PM","P.M."]; +} + +sub Date_Init_Portuguese { + print "DEBUG: Date_Init_Portuguese\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + my(%h)=(); + &Char_8Bit(\%h); + my($o) = $h{"-o"}; + my($c) = $h{",c"}; + my($a) = $h{"a'"}; + my($e) = $h{"e'"}; + my($u) = 
$h{"u'"}; + my($o2)= $h{"o'"}; + my($a2)= $h{"a`"}; + my($a3)= $h{"a~"}; + my($e2)= $h{"e^"}; + + $$d{"month_name"}= + [["Janeiro","Fevereiro","Marco","Abril","Maio","Junho", + "Julho","Agosto","Setembro","Outubro","Novembro","Dezembro"], + ["Janeiro","Fevereiro","Mar${c}o","Abril","Maio","Junho", + "Julho","Agosto","Setembro","Outubro","Novembro","Dezembro"]]; + + $$d{"month_abb"}= + [["Jan","Fev","Mar","Abr","Mai","Jun", + "Jul","Ago","Set","Out","Nov","Dez"]]; + + $$d{"day_name"}= + [["Segunda","Terca","Quarta","Quinta","Sexta","Sabado","Domingo"], + ["Segunda","Ter${c}a","Quarta","Quinta","Sexta","S${a}bado","Domingo"]]; + $$d{"day_abb"}= + [["Seg","Ter","Qua","Qui","Sex","Sab","Dom"], + ["Seg","Ter","Qua","Qui","Sex","S${a}b","Dom"]]; + $$d{"day_char"}= + [["Sg","T","Qa","Qi","Sx","Sb","D"]]; + + $$d{"num_suff"}= + [["1${o}","2${o}","3${o}","4${o}","5${o}","6${o}","7${o}","8${o}", + "9${o}","10${o}","11${o}","12${o}","13${o}","14${o}","15${o}", + "16${o}","17${o}","18${o}","19${o}","20${o}","21${o}","22${o}", + "23${o}","24${o}","25${o}","26${o}","27${o}","28${o}","29${o}", + "30${o}","31${o}"]]; + $$d{"num_word"}= + [["primeiro","segundo","terceiro","quarto","quinto","sexto","setimo", + "oitavo","nono","decimo","decimo primeiro","decimo segundo", + "decimo terceiro","decimo quarto","decimo quinto","decimo sexto", + "decimo setimo","decimo oitavo","decimo nono","vigesimo", + "vigesimo primeiro","vigesimo segundo","vigesimo terceiro", + "vigesimo quarto","vigesimo quinto","vigesimo sexto","vigesimo setimo", + "vigesimo oitavo","vigesimo nono","trigesimo","trigesimo primeiro"], + ["primeiro","segundo","terceiro","quarto","quinto","sexto","s${e}timo", + "oitavo","nono","d${e}cimo","d${e}cimo primeiro","d${e}cimo segundo", + "d${e}cimo terceiro","d${e}cimo quarto","d${e}cimo quinto", + "d${e}cimo sexto","d${e}cimo s${e}timo","d${e}cimo oitavo", + "d${e}cimo nono","vig${e}simo","vig${e}simo primeiro", + "vig${e}simo segundo","vig${e}simo terceiro","vig${e}simo quarto", + "vig${e}simo quinto","vig${e}simo sexto","vig${e}simo s${e}timo", + "vig${e}simo oitavo","vig${e}simo nono","trig${e}simo", + "trig${e}simo primeiro"]]; + + $$d{"now"} =["agora","hoje"]; + $$d{"last"} =["${u}ltimo","ultimo"]; + $$d{"each"} =["cada"]; + $$d{"of"} =["da","do"]; + $$d{"at"} =["as","${a2}s"]; + $$d{"on"} =["na","no"]; + $$d{"future"} =["em"]; + $$d{"past"} =["a","${a2}"]; + $$d{"next"} =["proxima","proximo","pr${o2}xima","pr${o2}ximo"]; + $$d{"prev"} =["ultima","ultimo","${u}ltima","${u}ltimo"]; + $$d{"later"} =["passadas","passados"]; + + $$d{"exact"} =["exactamente"]; + $$d{"approx"} =["aproximadamente"]; + $$d{"business"}=["util","uteis"]; + + $$d{"offset"} =["ontem","-0:0:0:1:0:0:0", + "amanha","+0:0:0:1:0:0:0","amanh${a3}","+0:0:0:1:0:0:0"]; + $$d{"times"} =["meio-dia","12:00:00","meia-noite","00:00:00"]; + + $$d{"years"} =["anos","ano","ans","an","a"]; + $$d{"months"} =["meses","m${e2}s","mes","m"]; + $$d{"weeks"} =["semanas","semana","sem","sems","s"]; + $$d{"days"} =["dias","dia","d"]; + $$d{"hours"} =["horas","hora","hr","hrs"]; + $$d{"minutes"} =["minutos","minuto","min","mn"]; + $$d{"seconds"} =["segundos","segundo","seg","sg"]; + $$d{"replace"} =["m","mes","s","sems"]; + + $$d{"sephm"} =':'; + $$d{"sepms"} =':'; + $$d{"sepss"} ='[,]'; + + $$d{"am"} = ["AM","A.M."]; + $$d{"pm"} = ["PM","P.M."]; +} + +sub Date_Init_Russian { + print "DEBUG: Date_Init_Russian\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + my(%h)=(); + &Char_8Bit(\%h); + my($a) =$h{"a:"}; + + $$d{"month_name"}= + [ + 
["\xd1\xce\xd7\xc1\xd2\xd1","\xc6\xc5\xd7\xd2\xc1\xcc\xd1", + "\xcd\xc1\xd2\xd4\xc1","\xc1\xd0\xd2\xc5\xcc\xd1","\xcd\xc1\xd1", + "\xc9\xc0\xce\xd1", + "\xc9\xc0\xcc\xd1","\xc1\xd7\xc7\xd5\xd3\xd4\xc1", + "\xd3\xc5\xce\xd4\xd1\xc2\xd2\xd1","\xcf\xcb\xd4\xd1\xc2\xd2\xd1", + "\xce\xcf\xd1\xc2\xd2\xd1","\xc4\xc5\xcb\xc1\xc2\xd2\xd1"], + ["\xd1\xce\xd7\xc1\xd2\xd8","\xc6\xc5\xd7\xd2\xc1\xcc\xd8", + "\xcd\xc1\xd2\xd4","\xc1\xd0\xd2\xc5\xcc\xd8","\xcd\xc1\xca", + "\xc9\xc0\xce\xd8", + "\xc9\xc0\xcc\xd8","\xc1\xd7\xc7\xd5\xd3\xd4", + "\xd3\xc5\xce\xd4\xd1\xc2\xd2\xd8","\xcf\xcb\xd4\xd1\xc2\xd2\xd8", + "\xce\xcf\xd1\xc2\xd2\xd8","\xc4\xc5\xcb\xc1\xc2\xd2\xd8"] + ]; + + $$d{"month_abb"}= + [["\xd1\xce\xd7","\xc6\xc5\xd7","\xcd\xd2\xd4","\xc1\xd0\xd2", + "\xcd\xc1\xca","\xc9\xc0\xce", + "\xc9\xc0\xcc","\xc1\xd7\xc7","\xd3\xce\xd4","\xcf\xcb\xd4", + "\xce\xcf\xd1\xc2","\xc4\xc5\xcb"], + ["","\xc6\xd7\xd2","","","\xcd\xc1\xd1","", + "","","\xd3\xc5\xce","\xcf\xcb\xd4","\xce\xcf\xd1",""]]; + + $$d{"day_name"}= + [["\xd0\xcf\xce\xc5\xc4\xc5\xcc\xd8\xce\xc9\xcb", + "\xd7\xd4\xcf\xd2\xce\xc9\xcb","\xd3\xd2\xc5\xc4\xc1", + "\xde\xc5\xd4\xd7\xc5\xd2\xc7","\xd0\xd1\xd4\xce\xc9\xc3\xc1", + "\xd3\xd5\xc2\xc2\xcf\xd4\xc1", + "\xd7\xcf\xd3\xcb\xd2\xc5\xd3\xc5\xce\xd8\xc5"]]; + $$d{"day_abb"}= + [["\xd0\xce\xc4","\xd7\xd4\xd2","\xd3\xd2\xc4","\xde\xd4\xd7", + "\xd0\xd4\xce","\xd3\xd5\xc2","\xd7\xd3\xcb"], + ["\xd0\xcf\xce","\xd7\xd4\xcf","\xd3\xd2e","\xde\xc5\xd4", + "\xd0\xd1\xd4","\xd3\xd5\xc2","\xd7\xcf\xd3\xcb"]]; + $$d{"day_char"}= + [["\xd0\xce","\xd7\xd4","\xd3\xd2","\xde\xd4","\xd0\xd4","\xd3\xc2", + "\xd7\xd3"]]; + + $$d{"num_suff"}= + [["1 ","2 ","3 ","4 ","5 ","6 ","7 ","8 ","9 ","10 ", + "11 ","12 ","13 ","14 ","15 ","16 ","17 ","18 ","19 ","20 ", + "21 ","22 ","23 ","24 ","25 ","26 ","27 ","28 ","29 ","30 ", + "31 "]]; + $$d{"num_word"}= + [["\xd0\xc5\xd2\xd7\xd9\xca","\xd7\xd4\xcf\xd2\xcf\xca", + "\xd4\xd2\xc5\xd4\xc9\xca","\xde\xc5\xd4\xd7\xc5\xd2\xd4\xd9\xca", + "\xd0\xd1\xd4\xd9\xca","\xdb\xc5\xd3\xd4\xcf\xca", + "\xd3\xc5\xc4\xd8\xcd\xcf\xca","\xd7\xcf\xd3\xd8\xcd\xcf\xca", + "\xc4\xc5\xd7\xd1\xd4\xd9\xca","\xc4\xc5\xd3\xd1\xd4\xd9\xca", + "\xcf\xc4\xc9\xce\xce\xc1\xc4\xc3\xc1\xd4\xd9\xca", + "\xc4\xd7\xc5\xce\xc1\xc4\xde\xc1\xd4\xd9\xca", + "\xd4\xd2\xc5\xce\xc1\xc4\xc3\xc1\xd4\xd9\xca", + "\xde\xc5\xd4\xd9\xd2\xce\xc1\xc4\xc3\xc1\xd4\xd9\xca", + "\xd0\xd1\xd4\xce\xc1\xc4\xc3\xc1\xd4\xd9\xca", + "\xdb\xc5\xd3\xd4\xce\xc1\xc4\xc3\xc1\xd4\xd9\xca", + "\xd3\xc5\xcd\xd8\xce\xc1\xc4\xc3\xc1\xd4\xd9\xca", + "\xd7\xcf\xd3\xc5\xcd\xd8\xce\xc1\xc4\xc3\xc1\xd4\xd9\xca", + "\xc4\xc5\xd7\xd1\xd4\xce\xc1\xc4\xc3\xc1\xd4\xd9\xca", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd9\xca", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd0\xc5\xd2\xd7\xd9\xca", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd7\xd4\xcf\xd2\xcf\xca", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd4\xd2\xc5\xd4\xc9\xca", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xde\xc5\xd4\xd7\xc5\xd2\xd4\xd9\xca", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd0\xd1\xd4\xd9\xca", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xdb\xc5\xd3\xd4\xcf\xca", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd3\xc5\xc4\xd8\xcd\xcf\xca", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd7\xcf\xd3\xd8\xcd\xcf\xca", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xc4\xc5\xd7\xd1\xd4\xd9\xca", + "\xd4\xd2\xc9\xc4\xc3\xc1\xd4\xd9\xca", + "\xd4\xd2\xc9\xc4\xc3\xc1\xd4\xd8 \xd0\xc5\xd2\xd7\xd9\xca"], + + ["\xd0\xc5\xd2\xd7\xcf\xc5","\xd7\xd4\xcf\xd2\xcf\xc5", + "\xd4\xd2\xc5\xd4\xd8\xc5","\xde\xc5\xd4\xd7\xc5\xd2\xd4\xcf\xc5", + 
"\xd0\xd1\xd4\xcf\xc5","\xdb\xc5\xd3\xd4\xcf\xc5", + "\xd3\xc5\xc4\xd8\xcd\xcf\xc5","\xd7\xcf\xd3\xd8\xcd\xcf\xc5", + "\xc4\xc5\xd7\xd1\xd4\xcf\xc5","\xc4\xc5\xd3\xd1\xd4\xcf\xc5", + "\xcf\xc4\xc9\xce\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc5", + "\xc4\xd7\xc5\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc5", + "\xd4\xd2\xc5\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc5", + "\xde\xc5\xd4\xd9\xd2\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc5", + "\xd0\xd1\xd4\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc5", + "\xdb\xc5\xd3\xd4\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc5", + "\xd3\xc5\xcd\xd8\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc5", + "\xd7\xcf\xd3\xc5\xcd\xd8\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc5", + "\xc4\xc5\xd7\xd1\xd4\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xcf\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd0\xc5\xd2\xd7\xcf\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd7\xd4\xcf\xd2\xcf\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd4\xd2\xc5\xd4\xd8\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xde\xc5\xd4\xd7\xc5\xd2\xd4\xcf\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd0\xd1\xd4\xcf\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xdb\xc5\xd3\xd4\xcf\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd3\xc5\xc4\xd8\xcd\xcf\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd7\xcf\xd3\xd8\xcd\xcf\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xc4\xc5\xd7\xd1\xd4\xcf\xc5", + "\xd4\xd2\xc9\xc4\xc3\xc1\xd4\xcf\xc5", + "\xd4\xd2\xc9\xc4\xc3\xc1\xd4\xd8 \xd0\xc5\xd2\xd7\xcf\xc5"], + + ["\xd0\xc5\xd2\xd7\xcf\xc7\xcf","\xd7\xd4\xcf\xd2\xcf\xc7\xcf", + "\xd4\xd2\xc5\xd4\xd8\xc5\xc7\xcf", + "\xde\xc5\xd4\xd7\xc5\xd2\xd4\xcf\xc7\xcf","\xd0\xd1\xd4\xcf\xc7\xcf", + "\xdb\xc5\xd3\xd4\xcf\xc7\xcf","\xd3\xc5\xc4\xd8\xcd\xcf\xc7\xcf", + "\xd7\xcf\xd3\xd8\xcd\xcf\xc7\xcf", + "\xc4\xc5\xd7\xd1\xd4\xcf\xc7\xcf","\xc4\xc5\xd3\xd1\xd4\xcf\xc7\xcf", + "\xcf\xc4\xc9\xce\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xc4\xd7\xc5\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xd4\xd2\xc5\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xde\xc5\xd4\xd9\xd2\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xd0\xd1\xd4\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xdb\xc5\xd3\xd4\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xd3\xc5\xcd\xd8\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xd7\xcf\xd3\xc5\xcd\xd8\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xc4\xc5\xd7\xd1\xd4\xce\xc1\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd0\xc5\xd2\xd7\xcf\xc7\xcf", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd7\xd4\xcf\xd2\xcf\xc5", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd4\xd2\xc5\xd4\xd8\xc5\xc7\xcf", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xde\xc5\xd4\xd7\xc5\xd2\xd4\xcf\xc7\xcf", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd0\xd1\xd4\xcf\xc7\xcf", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xdb\xc5\xd3\xd4\xcf\xc7\xcf", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd3\xc5\xc4\xd8\xcd\xcf\xc7\xcf", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xd7\xcf\xd3\xd8\xcd\xcf\xc7\xcf", + "\xc4\xd7\xc1\xc4\xc3\xc1\xd4\xd8 \xc4\xc5\xd7\xd1\xd4\xcf\xc7\xcf", + "\xd4\xd2\xc9\xc4\xc3\xc1\xd4\xcf\xc7\xcf", + "\xd4\xd2\xc9\xc4\xc3\xc1\xd4\xd8 \xd0\xc5\xd2\xd7\xcf\xc7\xcf"]]; + + $$d{"now"} =["\xd3\xc5\xc7\xcf\xc4\xce\xd1","\xd3\xc5\xca\xde\xc1\xd3"]; + $$d{"last"} =["\xd0\xcf\xd3\xcc\xc5\xc4\xce\xc9\xca"]; + $$d{"each"} =["\xcb\xc1\xd6\xc4\xd9\xca"]; + $$d{"of"} =[" "]; + $$d{"at"} =["\xd7"]; + $$d{"on"} =["\xd7"]; + $$d{"future"} =["\xd7\xd0\xc5\xd2\xc5\xc4 \xce\xc1"]; + $$d{"past"} =["\xce\xc1\xda\xc1\xc4 \xce\xc1 "]; + $$d{"next"} =["\xd3\xcc\xc5\xc4\xd5\xc0\xdd\xc9\xca"]; + $$d{"prev"} =["\xd0\xd2\xc5\xc4\xd9\xc4\xd5\xdd\xc9\xca"]; + 
$$d{"later"} =["\xd0\xcf\xda\xd6\xc5"]; + + $$d{"exact"} =["\xd4\xcf\xde\xce\xcf"]; + $$d{"approx"} =["\xd0\xd2\xc9\xcd\xc5\xd2\xce\xcf"]; + $$d{"business"}=["\xd2\xc1\xc2\xcf\xde\xc9\xc8"]; + + $$d{"offset"} =["\xd0\xcf\xda\xc1\xd7\xde\xc5\xd2\xc1","-0:0:0:2:0:0:0", + "\xd7\xde\xc5\xd2\xc1","-0:0:0:1:0:0:0", + "\xda\xc1\xd7\xd4\xd2\xc1","+0:0:0:1:0:0:0", + "\xd0\xcf\xd3\xcc\xc5\xda\xc1\xd7\xd4\xd2\xc1", + "+0:0:0:2:0:0:0"]; + $$d{"times"} =["\xd0\xcf\xcc\xc4\xc5\xce\xd8","12:00:00", + "\xd0\xcf\xcc\xce\xcf\xde\xd8","00:00:00"]; + + $$d{"years"} =["\xc7","\xc7\xc4","\xc7\xcf\xc4","\xcc\xc5\xd4", + "\xcc\xc5\xd4","\xc7\xcf\xc4\xc1"]; + $$d{"months"} =["\xcd\xc5\xd3","\xcd\xc5\xd3\xd1\xc3", + "\xcd\xc5\xd3\xd1\xc3\xc5\xd7"]; + $$d{"weeks"} =["\xce\xc5\xc4\xc5\xcc\xd1","\xce\xc5\xc4\xc5\xcc\xd8", + "\xce\xc5\xc4\xc5\xcc\xc9","\xce\xc5\xc4\xc5\xcc\xc0"]; + $$d{"days"} =["\xc4","\xc4\xc5\xce\xd8","\xc4\xce\xc5\xca", + "\xc4\xce\xd1"]; + $$d{"hours"} =["\xde","\xde.","\xde\xd3","\xde\xd3\xd7","\xde\xc1\xd3", + "\xde\xc1\xd3\xcf\xd7","\xde\xc1\xd3\xc1"]; + $$d{"minutes"} =["\xcd\xce","\xcd\xc9\xce","\xcd\xc9\xce\xd5\xd4\xc1", + "\xcd\xc9\xce\xd5\xd4"]; + $$d{"seconds"} =["\xd3","\xd3\xc5\xcb","\xd3\xc5\xcb\xd5\xce\xc4\xc1", + "\xd3\xc5\xcb\xd5\xce\xc4"]; + $$d{"replace"} =[]; + + $$d{"sephm"} ="[:\xde]"; + $$d{"sepms"} ="[:\xcd]"; + $$d{"sepss"} ="[:.\xd3]"; + + $$d{"am"} = ["\xc4\xd0","${a}\xf0","${a}.\xf0.","\xce\xcf\xde\xc9", + "\xd5\xd4\xd2\xc1", + "\xc4\xcf \xd0\xcf\xcc\xd5\xc4\xce\xd1"]; + $$d{"pm"} = ["\xd0\xd0","\xf0\xf0","\xf0.\xf0.","\xc4\xce\xd1", + "\xd7\xc5\xde\xc5\xd2\xc1", + "\xd0\xcf\xd3\xcc\xc5 \xd0\xcf\xcc\xd5\xc4\xce\xd1", + "\xd0\xcf \xd0\xcf\xcc\xd5\xc4\xce\xc0"]; +} + +sub Date_Init_Turkish { + print "DEBUG: Date_Init_Turkish\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + + $$d{"month_name"}= + [ + ["ocak","subat","mart","nisan","mayis","haziran", + "temmuz","agustos","eylul","ekim","kasim","aralik"], + ["ocak","\xfeubat","mart","nisan","may\xfds","haziran", + "temmuz","a\xf0ustos","eyl\xfcl","ekim","kas\xfdm","aral\xfdk"] + ]; + + $$d{"month_abb"}= + [ + ["oca","sub","mar","nis","may","haz", + "tem","agu","eyl","eki","kas","ara"], + ["oca","\xfeub","mar","nis","may","haz", + "tem","a\xf0u","eyl","eki","kas","ara"] + ]; + + $$d{"day_name"}= + [ + ["pazartesi","sali","carsamba","persembe","cuma","cumartesi","pazar"], + ["pazartesi","sal\xfd","\xe7ar\xfeamba","per\xfeembe","cuma", + "cumartesi","pazar"], + ]; + + $$d{"day_abb"}= + [ + ["pzt","sal","car","per","cum","cts","paz"], + ["pzt","sal","\xe7ar","per","cum","cts","paz"], + ]; + + $$d{"day_char"}= + [["Pt","S","Cr","Pr","C","Ct","P"], + ["Pt","S","\xc7","Pr","C","Ct","P"]]; + + $$d{"num_suff"}= + [[ "1.", "2.", "3.", "4.", "5.", "6.", "7.", "8.", "9.", "10.", + "11.", "12.", "13.", "14.", "15.", "16.", "17.", "18.", "19.", "20.", + "21.", "22.", "23.", "24.", "25.", "26.", "27.", "28.", "29.", "30.", + "31."]]; + + $$d{"num_word"}= + [ + ["birinci","ikinci","ucuncu","dorduncu", + "besinci","altinci","yedinci","sekizinci", + "dokuzuncu","onuncu","onbirinci","onikinci", + "onucuncu","ondordoncu", + "onbesinci","onaltinci","onyedinci","onsekizinci", + "ondokuzuncu","yirminci","yirmibirinci","yirmikinci", + "yirmiucuncu","yirmidorduncu", + "yirmibesinci","yirmialtinci","yirmiyedinci","yirmisekizinci", + "yirmidokuzuncu","otuzuncu","otuzbirinci"], + ["birinci","ikinci","\xfc\xe7\xfcnc\xfc","d\xf6rd\xfcnc\xfc", + "be\xfeinci","alt\xfdnc\xfd","yedinci","sekizinci", + "dokuzuncu","onuncu","onbirinci","onikinci", + 
"on\xfc\xe7\xfcnc\xfc","ond\xf6rd\xfcnc\xfc", + "onbe\xfeinci","onalt\xfdnc\xfd","onyedinci","onsekizinci", + "ondokuzuncu","yirminci","yirmibirinci","yirmikinci", + "yirmi\xfc\xe7\xfcnc\xfc","yirmid\xf6rd\xfcnc\xfc", + "yirmibe\xfeinci","yirmialt\xfdnc\xfd","yirmiyedinci","yirmisekizinci", + "yirmidokuzuncu","otuzuncu","otuzbirinci"] + ]; + + $$d{"now"} =["\xfeimdi", "simdi", "bugun","bug\xfcn"]; + $$d{"last"} =["son", "sonuncu"]; + $$d{"each"} =["her"]; + $$d{"of"} =["of"]; + $$d{"at"} =["saat"]; + $$d{"on"} =["on"]; + $$d{"future"} =["gelecek"]; + $$d{"past"} =["ge\xe7mi\xfe", "gecmis","gecen", "ge\xe7en"]; + $$d{"next"} =["gelecek","sonraki"]; + $$d{"prev"} =["onceki","\xf6nceki"]; + $$d{"later"} =["sonra"]; + + $$d{"exact"} =["tam"]; + $$d{"approx"} =["yakla\xfe\xfdk", "yaklasik"]; + $$d{"business"}=["i\xfe","\xe7al\xfd\xfema","is", "calisma"]; + + $$d{"offset"} =["d\xfcn","-0:0:0:1:0:0:0", + "dun", "-0:0:0:1:0:0:0", + "yar\xfdn","+0:0:0:1:0:0:0", + "yarin","+0:0:0:1:0:0:0"]; + + $$d{"times"} =["\xf6\xf0len","12:00:00", + "oglen","12:00:00", + "yarim","12:300:00", + "yar\xfdm","12:30:00", + "gece yar\xfds\xfd","00:00:00", + "gece yarisi","00:00:00"]; + + $$d{"years"} =["yil","y"]; + $$d{"months"} =["ay","a"]; + $$d{"weeks"} =["hafta", "h"]; + $$d{"days"} =["gun","g"]; + $$d{"hours"} =["saat"]; + $$d{"minutes"} =["dakika","dak","d"]; + $$d{"seconds"} =["saniye","sn",]; + $$d{"replace"} =["s","saat"]; + + $$d{"sephm"} =':'; + $$d{"sepms"} =':'; + $$d{"sepss"} ='[.:,]'; + + $$d{"am"} = ["\xf6gleden \xf6nce","ogleden once"]; + $$d{"pm"} = ["\xf6\xf0leden sonra","ogleden sonra"]; +} + +sub Date_Init_Danish { + print "DEBUG: Date_Init_Danish\n" if ($Curr{"Debug"} =~ /trace/); + my($d)=@_; + + $$d{"month_name"}= + [["Januar","Februar","Marts","April","Maj","Juni", + "Juli","August","September","Oktober","November","December"]]; + $$d{"month_abb"}= + [["Jan","Feb","Mar","Apr","Maj","Jun", + "Jul","Aug","Sep","Okt","Nov","Dec"]]; + + $$d{"day_name"}= + [["Mandag","Tirsdag","Onsdag","Torsdag","Fredag","Lordag","Sondag"], + ["Mandag","Tirsdag","Onsdag","Torsdag","Fredag","L\xf8rdag","S\xf8ndag"]]; + + $$d{"day_abb"}= + [["Man","Tis","Ons","Tor","Fre","Lor","Son"], + ["Man","Tis","Ons","Tor","Fre","L\xf8r","S\xf8n"]]; + $$d{"day_char"}= + [["M","Ti","O","To","F","L","S"]]; + + $$d{"num_suff"}= + [["1:e","2:e","3:e","4:e","5:e","6:e","7:e","8:e","9:e","10:e", + "11:e","12:e","13:e","14:e","15:e","16:e","17:e","18:e","19:e","20:e", + "21:e","22:e","23:e","24:e","25:e","26:e","27:e","28:e","29:e","30:e", + "31:e"]]; + $$d{"num_word"}= + [["forste","anden","tredie","fjerde","femte","sjette","syvende", + "ottende","niende","tiende","elfte","tolvte","trettende","fjortende", + "femtende","sekstende","syttende","attende","nittende","tyvende", + "enogtyvende","toogtyvende","treogtyvende","fireogtyvende","femogtyvende", + "seksogtyvende","syvogtyvende","otteogtyvende","niogtyvende", + "tredivte","enogtredivte"], + ["f\xf8rste","anden","tredie","fjerde","femte","sjette","syvende", + "ottende","niende","tiende","elfte","tolvte","trettende","fjortende", + "femtende","sekstende","syttende","attende","nittende","tyvende", + "enogtyvende","toogtyvende","treogtyvende","fireogtyvende","femogtyvende", + "seksogtyvende","syvogtyvende","otteogtyvende","niogtyvende", + "tredivte","enogtredivte"]]; + + $$d{"now"} =["idag","nu"]; + $$d{"last"} =["forrige","sidste","nyeste"]; + $$d{"each"} =["hver"]; + $$d{"of"} =["om"]; + $$d{"at"} =["kl","kl.","klokken"]; + $$d{"on"} =["pa","p\xe5"]; + $$d{"future"} =["om"]; + 
$$d{"past"} =["siden"]; + $$d{"next"} =["nasta","n\xe6ste"]; + $$d{"prev"} =["forrige"]; + $$d{"later"} =["senere"]; + + $$d{"exact"} =["pracist","pr\xe6cist"]; + $$d{"approx"} =["circa"]; + $$d{"business"}=["arbejdsdag","arbejdsdage"]; + + $$d{"offset"} =["ig\xe5r","-0:0:0:1:0:0:0","igar","-0:0:0:1:0:0:0", + "imorgen","+0:0:0:1:0:0:0"]; + $$d{"times"} =["midt pa dagen","12:00:00","midt p\xe5 dagen","12:00:00", + "midnat","00:00:00"]; + + $$d{"years"} =["ar","\xe5r"]; + $$d{"months"} =["man","maned","maneder","m\xe5n","m\xe5ned","m\xe5neder"]; + $$d{"weeks"} =["u","uge","uger"]; + $$d{"days"} =["d","dag","dage"]; + $$d{"hours"} =["t","tim","time","timer"]; + $$d{"minutes"} =["min","minut","minutter"]; + $$d{"seconds"} =["s","sek","sekund","sekunder"]; + $$d{"replace"} =["m","minut"]; + + $$d{"sephm"} ='[.:]'; + $$d{"sepms"} =':'; + $$d{"sepss"} ='[.:]'; + + $$d{"am"} = ["FM"]; + $$d{"pm"} = ["EM"]; +} + +######################################################################## +# FROM MY PERSONAL LIBRARIES +######################################################################## + +no integer; + +# &ModuloAddition($N,$add,\$val,\$rem); +# This calculates $val=$val+$add and forces $val to be in a certain range. +# This is useful for adding numbers for which only a certain range is +# allowed (for example, minutes can be between 0 and 59 or months can be +# between 1 and 12). The absolute value of $N determines the range and +# the sign of $N determines whether the range is 0 to N-1 (if N>0) or +# 1 to N (N<0). The remainder (as modulo N) is added to $rem. +# Example: +# To add 2 hours together (with the excess returned in days) use: +# &ModuloAddition(60,$s1,\$s,\$day); +sub ModuloAddition { + my($N,$add,$val,$rem)=@_; + return if ($N==0); + $$val+=$add; + if ($N<0) { + # 1 to N + $N = -$N; + if ($$val>$N) { + $$rem+= int(($$val-1)/$N); + $$val = ($$val-1)%$N +1; + } elsif ($$val<1) { + $$rem-= int(-$$val/$N)+1; + $$val = $N-(-$$val % $N); + } + + } else { + # 0 to N-1 + if ($$val>($N-1)) { + $$rem+= int($$val/$N); + $$val = $$val%$N; + } elsif ($$val<0) { + $$rem-= int(-($$val+1)/$N)+1; + $$val = ($N-1)-(-($$val+1)%$N); + } + } +} + +# $Flag=&IsInt($String [,$low, $high]); +# Returns 1 if $String is a valid integer, 0 otherwise. If $low is +# entered, $String must be >= $low. If $high is entered, $String must +# be <= $high. It is valid to check only one of the bounds. +sub IsInt { + my($N,$low,$high)=@_; + return 0 if (! defined $N or + $N !~ /^\s*[-+]?\d+\s*$/ or + defined $low && $N<$low or + defined $high && $N>$high); + return 1; +} + +# $Pos=&SinLindex(\@List,$Str [,$offset [,$CaseInsensitive]]); +# Searches for an exact string in a list. +# +# This is similar to RinLindex except that it searches for elements +# which are exactly equal to $Str (possibly case insensitive). +sub SinLindex { + my($listref,$Str,$offset,$Insensitive)=@_; + my($i,$len,$tmp)=(); + $len=$#$listref; + return -2 if ($len<0 or ! $Str); + return -1 if (&Index_First(\$offset,$len)); + $Str=uc($Str) if ($Insensitive); + for ($i=$offset; $i<=$len; $i++) { + $tmp=$$listref[$i]; + $tmp=uc($tmp) if ($Insensitive); + return $i if ($tmp eq $Str); + } + return -1; +} + +sub Index_First { + my($offsetref,$max)=@_; + $$offsetref=0 if (! 
$$offsetref); + if ($$offsetref < 0) { + $$offsetref += $max + 1; + $$offsetref=0 if ($$offsetref < 0); + } + return -1 if ($$offsetref > $max); + return 0; +} + +# $File=&CleanFile($file); +# This cleans up a path to remove the following things: +# double slash /a//b -> /a/b +# trailing dot /a/. -> /a +# leading dot ./a -> a +# trailing slash a/ -> a +sub CleanFile { + my($file)=@_; + $file =~ s/\s*$//; + $file =~ s/^\s*//; + $file =~ s|//+|/|g; # multiple slash + $file =~ s|/\.$|/|; # trailing /. (leaves trailing slash) + $file =~ s|^\./|| # leading ./ + if ($file ne "./"); + $file =~ s|/$|| # trailing slash + if ($file ne "/"); + return $file; +} + +# $File=&ExpandTilde($file); +# This checks to see if a "~" appears as the first character in a path. +# If it does, the "~" expansion is interpreted (if possible) and the full +# path is returned. If a "~" expansion is used but cannot be +# interpreted, an empty string is returned. +# +# This is Windows/Mac friendly. +# This is efficient. +sub ExpandTilde { + my($file)=shift; + my($user,$home)=(); + # ~aaa/bbb= ~ aaa /bbb + if ($file =~ s|^~([^/]*)||) { + $user=$1; + # Single user operating systems (Mac, MSWindows) don't have the getpwnam + # and getpwuid routines defined. Try to catch various different ways + # of knowing we are on one of these systems: + return "" if ($OS eq "Windows" or + $OS eq "Mac" or + $OS eq "Netware" or + $OS eq "MPE"); + $user="" if (! defined $user); + + if ($user) { + $home= (getpwnam($user))[7]; + } else { + $home= (getpwuid($<))[7]; + } + $home = VMS::Filespec::unixpath($home) if ($OS eq "VMS"); + return "" if (! $home); + $file="$home/$file"; + } + $file; +} + +# $File=&FullFilePath($file); +# Returns the full or relative path to $file (expanding "~" if necessary). +# Returns an empty string if a "~" expansion cannot be interpreted. The +# path does not need to exist. CleanFile is called. +sub FullFilePath { + my($file)=shift; + my($rootpat) = '^/'; #default pattern to match absolute path + $rootpat = '^(\\|/|([A-Za-z]:[\\/]))' if ($OS eq 'Windows'); + $file=&ExpandTilde($file); + return "" if (! $file); + return &CleanFile($file); +} + +# $Flag=&CheckFilePath($file [,$mode]); +# Checks to see if $file exists, to see what type it is, and whether +# the script can access it. If it exists and has the correct mode, 1 +# is returned. +# +# $mode is a string which may contain any of the valid file test operator +# characters except t, M, A, C. The appropriate test is run for each +# character. For example, if $mode is "re" the -r and -e tests are both +# run. +# +# An empty string is returned if the file doesn't exist. A 0 is returned +# if the file exists but any test fails. +# +# All characters in $mode which do not correspond to valid tests are +# ignored. +sub CheckFilePath { + my($file,$mode)=@_; + my($test)=(); + $file=&FullFilePath($file); + $mode = "" if (! defined $mode); + + # Run tests + return 0 if (! defined $file or ! $file); + return 0 if (( ! -e $file) or + ($mode =~ /r/ && ! -r $file) or + ($mode =~ /w/ && ! -w $file) or + ($mode =~ /x/ && ! -x $file) or + ($mode =~ /R/ && ! -R $file) or + ($mode =~ /W/ && ! -W $file) or + ($mode =~ /X/ && ! -X $file) or + ($mode =~ /o/ && ! -o $file) or + ($mode =~ /O/ && ! -O $file) or + ($mode =~ /z/ && ! -z $file) or + ($mode =~ /s/ && ! -s $file) or + ($mode =~ /f/ && ! -f $file) or + ($mode =~ /d/ && ! -d $file) or + ($mode =~ /l/ && ! -l $file) or + ($mode =~ /s/ && ! -s $file) or + ($mode =~ /p/ && ! -p $file) or + ($mode =~ /b/ && ! 
-b $file) or + ($mode =~ /c/ && ! -c $file) or + ($mode =~ /u/ && ! -u $file) or + ($mode =~ /g/ && ! -g $file) or + ($mode =~ /k/ && ! -k $file) or + ($mode =~ /T/ && ! -T $file) or + ($mode =~ /B/ && ! -B $file)); + return 1; +} +#&& + +# $Path=&FixPath($path [,$full] [,$mode] [,$error]); +# Makes sure that every directory in $path (a colon separated list of +# directories) appears as a full path or relative path. All "~" +# expansions are removed. All trailing slashes are removed also. If +# $full is non-nil, relative paths are expanded to full paths as well. +# +# If $mode is given, it may be either "e", "r", or "w". In this case, +# additional checking is done to each directory. If $mode is "e", it +# need ony exist to pass the check. If $mode is "r", it must have have +# read and execute permission. If $mode is "w", it must have read, +# write, and execute permission. +# +# The value of $error determines what happens if the directory does not +# pass the test. If it is non-nil, if any directory does not pass the +# test, the subroutine returns the empty string. Otherwise, it is simply +# removed from $path. +# +# The corrected path is returned. +sub FixPath { + my($path,$full,$mode,$err)=@_; + local($_)=""; + my(@dir)=split(/$Cnf{"PathSep"}/,$path); + $full=0 if (! defined $full); + $mode="" if (! defined $mode); + $err=0 if (! defined $err); + $path=""; + if ($mode eq "e") { + $mode="de"; + } elsif ($mode eq "r") { + $mode="derx"; + } elsif ($mode eq "w") { + $mode="derwx"; + } + + foreach (@dir) { + + # Expand path + if ($full) { + $_=&FullFilePath($_); + } else { + $_=&ExpandTilde($_); + } + if (! $_) { + return "" if ($err); + next; + } + + # Check mode + if (! $mode or &CheckFilePath($_,$mode)) { + $path .= $Cnf{"PathSep"} . $_; + } else { + return "" if ($err); + } + } + $path =~ s/^$Cnf{"PathSep"}//; + return $path; +} +#&& + +# $File=&SearchPath($file,$path [,$mode] [,@suffixes]); +# Searches through directories in $path for a file named $file. The +# full path is returned if one is found, or an empty string otherwise. +# The file may exist with one of the @suffixes. The mode is checked +# similar to &CheckFilePath. +# +# The first full path that matches the name and mode is returned. If none +# is found, an empty string is returned. +sub SearchPath { + my($file,$path,$mode,@suff)=@_; + my($f,$s,$d,@dir,$fs)=(); + $path=&FixPath($path,1,"r"); + @dir=split(/$Cnf{"PathSep"}/,$path); + foreach $d (@dir) { + $f="$d/$file"; + $f=~ s|//|/|g; + return $f if (&CheckFilePath($f,$mode)); + foreach $s (@suff) { + $fs="$f.$s"; + return $fs if (&CheckFilePath($fs,$mode)); + } + } + return ""; +} + +# @list=&ReturnList($str); +# This takes a string which should be a comma separated list of integers +# or ranges (5-7). It returns a sorted list of all integers referred to +# by the string, or () if there is an invalid element. +# +# Negative integers are also handled. "-2--1" is equivalent to "-2,-1". 
+sub ReturnList { + my($str)=@_; + my(@ret,@str,$from,$to,$tmp)=(); + @str=split(/,/,$str); + foreach $str (@str) { + if ($str =~ /^[-+]?\d+$/) { + push(@ret,$str); + } elsif ($str =~ /^([-+]?\d+)-([-+]?\d+)$/) { + ($from,$to)=($1,$2); + if ($from>$to) { + $tmp=$from; + $from=$to; + $to=$tmp; + } + push(@ret,$from..$to); + } else { + return (); + } + } + @ret; +} + +1; diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/Date/Manip.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/Date/Manip.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,2755 @@ +# Copyright (c) 1995-2003 Sullivan Beck. All rights reserved. +# This program is free software; you can redistribute it and/or modify it +# under the same terms as Perl itself. + +=head1 NAME + +Date::Manip - date manipulation routines + +=head1 SYNOPSIS + + use Date::Manip; + + $date = ParseDate(\@args); + $date = ParseDate($string); + $date = ParseDate(\$string); + + @date = UnixDate($date,@format); + $date = UnixDate($date,@format); + + $delta = ParseDateDelta(\@args); + $delta = ParseDateDelta($string); + $delta = ParseDateDelta(\$string); + + @str = Delta_Format($delta,$dec,@format); + $str = Delta_Format($delta,$dec,@format); + + $recur = ParseRecur($string,$base,$date0,$date1,$flags); + @dates = ParseRecur($string,$base,$date0,$date1,$flags); + + $flag = Date_Cmp($date1,$date2); + + $d = DateCalc($d1,$d2 [,$errref] [,$del]); + + $date = Date_SetTime($date,$hr,$min,$sec); + $date = Date_SetTime($date,$time); + + $date = Date_SetDateField($date,$field,$val [,$nocheck]); + + $date = Date_GetPrev($date,$dow,$today,$hr,$min,$sec); + $date = Date_GetPrev($date,$dow,$today,$time); + + $date = Date_GetNext($date,$dow,$today,$hr,$min,$sec); + $date = Date_GetNext($date,$dow,$today,$time); + + $version = DateManipVersion; + + $flag = Date_IsWorkDay($date [,$flag]); + + $date = Date_NextWorkDay($date,$off [,$time]); + $date = Date_PrevWorkDay($date,$off [,$time]); + + $name = Date_IsHoliday($date); + + $listref = Events_List($date); + $listref = Events_List($date0,$date1); + + &Date_Init(); + &Date_Init("VAR=VAL","VAR=VAL",...); + @list = Date_Init(); + @list = Date_Init("VAR=VAL","VAR=VAL",...); + +The above routines all check to make sure that Date_Init is called. If it +hasn't been, they will call it automatically. As a result, there is usually +no need to call Date_Init explicitely unless you want to change some of the +config variables (described below). + +The following routines are used by the above routines (though they can also +be called directly). $y may be entered as either a 2 or 4 digit year (it +will be converted to a 4 digit year based on the variable YYtoYYYY +described below). Month and day should be numeric in all cases. Most (if +not all) of the information below can be gotten from UnixDate which is +really the way I intended it to be gotten, but there are reasons to use +these (these are significantly faster). + +***NOTE*** Unlike the routines listed above, the following routines do NOT +explicitely call Date_Init. You must make sure that Date_Init has been +called, either by you explicitely, or by one of the above routines before you +use these routines. 
+ + $day = Date_DayOfWeek($m,$d,$y); + $secs = Date_SecsSince1970($m,$d,$y,$h,$mn,$s); + $secs = Date_SecsSince1970GMT($m,$d,$y,$h,$mn,$s); + $days = Date_DaysSince1BC($m,$d,$y); + $day = Date_DayOfYear($m,$d,$y); + $days = Date_DaysInYear($y); + $wkno = Date_WeekOfYear($m,$d,$y,$first); + $flag = Date_LeapYear($y); + $day = Date_DaySuffix($d); + $tz = Date_TimeZone(); + ($y,$m,$d,$h,$mn,$s) = Date_NthDayOfYear($y,$n); + +=head1 DESCRIPTION + +This is a set of routines designed to make any common date/time +manipulation easy to do. Operations such as comparing two times, +calculating a time a given amount of time from another, or parsing +international times are all easily done. From the very beginning, the main +focus of Date::Manip has been to be able to do ANY desired date/time +operation easily, not necessarily quickly. Also, it is definitely oriented +towards the type of operations we (as people) tend to think of rather than +those operations used routinely by computers. There are other modules that +can do a subset of the operations available in Date::Manip much quicker +than those presented here, so be sure to read the section SHOULD I USE +DATE::MANIP below before deciding which of the Date and Time modules from +CPAN is for you. + +Date::Manip deals with time as it is presented the Gregorian calendar (the +one currently in use). The Julian calendar defined leap years as every 4th +year. The Gregorian calendar improved this by making every 100th year NOT +a leap year, unless it was also the 400th year. The Gregorian calendar has +been extrapolated back to the year 0000 AD and forward to the year 9999 AD. +Note that in historical context, the Julian calendar was in use until 1582 +when the Gregorian calendar was adopted by the Catholic church. Protestant +countries did not accept it until later; Germany and Netherlands in 1698, +British Empire in 1752, Russia in 1918. Note that the Gregorian calendar +is itself imperfect and at some point will need to be corrected. No attempt +is made to correct for that, and my great great great grandchildren will be +long dead before this even occurs, so it's not an immediate concern. Yes, +this is the same type of attitute that caused the great Y2K problem... but +I have an excuse: I don't know what the correction will be, so I can't +possible implement it. Nobody doubted that the year after 1999 would be +known as 2000 :-). + +Date::Manip is therefore not equipped to truly deal with historical dates, +but should be able to perform (virtually) any operation dealing with a +modern time and date. + +Date::Manip has (or will have) functionality to work with several fundamental +types of data. + +=over 4 + +=item DATE + +Although the word date is used extensively here, it is actually somewhat +misleading. Date::Manip works with the full date AND time (year, month, +day, hour, minute, second and weeks when appropriate). It doesn't work +with fractional seconds. Timezones are also supported to some extent. + +NOTE: Much better support for timezones (including Daylight Savings Time) +is planned for the future. + +=item DELTA + +This refers to a duration or elapsed time. One thing to note is that, as +used in this module, a delta refers only to the amount of time elapsed. It +includes no information about a starting or ending time. + +=item RECURRENCE + +A recurrence is simply a notation for defining when a recurring event +occurs. For example, if an event occurs every other Friday or every +4 hours, this can be defined as a recurrence. 
With a recurrence and a +starting and ending date, you can get a list of dates in that period when +a recurring event occurs. + +=item GRAIN + +The granularity of a time basically refers to how accurate you wish to +treat a date. For example, if you want to compare two dates to see if +they are identical at a granularity of days, then they only have to occur +on the same day. At a granularity of an hour, they have to occur within +an hour of each other, etc. + +NOTE: Support for this will be added in the future. + +=item HOLIDAYS and EVENTS + +These are basically a named time. Holidays are used in business mode +calculations. Events allow things like calendar and scheduling +applications to be designed much more easily. + +=back + +Among other things, Date::Manip allow you to: + +1. Enter a date and be able to choose any format convenient + +2. Compare two dates, entered in widely different formats + to determine which is earlier + +3. Extract any information you want from ANY date using a + format string similar to the Unix date command + +4. Determine the amount of time between two dates + +5. Add a time offset to a date to get a second date (i.e. + determine the date 132 days ago or 2 years and 3 months + after Jan 2, 1992) + +6. Work with dates with dates using international formats + (foreign month names, 12/10/95 referring to October + rather than December, etc.). + +7. To find a list of dates where a recurring event happens. + +Each of these tasks is trivial (one or two lines at most) with this package. + +=head1 EXAMPLES + +In the documentation below, US formats are used, but in most (if not all) +cases, a non-English equivalent will work equally well. + +1. Parsing a date from any convenient format + + $date = ParseDate("today"); + $date = ParseDate("1st thursday in June 1992"); + $date = ParseDate("05/10/93"); + $date = ParseDate("12:30 Dec 12th 1880"); + $date = ParseDate("8:00pm december tenth"); + if (! $date) { + # Error in the date + } + +2. Compare two dates + + $date1 = ParseDate($string1); + $date2 = ParseDate($string2); + $flag = Date_Cmp($date1,$date2); + if ($flag<0) { + # date1 is earlier + } elsif ($flag==0) { + # the two dates are identical + } else { + # date2 is earlier + } + +3. Extract information from a date. + + print &UnixDate("today","It is now %T on %b %e, %Y."); + => "It is now 13:24:08 on Feb 3, 1996." + +4. The amount of time between two dates. + + $date1 = ParseDate($string1); + $date2 = ParseDate($string2); + $delta = DateCalc($date1,$date2,\$err); + => 0:0:WK:DD:HH:MM:SS the weeks, days, hours, minutes, + and seconds between the two + $delta = DateCalc($date1,$date2,\$err,1); + => YY:MM:WK:DD:HH:MM:SS the years, months, etc. between + the two + + Read the documentation below for an explanation of the + difference. + +5. To determine a date a given offset from another. + + $date = DateCalc("today","+ 3hours 12minutes 6 seconds",\$err); + $date = DateCalc("12 hours ago","12:30 6Jan90",\$err); + + It even works with business days: + + $date = DateCalc("today","+ 3 business days",\$err); + +6. To work with dates in another language. + + &Date_Init("Language=French","DateFormat=non-US"); + $date = ParseDate("1er decembre 1990"); + +7. To find a list of dates where a recurring event happens + (including quite complex ones). + + # To find the 2nd tuesday of every month + @date = ParseRecur("0:1*2:2:0:0:0",$base,$start,$stop); + + # To find the Monday after easter in 1997-1999. 
+ @date = ParseRecur("*1997-1999:0:0:0:0:0:0*EASTER,ND1");
+
+NOTE: Some date forms do not work as well in languages other than English,
+but this is not because Date::Manip is incapable of doing so (almost nothing
+in this module is language dependent). It is simply that I do not have the
+correct translation available for some words. If there is a date form that
+works in English but does not work in a language you need, let me know and
+if you can provide me the translation, I will fix Date::Manip.
+
+=head1 SHOULD I USE DATE::MANIP
+
+If you look in CPAN, you'll find that there are a number of Date and Time
+packages. Is Date::Manip the one you should be using? In my opinion, the
+answer is no most of the time. This sounds odd coming from the author of
+the software, but read on.
+
+Date::Manip is written entirely in perl. It's the most powerful of the
+date modules. It's also the biggest and slowest.
+
+Since Date::Manip is written entirely in perl, and depends on no other
+module not in a standard perl distribution, Date::Manip has no dependencies
+to meet. Other modules have dependencies on a C compiler or other perl
+modules. Since it is fairly easy to satisfy these dependencies for
+anyone who is reasonably familiar with perl modules, this is not a
+huge advantage that Date::Manip has.
+
+On the other hand, simpler perl modules tend to be faster than Date::Manip,
+and modules written in C are significantly faster than their perl
+counterparts (at least if they're done right). The TimeDate and
+Time-modules modules are written in perl, but are much simpler (and
+hence, faster) than Date::Manip. The Date::Calc module is written in C
+and is a good module for doing many date calculations much faster than
+Date::Manip. Between these three, most of your common date operations
+can be done.
+
+Date::Manip is certainly the most powerful of the Date modules. To the
+best of my knowledge, it will do everything that any other date module will
+do (not just the ones I listed above), and there are a number of features
+that Date::Manip has that none of the other modules have. Date::Manip is
+the "Swiss Army Knife" of Date modules. I'm trying to build a library
+which can do _EVERY_ conceivable date/time manipulation that you'll run
+into in everyday life.
+
+Although I am working on making Date::Manip faster, it will never be as
+fast as other modules. And before anyone asks, Date::Manip will never
+be translated to C (at least by me). I write C because I have to. I
+write perl because I like to. Date::Manip is something I do because it
+interests me, not something I'm paid for.
+
+Date::Manip is also big. The last time I looked, it's one of the largest
+CPAN modules there is. If you ignore modules like Tk, LWP, etc. which are
+actually packages of modules, it may be the largest. It's true that
+Date::Manip will do almost every date operation you could imagine... but
+you rarely need all that power. I'm working on reducing the footprint of
+Date::Manip, but even at its slimmest, it'll outweigh the other modules by
+a good bit.
+
+If you are going to be using the module in cases where performance is an
+important factor (started up in a CGI program being run by your web server
+5,000 times a second), you should check out one of the other Date or Time
+modules in CPAN. If you're only doing fairly simple date operations
+(parsing common date formats, finding the difference between two dates,
+etc.), the other modules will almost certainly suffice.
If you're doing +one operation very repetitively (parsing 10,000 dates from a database), you +are probably better off writing your own functions (perhaps bypassing all +date modules entirely) designed specifically for your needs. + +On the other hand, if you want one solution for all your date needs, don't +need peak speed, or are trying to do more exotic date operations, +Date::Manip is for you. Operations on things like business dates, foreign +language dates, holidays and other recurring events, etc. are available +more-or-less exclusively in Date::Manip. + +=head1 ROUTINES + +=over 4 + +=item ParseDate + + $date = ParseDate(\@args); + $date = ParseDate($string); + $date = ParseDate(\$string); + +This takes an array or a string containing a date and parses it. When the +date is included as an array (for example, the arguments to a program) the +array should contain a valid date in the first one or more elements +(elements after a valid date are ignored). Elements containing a valid +date are shifted from the array. The largest possible number of elements +which can be correctly interpreted as a valid date are always used. If a +string is entered rather than an array, that string is tested for a valid +date. The string is unmodified, even if passed in by reference. + +The real work is done in the ParseDateString routine. + +The ParseDate routine is primarily used to handle command line arguments. +If you have a command where you want to enter a date as a command line +argument, you can use Date::Manip to make something like the following +work: + + mycommand -date Dec 10 1997 -arg -arg2 + +No more reading man pages to find out what date format is required in a +man page. + +Historical note: this is originally why the Date::Manip routines were +written (though long before they were released as the Date::Manip module). +I was using a bunch of programs (primarily batch queue managers) where +dates and times were entered as command line options and I was getting +highly annoyed at the many different (but not compatible) ways that they +had to be entered. Date::Manip originally consisted of basically 1 routine +which I could pass "@ARGV" to and have it remove a date from the beginning. + +=item ParseDateString + + $date = ParseDateString($string); + +This routine is called by ParseDate, but it may also be called directly +to save some time (a negligable amount). + +NOTE: One of the most frequently asked questions that I have gotten +is how to parse seconds since the epoch. ParseDateString cannot simply +parse a number as the seconds since the epoch (it conflicts with some +ISO-8601 date formats). There are two ways to get this information. +First, you can do the following: + + $secs = ... # seconds since Jan 1, 1970 00:00:00 GMT + $date = &DateCalc("Jan 1, 1970 00:00:00 GMT",$secs); + +Second, you can call it directly as: + + $date = &ParseDateString("epoch $secs"); + +To go backwards, just use the "%s" format of UnixDate: + + $secs = &UnixDate($date,"%s"); + +A full date actually includes 2 parts: date and time. A time must include +hours and minutes and can optionally include seconds, fractional seconds, +an am/pm type string, and a timezone. For example: + + [at] HH:MN [Zone] + [at] HH:MN [am] [Zone] + [at] HH:MN:SS [am] [Zone] + [at] HH:MN:SS.SSSS [am] [Zone] + [at] HH am [Zone] + +Hours can be written using 1 or 2 digits, but the single digit form may +only be used when no ambiguity is introduced (i.e. when it is not +immediately preceded by a digit). 
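+
+As a brief illustration of the above (a sketch only; the two strings are
+arbitrary examples of formats described in this section, and Date::Manip
+is assumed to be loaded as in the SYNOPSIS):
+
+  $d1 = ParseDateString("12/10/1965 9:00 PM");   # US format, 12 hour time
+  $d2 = ParseDateString("19651210210000");       # ISO 8601, 24 hour time
+  # Both parse to the same internal date, so they compare as equal
+  print "same moment\n"  if (Date_Cmp($d1,$d2) == 0);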
+ +A time is usually entered in 24 hour mode, but 12 hour mode can be used +as well if AM/PM are entered (AM can be entered as AM or A.M. or other +variations depending on the language). + +Fractional seconds are also supported in parsing but the fractional part is +discarded (with NO rounding ocurring). + +Timezones always appear immediately after the time. A number of different +forms are supported (see the section TIMEZONEs below). + +Incidentally, the time is removed from the date before the date is parsed, +so the time may appear before or after the date, or between any two parts +of the date. + +Valid date formats include the ISO 8601 formats: + + YYYYMMDDHHMNSSF... + YYYYMMDDHHMNSS + YYYYMMDDHHMN + YYYYMMDDHH + YY-MMDDHHMNSSF... + YY-MMDDHHMNSS + YY-MMDDHHMN + YY-MMDDHH + YYYYMMDD + YYYYMM + YYYY + YY-MMDD + YY-MM + YY + YYYYwWWD ex. 1965-W02-2 + YYwWWD + YYYYDOY ex. 1965-045 + YYDOY + +In the above list, YYYY and YY signify 4 or 2 digit years, MM, DD, HH, MN, SS +refer to two digit month, day, hour, minute, and second respectively. F... +refers to fractional seconds (any number of digits) which will be ignored. +The last 4 formats can be explained by example: 1965-w02-2 refers to Tuesday +(day 2) of the 2nd week of 1965. 1965-045 refers to the 45th day of 1965. + +In all cases, parts of the date may be separated by dashes "-". If this is +done, 1 or 2 digit forms of MM, DD, etc. may be used. All dashes are +optional except for those given in the table above (which MUST be included +for that format to be correctly parsed). So 19980820, 1998-0820, +1998-08-20, 1998-8-20, and 199808-20 are all equivalent, but that date may +NOT be written as 980820 (it must be written as 98-0820). + +NOTE: Even though not allowed in the standard, the timezone for an ISO-8601 +date is flexible and may be any of the timezones understood by Date::Manip. + +Additional date formats are available which may or may not be common including: + + MM/DD ** + MM/DD/YY ** + MM/DD/YYYY ** + + mmmDD DDmmm mmmYYYY/DD mmmYYYY + mmmDD/YY DDmmmYY DD/YYmmm YYYYmmmDD YYYYmmm + mmmDDYYYY DDmmmYYYY DDYYYYmmm YYYY/DDmmm + +Where mmm refers to the name of a month. All parts of the date can be +separated by valid separators (space, "/", or "."). The separator "-" may +be used as long as it doesn't conflict with an ISO 8601 format, but this +is discouraged since it is easy to overlook conflicts. For example, the +format MM/DD/YY is just fine, but MM-DD-YY does not work since it conflicts +with YY-MM-DD. To be safe, if "-" is used as a separator in a non-ISO +format, they should be turned into "/" before calling the Date::Manip +routines. As with ISO 8601 formats, all separators are optional except for +those given as a "/" in the list above. + +** Note that with these formats, Americans tend to write month first, but +many other countries tend to write day first. The latter behavior can be +obtained by setting the config variable DateFormat to something other than +"US" (see CUSTOMIZING DATE::MANIP below). + +Date separators are treated very flexibly (they are converted to spaces), +so the following dates are all equivalent: + + 12/10/1965 + 12-10 / 1965 + 12 // 10 -. 1965 + +In some cases, this may actually be TOO flexible, but no attempt is made to +trap this. + +Years can be entered as 2 or 4 digits, days and months as 1 or 2 digits. +Both days and months must include 2 digits whenever they are immediately +adjacent to another numeric part of the date or time. 
Date separators
+are required if single digit forms of DD or MM are used. If separators
+are not used, the date will either be unparsable or will get parsed
+incorrectly.
+
+Miscellaneous other allowed formats are:
+  which dofw in mmm in YY      "first sunday in june 1996 at 14:00" **
+  dofw week num YY             "sunday week 22 1995" **
+  which dofw YY                "22nd sunday at noon" **
+  dofw which week YY           "sunday 22nd week in 1996" **
+  next/last dofw               "next friday at noon"
+  next/last week/month         "next month"
+  in num days/weeks/months     "in 3 weeks at 12:00"
+  num days/weeks/months later  "3 weeks later"
+  num days/weeks/months ago    "3 weeks ago"
+  dofw in num week             "Friday in 2 weeks"
+  in num weeks dofw            "in 2 weeks on friday"
+  dofw num week ago            "Friday 2 weeks ago"
+  num week ago dofw            "2 weeks ago friday"
+  last day in mmm in YY        "last day of October"
+  dofw                         "Friday" (Friday of current week)
+  Nth                          "12th", "1st" (day of current month)
+  epoch SECS                   seconds since the epoch (negative values
+                               are supported)
+
+** Note that the formats "sunday week 22" and "22nd sunday" give very
+different behaviors. "sunday week 22" returns the sunday of the 22nd week
+of the year based on how week 1 is defined. ISO 8601 defines week one to
+contain Jan 4, so "sunday week 1" might be the first or second sunday of
+the current year, or the last sunday of the previous year. "22nd sunday"
+gives the actual 22nd time sunday occurs in a given year, regardless of the
+definition of a week.
+
+Note that certain words such as "in", "at", "of", etc. which commonly appear
+in a date or time are ignored. Also, the year is always optional.
+
+In addition, the following strings are recognized:
+  today      (exactly now OR today at a given time if a time is specified)
+  now        (synonym for today)
+  yesterday  (exactly 24 hours ago unless a time is specified)
+  tomorrow   (exactly 24 hours from now unless a time is specified)
+  noon       (12:00:00)
+  midnight   (00:00:00)
+Other languages have similar (and in some cases additional) strings.
+
+Some things to note:
+
+All strings are case insensitive. "December" and "DEceMBer" both work.
+
+When a part of the date is not given, defaults are used: year defaults
+to current year; hours, minutes, seconds to 00.
+
+The year may be entered as 2 or 4 digits. If entered as 2 digits, it will
+be converted to a 4 digit year. There are several ways to do this based on
+the value of the YYtoYYYY variable (described below). The default behavior
+is to force the 2 digit year to be in the 100 year period CurrYear-89 to
+CurrYear+10. So in 1996, the range is [1907 to 2006], and the 2 digit year
+05 would refer to 2005 but 07 would refer to 1907. See CUSTOMIZING
+DATE::MANIP below for information on YYtoYYYY for other methods.
+
+Dates are always checked to make sure they are valid.
+
+In all of the formats, the day of week ("Friday") can be entered anywhere
+in the date and it will be checked for accuracy. In other words,
+  "Tue Jul 16 1996 13:17:00"
+will work but
+  "Jul 16 1996 Wednesday 13:17:00"
+will not (because Jul 16, 1996 is Tuesday, not Wednesday). Note that
+depending on where the weekday comes, it may give unexpected results when
+used in array context (with ParseDate). For example, the date
+("Jun","25","Sun","1990") would return June 25 of the current year since
+Jun 25, 1990 is not Sunday.
+
+The times "12:00 am", "12:00 pm", and "midnight" are not well defined.
For +good or bad, I use the following convention in Date::Manip: + midnight = 12:00am = 00:00:00 + noon = 12:00pm = 12:00:00 +and the day goes from 00:00:00 to 23:59:59. In other words, midnight is the +beginning of a day rather than the end of one. The time 24:00:00 is also +allowed (though it is automatically transformed to 00:00:00 of the following +day). + +The format of the date returned is YYYYMMDDHH:MM:SS. The advantage of this +time format is that two times can be compared using simple string comparisons +to find out which is later. Also, it is readily understood by a human. +Alternate forms can be used if that is more convenient. See Date_Init below +and the config variable Internal. + +NOTE: The format for the date is going to change at some point in the future +to YYYYMMDDHH:MN:SS+HHMN*FLAGS. In order to maintain compatibility, you +should use UnixDate to extract information from a date, and Date_Cmp to compare +two dates. The simple string comparison will only work for dates in the same +timezone. + +=item UnixDate + + @date = UnixDate($date,@format); + $date = UnixDate($date,@format); + +This takes a date and a list of strings containing formats roughly +identical to the format strings used by the UNIX date(1) command. Each +format is parsed and an array of strings corresponding to each format is +returned. + +$date may be any string that can be parsed by ParseDateString. + +The format options are: + + Year + %y year - 00 to 99 + %Y year - 0001 to 9999 + %G year - 0001 to 9999 (see below) + %L year - 0001 to 9999 (see below) + Month, Week + %m month of year - 01 to 12 + %f month of year - " 1" to "12" + %b,%h month abbreviation - Jan to Dec + %B month name - January to December + %U week of year, Sunday + as first day of week - 01 to 53 + %W week of year, Monday + as first day of week - 01 to 53 + Day + %j day of the year - 001 to 366 + %d day of month - 01 to 31 + + %e day of month - " 1" to "31" + %v weekday abbreviation - " S"," M"," T"," W","Th"," F","Sa" + %a weekday abbreviation - Sun to Sat + %A weekday name - Sunday to Saturday + %w day of week - 1 (Monday) to 7 (Sunday) + %E day of month with suffix - 1st, 2nd, 3rd... + Hour + %H hour - 00 to 23 + %k hour - " 0" to "23" + %i hour - " 1" to "12" + %I hour - 01 to 12 + %p AM or PM + Minute, Second, Timezone + %M minute - 00 to 59 + %S second - 00 to 59 + %s seconds from 1/1/1970 GMT- negative if before 1/1/1970 + %o seconds from Jan 1, 1970 + in the current time zone + %Z timezone - "EDT" + %z timezone as GMT offset - "+0100" + Date, Time + %c %a %b %e %H:%M:%S %Y - Fri Apr 28 17:23:15 1995 + %C,%u %a %b %e %H:%M:%S %z %Y - Fri Apr 28 17:25:57 EDT 1995 + %g %a, %d %b %Y %H:%M:%S %z - Fri, 28 Apr 1995 17:23:15 EDT + %D,%x %m/%d/%y - 04/28/95 + %l date in ls(1) format + %b %e $H:$M - Apr 28 17:23 (if within 6 months) + %b %e %Y - Apr 28 1993 (otherwise) + %r %I:%M:%S %p - 05:39:55 PM + %R %H:%M - 17:40 + %T,%X %H:%M:%S - 17:40:58 + %V %m%d%H%M%y - 0428174095 + %Q %Y%m%d - 19961025 + %q %Y%m%d%H%M%S - 19961025174058 + %P %Y%m%d%H%M%S - 1996102517:40:58 + %F %A, %B %e, %Y - Sunday, January 1, 1996 + %J %G-W%W-%w - 1997-W02-2 + %K %Y-%j - 1997-045 + Other formats + %n insert a newline character + %t insert a tab character + %% insert a `%' character + %+ insert a `+' character + The following formats are currently unused but may be used in the future: + NO 1234567890 !@#$^&*()_|-=\`[];',./~{}:<>? 
+ They currently insert the character following the %, but may (and probably + will) change in the future as new formats are added. + +If a lone percent is the final character in a format, it is ignored. + +Note that the ls format (%l) applies to date within the past OR future 6 +months! + +The %U, %W, %L, and %G formats are used to support the ISO-8601 format: +YYYY-wWW-D. In this format, a date is written as a year, the week of +the year, and the day of the week. Technically, the week may be considered +to start on any day of the week, but Sunday and Monday are the two most +common choices, so both are supported. + +The %U and %W formats return a week-of-year number from 01 to 53, and +%L and %G return a 4-digit year corresponding to the week. Most of the +time, the %L and %G formats returns the same value as the %Y format, +but there is a problem with days occuring in the first or last week of +the year. + +The ISO-8601 representation of Jan 1, 1993 written in the YYYY-wWWW-D +format is actually 1992-W53-5. In other words, Jan 1 is treates as being +in the last week of the preceding year. Depending on the year, days in +the first week of a year may belong to the previous year, and days in the +final week of a year may belong to the next year. + +The %L and %U formats contains the year and week-of-year values treating +weeks as starting on Sunday. The %G and %W formats are the year and +week-of-year values treating weeks as starting on Monday. + +%J returns the full ISO-8601 format (%G-W%W-%w). + +The formats used in this routine were originally based on date.pl (version +3.2) by Terry McGonigal, as well as a couple taken from different versions +of the Solaris date(1) command. Also, several have been added which are +unique to Date::Manip. + +=item ParseDateDelta + + $delta = ParseDateDelta(\@args); + $delta = ParseDateDelta($string); + $delta = ParseDateDelta(\$string); + +This takes an array and shifts a valid delta date (an amount of time) +from the array. Recognized deltas are of the form: + +Yy +Mm +Ww +Dd +Hh +MNmn +Ss + examples: + +4 hours +3mn -2second + + 4 hr 3 minutes -2 + 4 hour + 3 min -2 s + +Y:+M:+W:+D:+H:+MN:+S + examples: + 0:0:0:0:4:3:-2 + +4:3:-2 + mixed format + examples: + 4 hour 3:-2 + +A field in the format +Yy is a sign, a number, and a string specifying +the type of field. The sign is "+", "-", or absent (defaults to the +next larger element). The valid strings specifying the field type +are: + y: y, yr, year, years + m: m, mon, month, months + w: w, wk, ws, wks, week, weeks + d: d, day, days + h: h, hr, hour, hours + mn: mn, min, minute, minutes + s: s, sec, second, seconds + +Also, the "s" string may be omitted. The sign, number, and string may +all be separated from each other by any number of whitespaces. + +In the date, all fields must be given in the order: Y M W D H MN S. Any +number of them may be omitted provided the rest remain in the correct +order. In the 2nd (colon) format, from 2 to 7 of the fields may be given. +For example +D:+H:+MN:+S may be given to specify only four of the fields. +In any case, both the MN and S field may be present. No spaces may be +present in the colon format. + +Deltas may also be given as a combination of the two formats. For example, +the following is valid: +Yy +D:+H:+MN:+S. Again, all fields must be given +in the correct order. + +The word "in" may be given (prepended in English) to the delta ("in 5 years") +and the word "ago" may be given (appended in English) ("6 months ago"). The +"in" is completely ignored. 
The "ago" has the affect of reversing all signs +that appear in front of the components of the delta. I.e. "-12 yr 6 mon ago" +is identical to "+12yr +6mon" (don't forget that there is an implied minus +sign in front of the 6 because when no sign is explicitly given, it carries +the previously entered sign). + +One thing is worth noting. The year/month and day/hour/min/sec parts are +returned in a "normalized" form. That is, the signs are adjusted so as to +be all positive or all negative. For example, "+ 2 day - 2hour" does not +return "0:0:0:2:-2:0:0". It returns "+0:0:0:1:22:0:0" (1 day 22 hours +which is equivalent). I find (and I think most others agree) that this is +a more useful form. + +Since the year/month and day/hour/min/sec parts must be normalized +separately there is the possibility that the sign of the two parts will be +different. So, the delta "+ 2years -10 months - 2 days + 2 hours" produces +the delta "+1:2:-0:1:22:0:0". + +It is possible to include a sign for all elements that is output. See the +configuration variable DeltaSigns below. + +NOTE: The internal format of the delta changed in version 5.30 from +Y:M:D:H:MN:S to Y:M:W:D:H:MN:S . Also, it is going to change again at some +point in the future to Y:M:W:D:H:MN:S*FLAGS . Use the routine Delta_Format +to extract information rather than parsing it yourself. + +=item Delta_Format + + @str = Delta_Format($delta,$dec,@format); + $str = Delta_Format($delta,$dec,@format); + +This is similar to the UnixDate routine except that it extracts information +from a delta. Unlike the UnixDate routine, most of the formats are 2 +characters instead of 1. + +Formats currently understood are: + + %Xv : the value of the field named X + %Xd : the value of the field X, and all smaller fields, expressed in + units of X + %Xh : the value of field X, and all larger fields, expressed in units + of X + %Xt : the value of all fields expressed in units of X + + X is one of y,M,w,d,h,m,s (case sensitive). + + %% : returns a "%" + +NOTE: Delta_Format only understands "exact" relationships, so for any delta +that has a month component, there can be no mixing of the Y/M and +W/D/H/MN/S segments. In other words, the delta 1:6:1:1:1:1:1 has a month +component, so asking for the total number of years (using the %yd format) +will return 1.5 (which is what 1 year 6 months is). For deltas which have +NO month component, the relationship between years and days is known +(365.25 is used) and all formats work as expected (except that formats with +X equal to "M" are not allowed). + +So, the format "%hd" means the values of H, MN, and S expressed in hours. +So for the delta "0:0:0:0:2:30:0", this format returns 2.5. Similarly, the +format "%yd" means the value (in years) of both the Y and M fields, or, +if the month component is 0, it uses Y, W, D, H, MN, S. + +The format "%hh" returns the value of W, D, and H expressed in hours if +the month component is non-zero, or Y, W, D, H if the month component is 0. + +If $dec is non-zero, the %Xd and %Xt values are formatted to contain $dec +decimal places. + +=item ParseRecur + + $recur = ParseRecur($string [,$base,$date0,$date1,$flags]); + @dates = ParseRecur($string [,$base,$date0,$date1,$flags]); + +A recurrence refers to a recurring event. A fully specified recurrence +requires (in most cases) 4 items: a recur description (describing the +frequency of the event), a base date (a date when the event occurred and +which other occurrences are based on), and a start and end date. 
There may +be one or more flags included which modify the behavior of the recur +description. The fully specified recurrence is written as: + + recur*flags*base*date0*date1 + +Here, base, date0, and date1 are any strings (which must not contain any +asterixes) which can be parsed by ParseDate. flags is a comma separated +list of flags (described below), and recur is a string describing a +recurring event. + +If called in scalar context, it returns a string containing a fully +specified recurrence (or as much of it as can be determined with +unspecified fields left blank). In list context, it returns a list of all +dates referred to by a recurrence if enough information is given in the +recurrence. All dates returned are in the range: + + date0 <= date < date1 + +The argument $string can contain any of the parts of a full recurrence. +For example: + + recur + recur*flags + recur**base*date0*date1 + +The only part which is required is the recur description. Any values +contained in $string are overridden or modified by values passed in as +parameters to ParseRecur. + +A recur description is a string of the format Y:M:W:D:H:MN:S . Exactly one +of the colons may optionally be replaced by an asterisk, or an asterisk may +be prepended to the string. + +Any value "N" to the left of the asterisk refers to the "Nth" one. Any +value to the right of the asterisk refers to a value as it appears on a +calendar/clock. Values to the right can be listed a single values, ranges +(2 numbers separated by a dash "-"), or a comma separated list of values +or ranges. In a few cases, negative values are appropriate. + +This is best illustrated by example. + + 0:0:2:1:0:0:0 every 2 weeks and 1 day + 0:0:0:0:5:30:0 every 5 hours and 30 minutes + 0:0:0:2*12:30:0 every 2 days at 12:30 (each day) + 3*1:0:2:12:0:0 every 3 years on Jan 2 at noon + 0:1*0:2:12,14:0:0 2nd of every month at 12:00 and 14:00 + 1:0:0*45:0:0:0 45th day of every year + 0:1*4:2:0:0:0 4th tuesday (day 2) of every month + 0:1*-1:2:0:0:0 last tuesday of every month + 0:1:0*-2:0:0:0 2nd to last day of every month + 0:0:3*2:0:0:0 every 3rd tuesday (every 3 weeks on 2nd day of week) + 1:0*12:2:0:0:0 tuesday of the 12th week of each year + *1990-1995:12:0:1:0:0:0 + Dec 1 in 1990 through 1995 + + 0:1*2:0:0:0:0 the start of the 2nd week of every month (see Note 2) + 1*1:2:0:0:0:0 the start of the 2nd week in January each year (Note 2) + +I realize that this looks a bit cryptic, but after a discussion on the +CALENDAR mailing list, it looked like there was no concise, flexible +notation for handling recurring events. ISO 8601 notations were very bulky +and lacked the flexibility I wanted. As a result, I developed this +notation (based on crontab formats, but with much more flexibility) which +fits in well with this module, and which is able to express every type of +recurring event I could think of. + +NOTE: If a recurrence has a date0 and date1 in it AND a date0 and date1 +are passed in to the function, both sets of criteria apply. If flags are +passed in, they override any flags in the recurrence UNLESS the flags +passed in start with a plus (+) character in which case they are appended +to the flags in the recurrence. + +NOTE: There is no way to express the following with a single recurrence: + + every day at 12:30 and 1:00 + +You have to use two recurrences to do this. + +NOTE: A recurrence specifying the week of a month is NOT clearly defined +in common usage. What is the 1st week in a month? 
The behavior (with +respect to this module) is well defined (using the FDn and FIn flags +below), but in common usage, this is so ambiguous that this form should +probably never be used. It is included here solely for the sake of +completeness. + +NOTE: Depending on whether M and W are 0 or nonzero, D means different +things. This is given in the following table. + + M W D (when right of an asterisk) refers to + - - ------------------------------------------- + 0 0 day of year (1-366) + M 0 day of month (1-31) + 0 W day of week (1-7), W refers to the week of year + M W the Wth (1-5 or -1 to -5) occurrence of Dth (1-7) day of week in month + +NOTE: Base dates are only used with some types of recurrences. For example, + + 0:0:3*2:0:0:0 every 3rd tuesday + +requires a base date. If a base date is specified which doesn't match the +criteria (for example, if a base date falling on Monday were passed in with +this recurrence), the base date is moved forward to the first relevant date. + +Other dates do not require a base date. For example: + + 0:0*3:2:0:0:0 third tuesday of every month + +A recurrence written in the above format does NOT provide default values +for base, date0, or date1. They must be specified in order to get a list +of dates. + +A base date is not used entirely. It is only used to provide the parts +necessary for the left part of a recurrence. For example, the recurrence: + + 1:3*0:4:0:0:0 every 1 year, 3 months on the 4th day of the month + +would only use the year and month of the base date. + + +There are a small handful of English strings which can be parsed in place +of a numerical recur description. These include: + + every 2nd day [in 1997] + every 2nd day in June [1997] + 2nd day of every month [in 1997] + 2nd tuesday of every month [in 1997] + last tuesday of every month [in 1997] + every tuesday [in 1997] + every 2nd tuesday [in 1997] + every 2nd tuesday in June [1997] + +Each of these set base, date0, and date1 to a default value (the current +year with Jan 1 being the base date is the default if the year and month +are missing). + +The following flags (case insensitive) are understood: + + MWn : n is 1-7. The first week of the month is the week + which contains the first occurrence of day n (1=Monday). + MW2 means that the first week contains the first Tuesday + of the month. + MDn : n is 1-7. The first week of the month contains the + actual date (1st through 7th). MD4 means that the first + week of the month contains the 4th of that month. + + PDn : n is 1-7. Means the previous day n not counting today + PTn : n is 1-7. Means the previous day n counting today + NDn : n is 1-7. Means the next day n not counting today + NTn : n is 1-7. Means the next day n counting today + + FDn : n is any number. Means step forward n days. + BDn : n is any number. Means step backward n days. + FWn : n is any number. Means step forward n workdays. + BWn : n is any number. Means step backward n workdays. + + CWD : the closest work day (using the TomorrowFirst config variable). + CWN : the closest work day (looking forward first). + CWP : the closest work day (looking backward first). + + NWD : next work day counting today + PWD : previous work day counting today + DWD : next/previous work day (TomorrowFirst config) counting today + + EASTER: select easter for this year (the M, W, D fields are ignored + in the recur). + +NOTE: only one of MWn and MDn can be set. If both are set, only the +last one is used. The default is MW7 (i.e. the first week contains +the first Sunday). 
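+
+As a minimal sketch of the pieces above (no flags are given, so the
+defaults described above apply; the base date and the date range are
+arbitrary examples):
+
+  # every 2nd tuesday of the month at noon, over the first half of 2000
+  @dates = ParseRecur("0:1*2:2:12:0:0","2000-01-01","2000-01-01","2000-07-01");
+
+  # the same call in scalar context returns the fully specified recurrence
+  $recur = ParseRecur("0:1*2:2:12:0:0","2000-01-01","2000-01-01","2000-07-01");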
+ +CWD, CWN, and CWP will usually return the same value, but if you are +starting at the middle day of a 3-day weekend (for example), it will return +either the first work day of the following week, or the last work day of +the previous week depending on whether it looks forward or backward first. + +All flags are applied AFTER the recurrence dates are calculated, and they +may move a date outside of the date0 to date1 range. No check is made for +this. + +The workday flags do not act exactly the same as a business mode calculation. +For example, a date that is Saturday with a FW1 steps forward to the first +workday (i.e. Monday). + +=item Date_Cmp + + $flag = Date_Cmp($date1,$date2); + +This takes two dates and compares them. Almost all dates can be compared +using the perl "cmp" command. The only time this will not work is when +comparing dates in different timezones. This routine will take that into +account. + +NOTE: This routine currently does little more than use "cmp", but once +the internal format for storing dates is in place (where timezone information +is kept as part of the date), this routine will become more important. You +should use this routine in prepartation for that version. + +=item DateCalc + + $d = DateCalc($d1,$d2 [,\$err] [,$mode]); + +This takes two dates, deltas, or one of each and performs the appropriate +calculation with them. Dates must be a string that can be parsed by +&ParseDateString. Deltas must be a string that can be parsed by +&ParseDateDelta. Two deltas add together to form a third delta. A date +and a delta returns a 2nd date. Two dates return a delta (the difference +between the two dates). + +Note that in many cases, it is somewhat ambiguous what the delta actually +refers to. Although it is ALWAYS known how many months in a year, hours in +a day, etc., it is NOT known how many days form a month. As a result, the +part of the delta containing month/year and the part with sec/min/hr/day +must be treated separately. For example, "Mar 31, 12:00:00" plus a delta +of 1month 2days would yield "May 2 12:00:00". The year/month is first +handled while keeping the same date. Mar 31 plus one month is Apr 31 (but +since Apr only has 30 days, it becomes Apr 30). Apr 30 + 2 days is May 2. +As a result, in the case where two dates are entered, the resulting delta +can take on two different forms. By default ($mode=0), an absolutely +correct delta (ignoring daylight savings time) is returned in days, hours, +minutes, and seconds. + +If $mode is 1, the math is done using an approximate mode where a delta is +returned using years and months as well. The year and month part is +calculated first followed by the rest. For example, the two dates "Mar 12 +1995" and "Apr 13 1995" would have an exact delta of "31 days" but in the +approximate mode, it would be returned as "1 month 1 day". Also, "Mar 31" +and "Apr 30" would have deltas of "30 days" or "1 month" (since Apr 31 +doesn't exist, it drops down to Apr 30). Approximate mode is a more human +way of looking at things (you'd say 1 month and 2 days more often then 33 +days), but it is less meaningful in terms of absolute time. In approximate +mode $d1 and $d2 must be dates. If either or both is a delta, the +calculation is done in exact mode. + +If $mode is 2, a business mode is used. That is, the calculation is done +using business days, ignoring holidays, weekends, etc. 
In order to +correctly use this mode, a config file must exist which contains the +section defining holidays (see documentation on the config file below). +The config file can also define the work week and the hours of the work +day, so it is possible to have different config files for different +businesses. + +For example, if a config file defines the workday as 08:00 to 18:00, a +work week consisting of Mon-Sat, and the standard (American) holidays, then +from Tuesday at 12:00 to the following Monday at 14:00 is 5 days and 2 +hours. If the "end" of the day is reached in a calculation, it +automatically switches to the next day. So, Tuesday at 12:00 plus 6 hours +is Wednesday at 08:00 (provided Wed is not a holiday). Also, a date that +is not during a workday automatically becomes the start of the next +workday. So, Sunday 12:00 and Monday at 03:00 both automatically becomes +Monday at 08:00 (provided Monday is not a holiday). In business mode, any +combination of date and delta may be entered, but a delta should not +contain a year or month field (weeks are fine though). + +See below for some additional comments about business mode calculations. + +Note that a business week is treated the same as an exact week (i.e. from +Tuesday to Tuesday, regardless of holidays). Because this means that the +relationship between days and weeks is NOT unambiguous, when a delta is +produced from two dates, it will be in terms of d/h/mn/s (i.e. no week +field). + +If $mode is 3 (which only applies when two dates are passed in), an exact +business mode is used. In this case, it returns a delta as an exact number +of business days/hours/etc. between the two. Weeks, months, and years are +ignored. + +Any other non-nil value of $mode is treated as $mode=1 (approximate mode). + +The mode can be automatically set in the dates/deltas passed by including a +key word somewhere in it. For example, in English, if the word +"approximately" is found in either of the date/delta arguments, approximate +mode is forced. Likewise, if the word "business" or "exactly" appears, +business/exact mode is forced (and $mode is ignored). So, the two +following are equivalent: + + $date = DateCalc("today","+ 2 business days",\$err); + $date = DateCalc("today","+ 2 days",\$err,2); + +Note that if the keyword method is used instead of passing in $mode, it is +important that the keyword actually appear in the argument passed in to +DateCalc. The following will NOT work: + + $delta = ParseDateDelta("+ 2 business days"); + $today = ParseDate("today"); + $date = DateCalc($today,$delta,\$err); + +because the mode keyword is removed from a date/delta by the parse routines, +and the mode is reset each time a parse routine is called. Since DateCalc +parses both of its arguments, whatever mode was previously set is ignored. + +If \$err is passed in, it is set to: + 1 is returned if $d1 is not a delta or date + 2 is returned if $d2 is not a delta or date + 3 is returned if the date is outside the years 1000 to 9999 +This argument is optional, but if included, it must come before $mode. + +Nothing is returned if an error occurs. + +When a delta is returned, the signs such that it is strictly positive or +strictly negative ("1 day - 2 hours" would never be returned for example). +The only time when this cannot be enforced is when two deltas with a +year/month component are entered. In this case, only the signs on the +day/hour/min/sec part are standardized. 
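+
+A short sketch contrasting the modes (the first two calls use the dates
+discussed above; the last call assumes holidays and work hours have been
+configured as described below):
+
+  $delta = DateCalc("Mar 12 1995","Apr 13 1995",\$err);     # exact mode: d/h/mn/s
+  $delta = DateCalc("Mar 12 1995","Apr 13 1995",\$err,1);   # approximate: "1 month 1 day"
+  $date  = DateCalc("today","+ 3 business days",\$err);     # business mode via keyword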
+ +=item Date_SetTime + + $date = Date_SetTime($date,$hr,$min,$sec); + $date = Date_SetTime($date,$time); + +This takes a date (any string that may be parsed by ParseDateString) and +sets the time in that date. For example, one way to get the time for 7:30 +tomorrow would be to use the lines: + + $date = ParseDate("tomorrow"); + $date = Date_SetTime($date,"7:30"); + +Note that in this routine (as well as the other routines below which use +a time argument), no real parsing is done on the times. As a result, + + $date = Date_SetTime($date,"13:30"); + +works, but + + $date = Date_SetTime($date,"1:30 PM"); + +doesn't. + +=item Date_SetDateField + + $date = Date_SetDateField($date,$field,$val [,$nocheck]); + +This takes a date and sets one of it's fields to a new value. $field is +any of the strings "y", "m", "d", "h", "mn", "s" (case insensitive) and +$val is the new value. + +If $nocheck is non-zero, no check is made as to the validity of the date. + +=item Date_GetPrev + + $date = Date_GetPrev($date,$dow, $curr [,$hr,$min,$sec]); + $date = Date_GetPrev($date,$dow, $curr [,$time]); + $date = Date_GetPrev($date,undef,$curr,$hr,$min,$sec); + $date = Date_GetPrev($date,undef,$curr,$time); + +This takes a date (any string that may be parsed by ParseDateString) and finds +the previous occurrence of either a day of the week, or a certain time of day. + +If $dow is defined, the previous occurrence of the day of week is returned. +$dow may either be a string (such as "Fri" or "Friday") or a number +(between 1 and 7). The date of the previous $dow is returned. + +If $date falls on the day of week given by $dow, the date returned depends +on $curr. If $curr is 0, the date returned is a week before $date. If +$curr is 1, the date returned is the same as $date. If $curr is 2, the date +returned (including the time information) is required to be before $date. + +If a time is passed in (either as separate hours, minutes, seconds or as a +time in HH:MM:SS or HH:MM format), the time on this date is set to it. The +following examples should illustrate the use of Date_GetPrev: + + date dow curr time returns + Fri Nov 22 18:15:00 Thu any 12:30 Thu Nov 21 12:30:00 + Fri Nov 22 18:15:00 Fri 0 12:30 Fri Nov 15 12:30:00 + Fri Nov 22 18:15:00 Fri 1/2 12:30 Fri Nov 22 12:30:00 + + Fri Nov 22 18:15:00 Fri 1 18:30 Fri Nov 22 18:30:00 + Fri Nov 22 18:15:00 Fri 2 18:30 Fri Nov 15 18:30:00 + +If $dow is undefined, then a time must be entered, and the date returned is +the previous occurrence of this time. If $curr is non-zero, the current +time is returned if it matches the criteria passed in. In other words, the +time returned is the last time that a digital clock (in 24 hour mode) would +have displayed the time you passed in. If you define hours, minutes and +seconds default to 0 and you might jump back as much as an entire day. If +hours are undefined, you are looking for the last time the minutes/seconds +appeared on the digital clock, so at most, the time will jump back one hour. 
+ + date curr hr min sec returns + Nov 22 18:15:00 0/1 18 undef undef Nov 22 18:00:00 + Nov 22 18:15:00 0/1 18 30 0 Nov 21 18:30:00 + Nov 22 18:15:00 0 18 15 undef Nov 21 18:15:00 + Nov 22 18:15:00 1 18 15 undef Nov 22 18:15:00 + Nov 22 18:15:00 0 undef 15 undef Nov 22 17:15:00 + Nov 22 18:15:00 1 undef 15 undef Nov 22 18:15:00 + +=item Date_GetNext + + $date = Date_GetNext($date,$dow, $curr [,$hr,$min,$sec]); + $date = Date_GetNext($date,$dow, $curr [,$time]); + $date = Date_GetNext($date,undef,$curr,$hr,$min,$sec); + $date = Date_GetNext($date,undef,$curr,$time); + +Similar to Date_GetPrev. + +=item Date_IsHoliday + + $name = Date_IsHoliday($date); + +This returns undef if $date is not a holiday, or a string containing the +name of the holiday otherwise. An empty string is returned for an unnamed +holiday. + +=item Events_List + + $ref = Events_List($date); + $ref = Events_List($date ,0 [,$flag]); + $ref = Events_List($date0,$date1 [,$flag]); + +This returns a list of events. Events are defined in the Events section +of the config file (discussed below). + +In the first form (a single argument), $date is any string containing a +date. A list of events active at that precise time will be returned. +The format is similar to when $flag=0, except only a single time will +be returned. + +In all other cases, a range of times will be used. If the 2nd argument +evaluates to 0, the range of times will be the 24 hour period from +midnight to midnight containing $date. Otherwise, the range is given +by the two dates. + +The value of $flag determines the format of the information that is +returned. + +With $flag=0, the events are returned as a reference to a list of the form: + + [ date, [ list_of_events ], date, [ list_of_events ], ... ] + +For example, if the following events are defined (using the syntax +discussed below in the description of the Event section of the config +file): + + 2000-01-01 ; 2000-03-21 = Winter + 2000-03-22 ; 2000-06-21 = Spring + 2000-02-01 = Event1 + 2000-05-01 = Event2 + 2000-04-01-12:00:00 = Event3 + +might result in the following output: + + &Events_List("2000-04-01") + => [ 2000040100:00:00, [ Spring ] ] + + &Events_List("2000-04-01 12:30"); + => [ 2000040112:30:00, [ Spring, Event3 ] ] + + &Events_List("2000-04-01",0); + => [ 2000040100:00:00, [ Spring ], + 2000040112:00:00, [ Spring, Event3 ], + 2000040113:00:00, [ Spring ] ] + + &Events_List("2000-03-15","2000-04-10"); + => [ 2000031500:00:00, [ Winter ], + 2000032200:00:00, [ Spring ] + 2000040112:00:00, [ Spring, Event3 ] + 2000040113:00:00, [ Spring ] ] + +Much more complicated events can be defined using recurrences. + +When $flag is non-zero, the format of the output is changed. If $flag +is 1, then a tally of the amount of time given to each event is returned. +Time for which two or more events apply is counted for both. + + &Events_List("2000-03-15","2000-04-10",1); + => { Winter => +0:0:1:0:0:0:0, + Spring => +0:0:2:5:0:0:0, + Event3 => +0:0:0:0:1:0:0 } + +When $flag is 2, a more complex tally with no event counted twice is +returned. + + &Events_List("2000-03-15","2000-04-10",2); + => { Winter => +0:0:1:0:0:0:0, + Spring => +0:0:2:4:23:0:0, + Event3+Spring => +0:0:0:0:1:0:0 } + +The hash contains one element for each combination of events. + +=item Date_DayOfWeek + + $day = Date_DayOfWeek($m,$d,$y); + +Returns the day of the week (1 for Monday, 7 for Sunday). + +All arguments must be numeric. 
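+
+For example (a trivial sketch; the date is arbitrary):
+
+  $day = Date_DayOfWeek(12,10,1965);   # 5 (Dec 10, 1965 was a Friday)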
+ +=item Date_SecsSince1970 + + $secs = Date_SecsSince1970($m,$d,$y,$h,$mn,$s); + +Returns the number of seconds since Jan 1, 1970 00:00 (negative if date is +earlier). + +All arguments must be numeric. + +=item Date_SecsSince1970GMT + + $secs = Date_SecsSince1970GMT($m,$d,$y,$h,$mn,$s); + +Returns the number of seconds since Jan 1, 1970 00:00 GMT (negative if date +is earlier). If CurrTZ is "IGNORE", the number will be identical to +Date_SecsSince1970 (i.e. the date given will be treated as being in GMT). + +All arguments must be numeric. + +=item Date_DaysSince1BC + + $days = Date_DaysSince1BC($m,$d,$y); + +Returns the number of days since Dec 31, 1BC. This includes the year 0000. + +All arguments must be numeric. + +=item Date_DayOfYear + + $day = Date_DayOfYear($m,$d,$y); + +Returns the day of the year (001 to 366) + +All arguments must be numeric. + +=item Date_NthDayOfYear + + ($y,$m,$d,$h,$mn,$s) = Date_NthDayOfYear($y,$n); + +Returns the year, month, day, hour, minutes, and decimal seconds given +a floating point day of the year. + +All arguments must be numeric. $n must be greater than or equal to 1 +and less than 366 on non-leap years and 367 on leap years. + +NOTE: When $n is a decimal number, the results are non-intuitive perhaps. +Day 1 is Jan 01 00:00. Day 2 is Jan 02 00:00. Intuitively, you +might think of day 1.5 as being 1.5 days after Jan 01 00:00, but this +would mean that Day 1.5 was Jan 02 12:00 (which is later than Day 2). +The best way to think of this function is a timeline starting at 1 and +ending at 366 (in a non-leap year). In terms of a delta, think of $n +as the number of days after Dec 31 00:00 of the previous year. + +=item Date_DaysInYear + + $days = Date_DaysInYear($y); + +Returns the number of days in the year (365 or 366) + +=item Date_DaysInMonth + + $days = Date_DaysInMonth($m,$y); + +Returns the number of days in the month. + +=item Date_WeekOfYear + + $wkno = Date_WeekOfYear($m,$d,$y,$first); + +Figure out week number. $first is the first day of the week which is +usually 1 (Monday) or 7 (Sunday), but could be any number between 1 and 7 +in practice. + +All arguments must be numeric. + +NOTE: This routine should only be called in rare cases. Use UnixDate with +the %W, %U, %J, %L formats instead. This routine returns a week between 0 +and 53 which must then be "fixed" to get into the ISO-8601 weeks from 1 to +53. A date which returns a week of 0 actually belongs to the last week of +the previous year. A date which returns a week of 53 may belong to the +first week of the next year. + +=item Date_LeapYear + + $flag = Date_LeapYear($y); + +Returns 1 if the argument is a leap year +Written by David Muir Sharnoff + +=item Date_DaySuffix + + $day = Date_DaySuffix($d); + +Add `st', `nd', `rd', `th' to a date (ie 1st, 22nd, 29th). Works for +international dates. + +=item Date_TimeZone + + $tz = Date_TimeZone; + +This determines and returns the local timezone. If it is unable to determine +the local timezone, the following error occurs: + + ERROR: Date::Manip unable to determine TimeZone. + +See The TIMEZONES section below for more information. + +=item Date_ConvTZ + + $date = Date_ConvTZ($date); + $date = Date_ConvTZ($date,$from); + $date = Date_ConvTZ($date,"",$to); + $date = Date_ConvTZ($date,$from,$to); + +This converts a date (which MUST be in the format returned by ParseDate) +from one timezone to another. 
+ +If it is called with no arguments, the date is converted from the local +timezone to the timezone specified by the config variable ConvTZ (see +documentation on ConvTZ below). If ConvTZ is set to "IGNORE", no +conversion is done. + +If called with $from but no $to, the timezone is converted from the +timezone in $from to ConvTZ (of TZ if ConvTZ is not set). Again, no +conversion is done if ConvTZ is set to "IGNORE". + +If called with $to but no $from, $from defaults to ConvTZ (if set) or the +local timezone otherwise. Although this does not seem immediately obvious, +it actually makes sense. By default, all dates that are parsed are +converted to ConvTZ, so most of the dates being worked with will be stored +in that timezone. + +If Date_ConvTZ is called with both $from and $to, the date is converted +from the timezone $from to $to. + +NOTE: As in all other cases, the $date returned from Date_ConvTZ has no +timezone information included as part of it, so calling UnixDate with the +"%z" format will return the timezone that Date::Manip is working in +(usually the local timezone). + +Example: To convert 2/2/96 noon PST to CST (regardless of what timezone +you are in, do the following: + + $date = ParseDate("2/2/96 noon"); + $date = Date_ConvTZ($date,"PST","CST"); + +Both timezones MUST be in one of the formats listed below in the section +TIMEZONES. + +=item Date_Init + + &Date_Init(); + &Date_Init("VAR=VAL","VAR=VAL",...); + @list = Date_Init(); + @list = Date_Init("VAR=VAL","VAR=VAL",...); + +Normally, it is not necessary to explicitly call Date_Init. The first +time any of the other routines are called, Date_Init will be called to set +everything up. If for some reason you want to change the configuration of +Date::Manip, you can pass the appropriate string or strings into Date_Init +to reinitialize things. + +The strings to pass in are of the form "VAR=VAL". Any number may be +included and they can come in any order. VAR may be any configuration +variable. A list of all configuration variables is given in the section +CUSTOMIZING DATE::MANIP below. VAL is any allowed value for that variable. +For example, to switch from English to French and use non-US format (so +that 12/10 is Oct 12), do the following: + + &Date_Init("Language=French","DateFormat=non-US"); + +If Date_Init is called in list context, it will return a list of all +config variables and their values suitable for passing in to Date_Init +to return Date::Manip to the current state. The only possible problem is +that by default, holidays will not be erased, so you may need to prepend +the "EraseHolidays=1" element to the list. + +=item Date_IsWorkDay + + $flag = Date_IsWorkDay($date [,$flag]); + +This returns 1 if $date is a work day. If $flag is non-zero, the time is +checked to see if it falls within work hours. It returns an empty string +if $date is not valid. + +=item Date_NextWorkDay + + $date = Date_NextWorkDay($date,$off [,$time]); + +Finds the day $off work days from now. If $time is passed in, we must also +take into account the time of day. + +If $time is not passed in, day 0 is today (if today is a workday) or the +next work day if it isn't. In any case, the time of day is unaffected. + +If $time is passed in, day 0 is now (if now is part of a workday) or the +start of the very next work day. + +=item Date_PrevWorkDay + + $date = Date_PrevWorkDay($date,$off [,$time]); + +Similar to Date_NextWorkDay. 
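+
+A small sketch of the work day routines above (the default work week is
+assumed and no holidays have been defined):
+
+  $date = ParseDate("today");
+  $flag = Date_IsWorkDay($date);       # true if $date falls on a work day
+  $due  = Date_NextWorkDay($date,3);   # 3 work days later, time of day unchanged
+  $prev = Date_PrevWorkDay($date,5);   # 5 work days earlier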
+ +=item Date_NearestWorkDay + + $date = Date_NearestWorkDay($date [,$tomorrowfirst]); + +This looks for the work day nearest to $date. If $date is a work day, it +is returned. Otherwise, it will look forward or backwards in time 1 day +at a time until a work day is found. If $tomorrowfirst is non-zero (or if +it is omitted and the config variable TomorrowFirst is non-zero), we look +to the future first. Otherwise, we look in the past first. In other words, +in a normal week, if $date is Wednesday, $date is returned. If $date is +Saturday, Friday is returned. If $date is Sunday, Monday is returned. If +Wednesday is a holiday, Thursday is returned if $tomorrowfirst is non-nil +or Tuesday otherwise. + +=item DateManipVersion + + $version = DateManipVersion; + +Returns the version of Date::Manip. + +=back + +=head1 TIMEZONES + +The following timezone names are currently understood (and can be used in +parsing dates). These are zones defined in RFC 822. + + Universal: GMT, UT + US zones : EST, EDT, CST, CDT, MST, MDT, PST, PDT + Military : A to Z (except J) + Other : +HHMM or -HHMM + ISO 8601 : +HH:MM, +HH, -HH:MM, -HH + +In addition, the following timezone abbreviations are also accepted. In a +few cases, the same abbreviation is used for two different timezones (for +example, NST stands for Newfoundland Standard -0330 and North Sumatra +0630). +In these cases, only 1 of the two is available. The one preceded by a "#" +sign is NOT available but is documented here for completeness. This list of +zones comes in part from the Time::Zone module by Graham Barr, David Muir +Sharnoff, and Paul Foley (with several additions by myself). + + IDLW -1200 International Date Line West + NT -1100 Nome + HST -1000 Hawaii Standard + CAT -1000 Central Alaska + AHST -1000 Alaska-Hawaii Standard + AKST -0900 Alaska Standard + YST -0900 Yukon Standard + HDT -0900 Hawaii Daylight + AKDT -0800 Alaska Daylight + YDT -0800 Yukon Daylight + PST -0800 Pacific Standard + PDT -0700 Pacific Daylight + MST -0700 Mountain Standard + MDT -0600 Mountain Daylight + CST -0600 Central Standard + CDT -0500 Central Daylight + EST -0500 Eastern Standard + ACT -0500 Brazil, Acre + SAT -0400 Chile + BOT -0400 Bolivia + EDT -0400 Eastern Daylight + AST -0400 Atlantic Standard + AMT -0400 Brazil, Amazon + ACST -0400 Brazil, Acre Daylight + #NST -0330 Newfoundland Standard nst=North Sumatra +0630 + NFT -0330 Newfoundland + #GST -0300 Greenland Standard gst=Guam Standard +1000 + #BST -0300 Brazil Standard bst=British Summer +0100 + BRST -0300 Brazil Standard + BRT -0300 Brazil Standard + AMST -0300 Brazil, Amazon Daylight + ADT -0300 Atlantic Daylight + ART -0300 Argentina + NDT -0230 Newfoundland Daylight + AT -0200 Azores + BRST -0200 Brazil Daylight (official time) + FNT -0200 Brazil, Fernando de Noronha + WAT -0100 West Africa + FNST -0100 Brazil, Fernando de Noronha Daylight + GMT +0000 Greenwich Mean + UT +0000 Universal (Coordinated) + UTC +0000 Universal (Coordinated) + WET +0000 Western European + CET +0100 Central European + FWT +0100 French Winter + MET +0100 Middle European + MEZ +0100 Middle European + MEWT +0100 Middle European Winter + SWT +0100 Swedish Winter + BST +0100 British Summer bst=Brazil standard -0300 + GB +0100 GMT with daylight savings + WEST +0000 Western European Daylight + CEST +0200 Central European Summer + EET +0200 Eastern Europe, USSR Zone 1 + FST +0200 French Summer + MEST +0200 Middle European Summer + MESZ +0200 Middle European Summer + METDST +0200 An alias for MEST used by HP-UX + SAST +0200 South 
African Standard + SST +0200 Swedish Summer sst=South Sumatra +0700 + EEST +0300 Eastern Europe Summer + BT +0300 Baghdad, USSR Zone 2 + MSK +0300 Moscow + EAT +0300 East Africa + IT +0330 Iran + ZP4 +0400 USSR Zone 3 + MSD +0300 Moscow Daylight + ZP5 +0500 USSR Zone 4 + IST +0530 Indian Standard + ZP6 +0600 USSR Zone 5 + NOVST +0600 Novosibirsk time zone, Russia + NST +0630 North Sumatra nst=Newfoundland Std -0330 + #SST +0700 South Sumatra, USSR Zone 6 sst=Swedish Summer +0200 + JAVT +0700 Java + CCT +0800 China Coast, USSR Zone 7 + AWST +0800 Australian Western Standard + WST +0800 West Australian Standard + PHT +0800 Asia Manila + JST +0900 Japan Standard, USSR Zone 8 + ROK +0900 Republic of Korea + ACST +0930 Australian Central Standard + CAST +0930 Central Australian Standard + AEST +1000 Australian Eastern Standard + EAST +1000 Eastern Australian Standard + GST +1000 Guam Standard, USSR Zone 9 gst=Greenland Std -0300 + ACDT +1030 Australian Central Daylight + CADT +1030 Central Australian Daylight + AEDT +1100 Australian Eastern Daylight + EADT +1100 Eastern Australian Daylight + IDLE +1200 International Date Line East + NZST +1200 New Zealand Standard + NZT +1200 New Zealand + NZDT +1300 New Zealand Daylight + +Others can be added in the future upon request. + +Date::Manip must be able to determine the timezone the user is in. It does +this by looking in the following places: + + $Date::Manip::TZ (set with Date_Init or in Manip.pm) + $ENV{TZ} + the unix `date` command (if available) + $main::TZ + /etc/TIMEZONE + /etc/timezone + +At least one of these should contain a timezone in one of the supported +forms. If none do by default, the TZ variable must be set with Date_Init. + +The timezone may be in the STD#DST format (in which case both abbreviations +must be in the table above) or any of the formats described above. The +STD#DST format is NOT available when parsing a date however. The following +forms are also available and are treated similar to the STD#DST forms: + + US/Pacific + US/Mountain + US/Central + US/Eastern + Canada/Pacific + Canada/Mountain + Canada/Central + Canada/Eastern + +=head1 BUSINESS MODE + +Anyone using business mode is going to notice a few quirks about it which +should be explained. When I designed business mode, I had in mind what UPS +tells me when they say 2 day delivery, or what the local business which +promises 1 business day turnaround really means. + +If you do a business day calculation (with the workday set to 9:00-5:00), +you will get the following: + + Saturday at noon + 1 business day = Tuesday at 9:00 + Saturday at noon - 1 business day = Friday at 9:00 + +What does this mean? + +We have a business that works 9-5 and they have a drop box so I can drop +things off over the weekend and they promise 1 business day turnaround. If +I drop something off Friday night, Saturday, or Sunday, it doesn't matter. +They're going to get started on it Monday morning. It'll be 1 business day +to finish the job, so the earliest I can expect it to be done is around +17:00 Monday or 9:00 Tuesday morning. Unfortunately, there is some +ambiguity as to what day 17:00 really falls on, similar to the ambiguity +that occurs when you ask what day midnight falls on. Although it's not the +only answer, Date::Manip treats midnight as the beginning of a day rather +than the end of one. In the same way, 17:00 is equivalent to 9:00 the next +day and any time the date calculations encounter 17:00, it automatically +switch to 9:00 the next day. 
Although this introduces some quirks, I think +this is justified. You just have to treat 17:00/9:00 as being ambiguous +(in the same way you treat midnight as being ambiguous). + +Equivalently, if I want a job to be finished on Saturday (despite the fact +that I cannot pick it up since the business is closed), I have to drop it +off no later than Friday at 9:00. That gives them a full business day to +finish it off. Of course, I could just as easily drop it off at 17:00 +Thursday, or any time between then and 9:00 Friday. Again, it's a matter +of treating 9:00 as ambiguous. + +So, in case the business date calculations ever produce results that you +find confusing, I believe the solution is to write a wrapper which, +whenever it sees a date with the time of exactly 9:00, it treats it +specially (depending on what you want. + +So Saturday + 1 business day = Tuesday at 9:00 (which means anything +from Monday 17:00 to Tuesday 9:00), but Monday at 9:01 + 1 business +day = Tuesday at 9:01 which is exact. + +If this is not exactly what you have in mind, don't use the DateCalc +routine. You can probably get whatever behavior you want using the +routines Date_IsWorkDay, Date_NextWorkDay, and Date_PrevWorkDay described +above. + +=head1 CUSTOMIZING DATE::MANIP + +There are a number of variables which can be used to customize the way +Date::Manip behaves. There are also several ways to set these variables. + +At the top of the Manip.pm file, there is a section which contains all +customization variables. These provide the default values. + +These can be overridden in a global config file if one is present (this +file is optional). If the GlobalCnf variable is set in the Manip.pm file, +it contains the full path to a config file. If the file exists, it's +values will override those set in the Manip.pm file. A sample config file +is included with the Date::Manip distribution. Modify it as appropriate +and copy it to some appropriate directory and set the GlobalCnf variable in +the Manip.pm file. + +Each user can have a personal config file which is of the same form as the +global config file. The variables PersonalCnf and PersonalCnfPath set the +name and search path for the personal config file. This file is also +optional. If present, it overrides any values set in the global file. + +NOTE: if you use business mode calculations, you must have a config file +(either global or personal) since this is the only place where you can +define holidays. + +Finally, any variables passed in through Date_Init override all other +values. + +A config file can be composed of several sections. The first section sets +configuration variables. Lines in this section are of the form: + + VARIABLE = VALUE + +For example, to make the default language French, include the line: + + Language = French + +Only variables described below may be used. Blank lines and lines beginning +with a pound sign (#) are ignored. All spaces are optional and strings are +case insensitive. + +A line which starts with an asterisk (*) designates a new section. For +example, the HOLIDAY section starts with a line: + + *Holiday + +The various sections are defined below. + +=head1 DATE::MANIP VARIABLES + +All Date::Manip variables which can be used are described in the following +section. + +=over 4 + +=item IgnoreGlobalCnf + +If this variable is used (any value is ignored), the global config file +is not read. It must be present in the initial call to Date_Init or the +global config file will be read. 
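+
+For example (a minimal sketch; the particular variables and values chosen
+here are arbitrary), skipping the global config file while setting a couple
+of other variables in that initial call would look like:
+
+  use Date::Manip;
+
+  # Must be the *first* call to Date_Init, or the global config
+  # file has already been read.
+  &Date_Init("IgnoreGlobalCnf=1",
+             "Language=English",
+             "DateFormat=non-US");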
+ +=item EraseHolidays + +If this variable is used (any value is ignored), the current list of +defined holidays is erased. A new set will be set the next time a +config file is read in. This can be set in either the global config file +or as a Date_Init argument (in which case holidays can be read in from +both the global and personal config files) or in the personal config file +(in which case, only holidays in the personal config file are counted). + +=item PathSep + +This is a regular expression used to separate multiple paths. For example, +on Unix, it defaults to a colon (:) so that multiple paths can be written +PATH1:PATH2 . For Win32 platforms, it defaults to a semicolon (;) so that +paths such as "c:\;d:\" will work. + +=item GlobalCnf + +This variable can be passed into Date_Init to point to a global +configuration file. The value must be the complete path to a config file. + +By default, no global config file is read. Any time a global config file +is read, the holidays are erased. + +Paths may have a tilde (~) expansion on platforms where this is supported +(currently Unix and VMS). + +=item PersonalCnf + +This variable can be passed into Date_Init or set in a global config file +to set the name of the personal configuration file. + +The default name for the config file is .DateManip.cnf on all Unix +platforms and Manip.cnf on all non-Unix platforms (because some of them +insist on 8.3 character filenames :-). + +=item PersonalCnfPath + +This is a list of paths separated by the separator specified by the PathSep +variable. These paths are each checked for the PersonalCnf config file. + +Paths may have a tilde (~) expansion on platforms where this is supported +(currently Unix and VMS). + +=item Language + +Date::Manip can be used to parse dates in many different languages. +Currently, it is configured to read the following languages (the version +in which they added is included for historical interest): + + English (default) + French (5.02) + Swedish (5.05) + German (5.31) + Dutch (5.32) aka Nederlands + Polish (5.32) + Spanish (5.33) + Portuguese (5.34) + Romanian (5.35) + Italian (5.35) + Russian (5.41) + Turkish (5.41) + Danish (5.41) + +Others can be added easily. Language is set to the language used to parse +dates. If you are interested in providing a translation for a new +language, email me (see the AUTHOR section below) and I'll send you a list +of things that I need. + +=item DateFormat + +Different countries look at the date 12/10 as Dec 10 or Oct 12. In the +United States, the first is most common, but this certainly doesn't hold +true for other countries. Setting DateFormat to "US" forces the first +behavior (Dec 10). Setting DateFormat to anything else forces the second +behavior (Oct 12). + +=item TZ + +If set, this defines the local timezone. See the TIMEZONES section above +for information on it's format. + +=item ConvTZ + +All date comparisons and calculations must be done in a single time zone in +order for them to work correctly. So, when a date is parsed, it should be +converted to a specific timezone. This allows dates to easily be compared +and manipulated as if they are all in a single timezone. + +The ConvTZ variable determines which timezone should be used to store dates +in. If it is left blank, all dates are converted to the local timezone +(see the TZ variable above). If it is set to one of the timezones listed +above, all dates are converted to this timezone. 
Finally, if it is set to +the string "IGNORE", all timezone information is ignored as the dates are +read in (in this case, the two dates "1/1/96 12:00 GMT" and "1/1/96 12:00 +EST" would be treated as identical). + +=item Internal + +When a date is parsed using ParseDate, that date is stored in an internal +format which is understood by the Date::Manip routines UnixDate and +DateCalc. Originally, the format used to store the date internally was: + + YYYYMMDDHH:MN:SS + +It has been suggested that I remove the colons (:) to shorten this to: + + YYYYMMDDHHMNSS + +The main advantage of this is that some databases are colon delimited which +makes storing a date from Date::Manip tedious. + +In order to maintain backwards compatibility, the Internal variable was +introduced. Set it to 0 (to use the old format) or 1 (to use the new +format). + +=item FirstDay + +It is sometimes necessary to know what day of week is regarded as first. +By default, this is set to Monday, but many countries and people will +prefer Sunday (and in a few cases, a different day may be desired). Set +the FirstDay variable to be the first day of the week (1=Monday, 7=Sunday) +Monday should be chosen to to comply with ISO 8601. + +=item WorkWeekBeg, WorkWeekEnd + +The first and last days of the work week. By default, Monday and Friday. +WorkWeekBeg must come before WorkWeekEnd numerically. The days are +numbered from 1 (Monday) to 7 (Sunday). + +There is no way to handle an odd work week of Thu to Mon for example or 10 +days on, 4 days off. + +=item WorkDay24Hr + +If this is non-nil, a work day is treated as being 24 hours long. The +WorkDayBeg and WorkDayEnd variables are ignored in this case. + +=item WorkDayBeg, WorkDayEnd + +The times when the work day starts and ends. WorkDayBeg must come before +WorkDayEnd (i.e. there is no way to handle the night shift where the work +day starts one day and ends another). Also, the workday MUST be more than +one hour long (of course, if this isn't the case, let me know... I want a +job there!). + +The time in both can be in any valid time format (including international +formats), but seconds will be ignored. + +=item TomorrowFirst + +Periodically, if a day is not a business day, we need to find the nearest +business day to it. By default, we'll look to "tomorrow" first, but if this +variable is set to 0, we'll look to "yesterday" first. This is only used in +the Date_NearestWorkDay and is easily overridden (see documentation for that +function). + +=item DeltaSigns + +Prior to Date::Manip version 5.07, a negative delta would put negative +signs in front of every component (i.e. "0:0:-1:-3:0:-4"). By default, +5.07 changes this behavior to print only 1 or two signs in front of the +year and day elements (even if these elements might be zero) and the sign +for year/month and day/hour/minute/second are the same. Setting this +variable to non-zero forces deltas to be stored with a sign in front of +every element (including elements equal to 0). + +=item Jan1Week1 + +ISO 8601 states that the first week of the year is the one which contains +Jan 4 (i.e. it is the first week in which most of the days in that week +fall in that year). This means that the first 3 days of the year may +be treated as belonging to the last week of the previous year. If this +is set to non-nil, the ISO 8601 standard will be ignored and the first +week of the year contains Jan 1. + +=item YYtoYYYY + +By default, a 2 digit year is treated as falling in the 100 year period of +CURR-89 to CURR+10. 
YYtoYYYY may be set to any integer N to force a 2
+digit year into the period CURR-N to CURR+(99-N). A value of 0 forces
+the year to be the current year or later. A value of 99 forces the year
+to be the current year or earlier. Since I do no checking on the value of
+YYtoYYYY, you can actually have it any positive or negative value to force
+it into any century you want.
+
+YYtoYYYY can also be set to "C" to force it into the current century, or
+to "C##" to force it into a specific century. So, now (1998), "C" forces
+2 digit years to be 1900-1999 and "C18" would force it to be 1800-1899.
+
+It can also be set to the form "C####" to force it into a specific 100
+year period. C1950 refers to 1950-2049.
+
+=item UpdateCurrTZ
+
+If a script is running over a long period of time, the timezone may change
+during the course of running it (i.e. when daylight savings time starts or
+ends). As a result, parsing dates may start putting them in the wrong time
+zone. Since a lot of overhead can be saved if we don't have to check the
+current timezone every time a date is parsed, by default checking is turned
+off. Setting this to non-nil will force timezone checking to be done every
+time a date is parsed... but this will result in a considerable performance
+penalty.
+
+A better solution would be to restart the process on the two days per year
+where the timezone switch occurs.
+
+=item IntCharSet
+
+If set to 0, use the US character set (7-bit ASCII) to return strings such
+as the month name. If set to 1, use the appropriate international character
+set. For example, if you want your French representation of December to
+have the accent over the first "e", you'll want to set this to 1.
+
+=item ForceDate
+
+This variable can be set to a date in the format: YYYY-MM-DD-HH:MN:SS
+to force the current date to be interpreted as this date. Since the current
+date is used in parsing, this string will not be parsed and MUST be in the
+format given above.
+
+=back
+
+=head1 HOLIDAY SECTION
+
+The holiday section of the config file is used to define holidays. Each
+line is of the form:
+
+  DATE = HOLIDAY
+
+HOLIDAY is the name of the holiday (or it can be blank in which case the
+day will still be treated as a holiday... for example the day after
+Thanksgiving or Christmas is often a work holiday though neither are
+named).
+
+DATE is a string which can be parsed to give a valid date in any year. It
+can be of the form
+
+  Date
+  Date + Delta
+  Date - Delta
+  Recur
+
+A valid holiday section would be:
+
+ *Holiday
+
+ 1/1                         = New Year's Day
+ third Monday in Feb         = Presidents' Day
+ fourth Thu in Nov           = Thanksgiving
+
+ # The Friday after Thanksgiving is an unnamed holiday most places
+ fourth Thu in Nov + 1 day   =
+
+ 1*0:0:0:0:0:0*EASTER        = Easter
+ 1*11:0:11:0:0:0*CWD         = Veteran's Day (observed)
+ 1*0:0:0:0:0:0*EASTER,PD5    = Good Friday
+
+In a Date + Delta or Date - Delta string, you can use business mode by
+including the appropriate string (see documentation on DateCalc) in the
+Date or Delta. So (in English), the first workday before Christmas could
+be defined as:
+
+  12/25 - 1 business day  =
+
+The dates may optionally contain the year. For example, the dates
+
+  1/1
+  1/1/1999
+
+refer to Jan 1 in any year, or only in 1999, respectively. For dates that
+refer to any year, the date must be written such that by simply appending
+the year (separated by spaces) it can be correctly interpreted. This
+will work for everything except ISO 8601 dates, so ISO 8601 dates may
+not be used in this case.
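+
+As a short sketch of how such a holiday section is used in practice (the
+file name and the holidays below are only examples; see the PersonalCnf
+and PersonalCnfPath variables above for how the file is located):
+
+  # In the personal config file, e.g. ~/.DateManip.cnf :
+  #
+  #   *Holiday
+  #   1/1                 = New Year's Day
+  #   fourth Thu in Nov   = Thanksgiving
+
+  use Date::Manip;
+
+  &Date_Init("PersonalCnf=.DateManip.cnf");
+
+  # Defined holidays are not work days, so this prints the message.
+  print "not a work day\n"
+    unless Date_IsWorkDay(ParseDate("Jan 1 1999"));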
+
+In cases where you are interested in business type calculations, you'll
+want to define most holidays using recurrences, since they can define
+when a holiday is celebrated in the financial world. For example,
+Christmas could be defined as:
+
+  1*12:0:24:0:0:0*FW1      = Christmas
+
+NOTE: It was pointed out to me that using a similar type of recurrence to
+define New Years does not work. The recurrence:
+
+  1*12:0:31:0:0:0*FW1
+
+fails (worse, it goes into an infinite loop). The problem is that each
+holiday definition is applied to a specific year and it expects to find
+the holiday for that year. When this recurrence is applied to the year
+1995, it returns the holiday for 1996 and fails.
+
+Use the recurrence:
+
+  1*1:0:1:0:0:0*NWD
+
+instead.
+
+If you wanted to define both Christmas and Boxing days (Boxing is the
+day after Christmas, and is celebrated in some parts of the world), you
+could do it in one of the following ways:
+
+  1*12:0:24:0:0:0*FW1      = Christmas
+  1*12:0:25:0:0:0*FW1      = Boxing
+
+  1*12:0:24:0:0:0*FW1      = Christmas
+  01*12:0:24:0:0:0*FW1     = Boxing
+
+  1*12:0:24:0:0:0*FW1      = Christmas
+  1*12:0:25:0:0:0*FW1,a    = Boxing
+
+The following examples will NOT work:
+
+  1*12:0:24:0:0:0*FW1      = Christmas
+  1*12:0:24:0:0:0*FW2      = Boxing
+
+  1*12:0:24:0:0:0*FW1      = Christmas
+  1*12:0:24:0:0:0*FW1      = Boxing
+
+The reasoning behind all this is as follows:
+
+Holidays go into effect the minute they are parsed. So, in the case of:
+
+  1*12:0:24:0:0:0*FW1      = Christmas
+  1*12:0:24:0:0:0*FW2      = Boxing
+
+the minute the first line is parsed, Christmas is defined as a holiday.
+The second line then steps forward 2 work days (skipping Christmas since
+that's no longer a work day) and defines the work day two days after
+Christmas, NOT the day after Christmas.
+
+A good alternative would appear to be:
+
+  1*12:0:24:0:0:0*FW1      = Christmas
+  1*12:0:24:0:0:0*FW1      = Boxing
+
+This unfortunately fails because the recurrences are currently stored in a
+hash. Since these two recurrences are identical, they fail (the first one
+is overwritten by the second and in essence, Christmas is never defined).
+
+To fix this, make them unique with either a fake flag (which is ignored):
+
+  1*12:0:24:0:0:0*FW1,a    = Boxing
+
+or by adding an innocuous 0 somewhere:
+
+  01*12:0:24:0:0:0*FW1     = Boxing
+
+The other good alternative would be to make two completely different
+recurrences such as:
+
+  1*12:0:24:0:0:0*FW1      = Christmas
+  1*12:0:25:0:0:0*FW1      = Boxing
+
+At times, you may want to switch back and forth between two holiday files.
+This can be done by calling the following:
+
+  &Date_Init("EraseHolidays=1","PersonalCnf=FILE1");
+  ...
+  &Date_Init("EraseHolidays=1","PersonalCnf=FILE2");
+  ...
+
+=head1 EVENTS SECTION
+
+The Events section of the config file is similar to the Holiday section.
+It is used to name certain days or times, but there are a few important
+differences:
+
+=over 4
+
+=item Events can be assigned to any time and duration
+
+All holidays are exactly 1 day long. They are assigned to a period
+of time from midnight to midnight.
+
+Events can be based at any time of the day, and may be of any duration.
+
+=item Events don't affect business mode calculations
+
+Unlike holidays, events are completely ignored when doing business
+mode calculations.
+
+=back
+
+Whereas holidays were added with business mode math in mind, events
+were added with calendar and scheduling applications in mind.
+ +Every line in the events section is of the form: + + EVENT = NAME + +where NAME is the name of the event, and EVENT defines when it occurs +and it's duration. An EVENT can be defined in the following ways: + + Date + Date* + Recur [NYI] + Recur* [NYI] + + Date ; Date + Date ; Delta + Recur ; Delta [NYI] + + Date ; Delta ; Delta [NYI] + Recur ; Delta ; Delta [NYI] + +Here, Date* refers to a string containing a Date with NO TIME fields +(Jan 12, 1/1/2000, 2010-01-01) while Date does contain time fields. +Similarily, Recur* stands for a recurrence with the time fields all +equal to 0) while Recur stands for a recurrence with at least one +non-zero time field. + +Both Date* and Recur* refer to an event very similar to a holiday which +goes from midnight to midnight. + +Date and Recur refer to events which occur at the time given and with +a duration of 1 hour. + +Events given by "Date ; Date", "Date ; Delta", and "Recur ; Delta" +contain both the starting date and either ending date or duration. + +Events given as three elements "Date ; Delta ; Delta" or "Recur ; Delta ; +Delta" take a date and add both deltas to it to give the starting and +ending time of the event. The order and sign of the deltas is +unimportant (and both can be the same sign to give a range of times +which does not contain the base date). + +Items marked with [NYI] are not yet implemented but will be by the +time this is released. + +=head1 BACKWARDS INCOMPATIBILITIES + +For the most part, Date::Manip has remained backward compatible at every +release. There have been a few minor incompatibilities introduced at +various stages. Major differences are marked with bullets. + +=over 4 + +=item VERSION 5.41 + +=item Changed path separator for VMS + +Since ":" is used in some VMS paths, it should not have been used as +the path separator. It has been changed to a newline ("\n") character. + +=item Delta_Format behavior changed + +The entire delta is exact if no month component is present (previously, +no year or month component could be present). + +=item VERSION 5.38 + +=item Removed Date_DaysSince999 + +The Date_DaysSince999 function (deprecated in 5.35) has been removed. + +=item VERSION 5.35 + +=over 4 + +=item Deprected Date_DaysSince999 + +In fixing support for the years 0000-0999, I rewrote Date_DaysSince999 to +be Date_DaysSince1BC. The Date_DaysSince999 function will be removed. + +=item * Added PathSep variable + +In order to better support Win32 platforms, I added the PathSep config +variable. This will allow the use of paths such as "c:\date" on Win32 +platforms. Old config files on Win32 platforms (which were not working +correctly in many cases) may not work if they contain path information to +the personal config file. + +=back + +=item VERSION 5.34 + +=over 4 + +=item * All Date::Manip variables are no longer accessible + +Previously, Date::Manip variables were declared using a full package name. +Now, they are declared with the my() function. This means that internal +variables are no longer accessible outside of the module. + +=item Week interpretation in business mode deltas + +A business mode delta containing a week value used to be treated as 7 days. +A much more likely interpretation of a week is Monday to Monday, regardless +of holidays, so this is now the behavior. + +=item %z UnixDate format + +The %z UnixDate format used to return the Timezone abbreviation. It now +returns it as a GMT offset (i.e. -0500). %Z still returns the Timezone +abbreviation. 
+ +=item Formats "22nd sunday" returns the intuitive value + +The date "22nd sunday" used to return the Sunday of the 22nd week of the +year (which could be the 21st, 22nd, or 23rd Sunday of the year depending +on how weeks were defined). Now, it returns the 22nd Sunday of the year +regardless. + +=item Separator in DD/YYmmm and mmmDD/YY formats no longer optional + +Previously, the date "Dec1065" would return Dec 10, 1965. After adding +the YYYYmmm and mmmYYYY formats, this was no longer possible. The separator +between DD and YY is no longer optional, so + + Dec1065 returns December 1, 1065 + Dec10/65 returns December 10, 1965 + +=item * Date_Cmp added + +This is not a backwards incompatibility... but is added to help prepare for +a future incompatibility. In one of the next versions of Date::Manip, the +internal format of the date will change to include timezone information. +All date comparisons should be made using Date_Cmp (which currently does +nothing more than call the perl "cmp" command, but which will important +when comparing dates that include the timezone). + +=back + +=item VERSION 5.32 + +=over 4 + +=item Date_Init arguments + +The old style Date_Init arguments that were deprecated in version 5.07 +have been removed. + +=item * DateManip.cnf change + +Changed .DateManip.cnf to Manip.cnf (to get rid of problems on OS's +that insist on 8.3 filenames) for all non-Unix platforms (Wintel, VMS, +Mac). For all Unix platforms, it's still .DateManip.cnf . It will only +look in the user's home directory on VMS and Unix. + +=back + +=item VERSION 5.30 + +=over 4 + +=item * Delta format changed + +A week field has been added to the internal format of the delta. It now +reads "Y:M:W:D:H:MN:S" instead of "Y:M:D:H:MN:S". + +=back + +=item VERSION 5.21 + +=over 4 + +=item Long running processes may give incorrect timezone + +A process that runs during a timezone change (Daylight Saving Time +specifically) may report the wrong timezone. See the UpdateCurrTZ variable +for more information. + +=item UnixDate "%J", "%W", and "%U" formats fixed + +The %J, %W, and %U will no longer report a week 0 or a week 53 if it should +really be week 1 of the following year. They now report the correct week +number according to ISO 8601. + +=back + +=item VERSION 5.20 + +=over 4 + +=item * ParseDate formats removed (ISO 8601 compatibility) + +Full support for ISO 8601 formats was added. As a result, some formats +which previously worked may no longer be parsed since they conflict with an +ISO 8601 format. These include MM-DD-YY (conflicts with YY-MM-DD) and +YYMMDD (conflicts with YYYYMM). MM/DD/YY still works, so the first form +can be kept easily by changing "-" to "/". YYMMDD can be changed to +YY-MM-DD before being parsed. Whenever parsing dates using dashes as +separators, they will be treated as ISO 8601 dates. You can get around +this by converting all dashes to slashes. + +=item * Week day numbering + +The day numbering was changed from 0-6 (sun-sat) to 1-7 (mon-sun) to be +ISO 8601 compatible. Weeks start on Monday (though this can be overridden +using the FirstDay config variable) and the 1st week of the year contains +Jan 4 (though it can be forced to contain Jan 1 with the Jan1Week1 config +variable). + +=back + +=item VERSION 5.07 + +=over 4 + +=item UnixDate "%s" format + +Used to return the number of seconds since 1/1/1970 in the current +timezone. It now returns the number of seconds since 1/1/1970 GMT. +The "%o" format was added which returns what "%s" previously did. 
+ +=item Internal format of delta + +The format for the deltas returned by ParseDateDelta changed. Previously, +each element of a delta had a sign attached to it (+1:+2:+3:+4:+5:+6). The +new format removes all unnecessary signs by default (+1:2:3:4:5:6). Also, +because of the way deltas are normalized (see documentation on +ParseDateDelta), at most two signs are included. For backwards +compatibility, the config variable DeltaSigns was added. If set to 1, all +deltas include all 6 signs. + +=item Date_Init arguments + +The format of the Date_Init calling arguments changed. The +old method + + &Date_Init($language,$format,$tz,$convtz); + +is still supported , but this support will likely disappear in the future. +Use the new calling format instead: + + &Date_Init("var=val","var=val",...); + +NOTE: The old format is no longer supported as of version 5.32 . + +=back + +=back + +=head1 KNOWN PROBLEMS + +The following are not bugs in Date::Manip, but they may give some people +problems. + +=over 4 + +=item Unable to determine TimeZone + +Perhaps the most common problem occurs when you get the error: + + Error: Date::Manip unable to determine TimeZone. + +Date::Manip tries hard to determine the local timezone, but on some +machines, it cannot do this (especially non-unix systems). To fix this, +just set the TZ variable, either at the top of the Manip.pm file,, in the +DateManip.cnf file, or in a call to Date_Init. I suggest using the form +"EST5EDT" so you don't have to change it every 6 months when going to or +from daylight savings time. + +Windows NT does not seem to set the TimeZone by default. From the +Perl-Win32-Users mailing list: + + > How do I get the TimeZone on my NT? + > + > $time_zone = $ENV{'TZ'}; + > + You have to set the variable before, WinNT doesn't set it by + default. Open the properties of "My Computer" and set a SYSTEM + variable TZ to your timezone. Jenda@Krynicky.cz + +This might help out some NT users. + +A minor (false) assumption that some users might make is that since +Date::Manip passed all of it's tests at install time, this should not occur +and are surprised when it does. + +Some of the tests are timezone dependent. Since the tests all include +input and expected output, I needed to know in advance what timezone they +would be run in. So, the tests all explicitly set the timezone using the +TZ configuration variable passed into Date_Init. Since this overrides any +other method of determining the timezone, Date::Manip uses this and doesn't +have to look elsewhere for the timezone. + +When running outside the tests, Date::Manip has to rely on it's other +methods for determining the timezone. + +=item Complaining about getpwnam/getpwuid + +Another problem is when running on Micro$oft OS'es. I have added many +tests to catch them, but they still slip through occasionally. If any ever +complain about getpwnam/getpwuid, simply add one of the lines: + + $ENV{OS} = Windows_NT + $ENV{OS} = Windows_95 + +to your script before + + use Date::Manip + +=item Date::Manip is slow + +The reasons for this are covered in the SHOULD I USE DATE::MANIP section +above. + +Some things that will definitely help: + +Version 5.21 does run noticeably faster than earlier versions due to +rethinking some of the initialization, so at the very least, make sure you +are running this version or later. + +ISO-8601 dates are parsed first and fastest. Use them whenever possible. + +Avoid parsing dates that are referenced against the current time (in 2 +days, today at noon, etc.). 
These take a lot longer to parse. + + Example: parsing 1065 dates with version 5.11 took 48.6 seconds, 36.2 + seconds with version 5.21, and parsing 1065 ISO-8601 dates with version + 5.21 took 29.1 seconds (these were run on a slow, overloaded computer with + little memory... but the ratios should be reliable on a faster computer). + +Business date calculations are extremely slow. You should consider +alternatives if possible (i.e. doing the calculation in exact mode and then +multiplying by 5/7). There will be an approximate business mode in one of +the next versions which will be much faster (though less accurate) which +will do something like this. Whenever possible, use this mode. And who +needs a business date more accurate than "6 to 8 weeks" anyway huh :-) + +Never call Date_Init more than once. Unless you're doing something very +strange, there should never be a reason to anyway. + +=item Sorting Problems + +If you use Date::Manip to sort a number of dates, you must call Date_Init +either explicitly, or by way of some other Date::Manip routine before it +is used in the sort. For example, the following code fails: + + use Date::Manip; + # &Date_Init; + sub sortDate { + my($date1, $date2); + $date1 = &ParseDate($a); + $date2 = &ParseDate($b); + return (&Date_Cmp($date1,$date2)); + } + @dates = ("Fri 16 Aug 96", + "Mon 19 Aug 96", + "Thu 15 Aug 96"); + @i=sort sortDate @dates; + +but if you uncomment the Date_Init line, it works. The reason for this is +that the first time you call Date_Init, it initializes a number of items +used by Date::Manip. Some of these have to be sorted (regular expressions +sorted by length to ensure the longest match). It turns out that perl +has a bug in it which does not allow a sort within a sort. At some point, +this should be fixed, but for now, the best thing to do is to call Date_Init +explicitly. The bug exists in all versions up to 5.005 (I haven't +tested 5.6.0 yet). + +NOTE: This is an EXTREMELY inefficient way to sort data. Instead, you +should parse the dates with ParseDate, sort them using a normal string +comparison, and then convert them back to the format desired using +UnixDate. + +=item RCS Control + +If you try to put Date::Manip under RCS control, you are going to have +problems. Apparently, RCS replaces strings of the form "$Date...$" with +the current date. This form occurs all over in Date::Manip. To prevent the +RCS keyword expansion, checkout files using "co -ko". Since very few people +will ever have a desire to do this (and I don't use RCS), I have not worried +about it. + +=back + +=head1 KNOWN BUGS + +=over 4 + +=item Daylight Savings Times + +Date::Manip does not handle daylight savings time, though it does handle +timezones to a certain extent. Converting from EST to PST works fine. +Going from EST to PDT is unreliable. + +The following examples are run in the winter of the US East coast (i.e. +in the EST timezone). + + print UnixDate(ParseDate("6/1/97 noon"),"%u"),"\n"; + => Sun Jun 1 12:00:00 EST 1997 + +June 1 EST does not exist. June 1st is during EDT. It should print: + + => Sun Jun 1 00:00:00 EDT 1997 + +Even explicitly adding the timezone doesn't fix things (if anything, it +makes them worse): + + print UnixDate(ParseDate("6/1/97 noon EDT"),"%u"),"\n"; + => Sun Jun 1 11:00:00 EST 1997 + +Date::Manip converts everything to the current timezone (EST in this case). + +Related problems occur when trying to do date calculations over a timezone +change. These calculations may be off by an hour. 
+
+Also, if you are running a script which uses Date::Manip over a period of
+time which starts in one time zone and ends in another (i.e. it switches
+from Daylight Savings Time to Standard Time or vice versa), many things may
+be wrong (especially elapsed time).
+
+I hope to fix these problems in a future release so that everything would
+be converted to the correct zone (EST or EDT).
+
+=back
+
+=head1 BUGS AND QUESTIONS
+
+If you find a bug in Date::Manip, please send it directly to me (see the
+AUTHOR section below) rather than posting it to one of the newsgroups.
+Although I try to keep up with the comp.lang.perl.* groups, all too often I
+miss news (flaky news server, articles expiring before I caught them, 1200
+articles to wade through and I missed one that I was interested in, etc.).
+
+When filing a bug report, please include the following information:
+
+  o  The version of Date::Manip you are using. You can get this by using
+     the script:
+
+       use Date::Manip;
+       print &DateManipVersion(),"\n";
+
+  o  The output from "perl -V"
+
+If you have a problem using Date::Manip that perhaps isn't a bug (can't
+figure out the syntax, etc.), you're in the right place. Go right back to
+the top of this man page and start reading. If this still doesn't answer
+your question, mail me (again, please mail me rather than post to the
+newsgroup).
+
+=head1 YEAR 2000
+
+In hindsight, the fact that I've only been asked once (so far) if Date::Manip
+is year 2000 compliant surprises me a bit. Still, as 2000 approaches and
+this buzzword starts flying around more and more frantically, others might
+follow suit, so this section answers the question.
+
+Is Date::Manip year 2000 compliant?
+
+This question is largely meaningless. Date::Manip is basically just a
+parser. You give it a date and it'll manipulate it. Date::Manip does
+store the date internally as a 4 digit year, and performs all operations
+using this internal representation, so I will state that Date::Manip is
+CAPABLE of being used to write Y2K compliant code.
+
+But Date::Manip is simply a library. If you use it correctly, your code
+can be Y2K compliant. If you don't, your code may not be Y2K compliant.
+
+The bottom line is this:
+
+  Date::Manip is a library that is capable of being used to write Y2K
+  compliant code. It may also be used to write non-Y2K compliant code.
+
+  If your code is NOT Y2K compliant, it is NOT due to any deficiency in
+  Date::Manip. Rather, it is due to poor programming on the part of the
+  person using Date::Manip.
+
+For an excellent treatment of the Y2K problem, see the article by Tom
+Christiansen at:
+
+  http://language.perl.com/news/y2k.html
+
+A slightly better question is "Is Perl year 2000 compliant"? This is
+covered in the perl FAQ (section 4) and in the article by Tom Christiansen.
+
+The best question is "For what dates is Date::Manip useful?" It definitely
+can't handle BC dates, or dates past Dec 31, 9999. So Date::Manip works
+during the years 1000 to 9999.
+
+In practical terms however, Date::Manip deals with the Gregorian calendar,
+and is therefore useful in the period that that calendar has been, or will
+be, in effect. The Gregorian calendar was first adopted by the Catholic
+church in 1582, but some countries were still using the Julian calendar as
+late as the early part of the 20th century. Also, at some point (probably
+no earlier than the year 3000 and possibly much later), the Gregorian
+system is going to have to be modified slightly since the current system of
+leap years is off by a few seconds a year. So... in practical terms,
+Date::Manip is _probably_ useful from 1900 to 3000.
+
+One other note is that Date::Manip will NOT handle 3 digit years. So, if
+you store the year as an offset from 1900 (which is 2 digits now, but will
+become 3 digits in 2000), these will NOT be parsable by Date::Manip.
+
+=head1 VERSION NUMBERS
+
+A note about version numbers.
+
+Prior to version 5.00, Date::Manip was distributed as a perl4 library.
+There were no numbering conventions in place, so I used a simple
+MAJOR.MINOR numbering scheme.
+
+With version 5.00, I switched to a perl5 module and at that time switched
+to the perl5 numbering convention of a major version followed by a 2 digit
+minor version.
+
+As of 5.41/5.42, all versions released to CPAN will be even numbered. Odd
+numbered versions will be development versions available from my web site. For
+example, after 5.40 was released, I started making changes, and called
+the development version 5.41. When released to CPAN, it was called 5.42.
+I may add a third digit to development versions (i.e. 5.41.9) to keep
+track of important changes in the development version.
+
+=head1 ACKNOWLEDGMENTS
+
+There are many people who have contributed to Date::Manip over the years
+that I'd like to thank. The most important contributions have come in the
+form of suggestions and bug reports by users. I have tried to include the
+name of every person who first suggested each improvement or first reported
+each bug. These are included in the HISTORY file in the Date::Manip
+distribution in the order the changes were made. The list is simply too
+long to appear here, but I appreciate their help.
+
+A number of people have made suggestions or reported bugs which are not
+mentioned in the HISTORY file. These include suggestions which have not
+been implemented and people who have made a suggestion or bug report which
+has already been suggested/reported by someone else. For those whose
+suggestions have not yet been implemented, they will be added to the
+HISTORY file when (if) their suggestions are implemented. For everyone
+else, thank you too. I'd much rather have a suggestion made twice than not
+at all.
+
+Thanks to Alan Cezar and Greg Schiedler for paying me to implement the
+Events_List routine. They gave me the idea, and were then willing to pay
+me for my time to get it implemented quickly.
+
+I'd also like to thank a couple of authors. Date::Manip has recently been
+getting some really good press in a couple of books. Since no one's paying
+me to write Date::Manip, seeing my module get a good review in a book
+written by someone else really makes my day. My thanks to Nate Padwardhan
+and Clay Irving (Programming with Perl Modules -- part of the O'Reilly Perl
+Resource Kit); and Tom Christiansen and Nathan Torkington (The Perl
+Cookbook). Also, thanks to any other authors who've written about
+Date::Manip whose books I haven't seen.
+
+=head1 AUTHOR
+
+Sullivan Beck (sbeck@cpan.org)
+
+You can always get the newest beta version of Date::Manip (which may fix
+problems in the current CPAN version...
and may add others) from my home +page: + +http://www.cise.ufl.edu/~sbeck/ + +=cut diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/Parse/Yapp.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/Parse/Yapp.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,512 @@ +# +# Module Parse::Yapp.pm. +# +# Copyright (c) 1998-2001, Francois Desarmenien, all right reserved. +# +# See the Copyright section at the end of the Parse/Yapp.pm pod section +# for usage and distribution rights. +# +# +package Parse::Yapp; + +use strict; +use vars qw($VERSION @ISA); +@ISA = qw(Parse::Yapp::Output); + +use Parse::Yapp::Output; + +# $VERSION is in Parse/Yapp/Driver.pm + + +1; + +__END__ + +=head1 NAME + +Parse::Yapp - Perl extension for generating and using LALR parsers. + +=head1 SYNOPSIS + + yapp -m MyParser grammar_file.yp + + ... + + use MyParser; + + $parser=new MyParser(); + $value=$parser->YYParse(yylex => \&lexer_sub, yyerror => \&error_sub); + + $nberr=$parser->YYNberr(); + + $parser->YYData->{DATA}= [ 'Anything', 'You Want' ]; + + $data=$parser->YYData->{DATA}[0]; + +=head1 DESCRIPTION + +Parse::Yapp (Yet Another Perl Parser compiler) is a collection of modules +that let you generate and use yacc like thread safe (reentrant) parsers with +perl object oriented interface. + +The script yapp is a front-end to the Parse::Yapp module and let you +easily create a Perl OO parser from an input grammar file. + +=head2 The Grammar file + +=over 4 + +=item C + +Through all your files, comments are either Perl style, introduced by I<#> +up to the end of line, or C style, enclosed between I and I<*/>. + + +=item C + + +Through all the grammar files, two kind of symbols may appear: +I symbols, called also I symbols, +which are the names of your rules, and I symbols, called +also I. + +Tokens are the symbols your lexer function will feed your parser with +(see below). They are of two flavours: symbolic tokens and string +literals. + +Non-terminals and symbolic tokens share the same identifier syntax: + + [A-Za-z][A-Za-z0-9_]* + +String literals are enclosed in single quotes and can contain almost +anything. They will be output to your parser file double-quoted, making +any special character as such. '"', '$' and '@' will be automatically +quoted with '\', making their writing more natural. On the other hand, +if you need a single quote inside your literal, just quote it with '\'. + +You cannot have a literal I<'error'> in your grammar as it would +confuse the driver with the I token. Use a symbolic token instead. +In case you inadvertently use it, this will produce a warning telling you +you should have written it I and will treat it as if it were the +I token, which is certainly NOT what you meant. + + +=item C + +It is very close to yacc syntax (in fact, I should compile +a clean I grammar without any modification, whereas the opposite +is not true). + +This file is divided in three sections, separated by C<%%>: + + header section + %% + rules section + %% + footer section + +=over 4 + +=item B section may optionally contain: + +=item * + +One or more code blocks enclosed inside C<%{> and C<%}> just like in +yacc. They may contain any valid Perl code and will be copied verbatim +at the very beginning of the parser module. They are not as useful as +they are in yacc, but you can use them, for example, for global variable +declarations, though you will notice later that such global variables can +be avoided to make a reentrant parser module. 
+ +=item * + +Precedence declarations, introduced by C<%left>, C<%right> and C<%nonassoc> +specifying associativity, followed by the list of tokens or litterals +having the same precedence and associativity. +The precedence beeing the latter declared will be having the highest level. +(see the yacc or bison manuals for a full explanation of how they work, +as they are implemented exactly the same way in Parse::Yapp) + +=item * + +C<%start> followed by a rule's left hand side, declaring this rule to +be the starting rule of your grammar. The default, when C<%start> is not +used, is the first rule in your grammar section. + +=item * + +C<%token> followed by a list of symbols, forcing them to be recognized +as tokens, generating a syntax error if used in the left hand side of +a rule declaration. +Note that in Parse::Yapp, you I need to declare tokens as in yacc: any +symbol not appearing as a left hand side of a rule is considered to be +a token. +Other yacc declarations or constructs such as C<%type> and C<%union> are +parsed but (almost) ignored. + +=item * + +C<%expect> followed by a number, suppress warnings about number of Shift/Reduce +conflicts when both numbers match, a la bison. + + +=item B contains your grammar rules: + +A rule is made of a left-hand-side symbol, followed by a C<':'> and one +or more right-hand-sides separated by C<'|'> and terminated by a C<';'>: + + exp: exp '+' exp + | exp '-' exp + ; + +A right hand side may be empty: + + input: #empty + | input line + ; + +(if you have more than one empty rhs, Parse::Yapp will issue a warning, +as this is usually a mistake, and you will certainly have a reduce/reduce +conflict) + + +A rhs may be followed by an optional C<%prec> directive, followed +by a token, giving the rule an explicit precedence (see yacc manuals +for its precise meaning) and optionnal semantic action code block (see +below). + + exp: '-' exp %prec NEG { -$_[1] } + | exp '+' exp { $_[1] + $_[3] } + | NUM + ; + +Note that in Parse::Yapp, a lhs I appear more than once as +a rule name (This differs from yacc). + + +=item C + +may contain any valid Perl code and will be appended at the very end +of your parser module. Here you can write your lexer, error report +subs and anything relevant to you parser. + +=item C + +Semantic actions are run every time a I occurs in the +parsing flow and they must return a semantic value. + +They are (usually, but see below C) written at +the very end of the rhs, enclosed with C<{ }>, and are copied verbatim +to your parser file, inside of the rules table. + +Be aware that matching braces in Perl is much more difficult than +in C: inside strings they don't need to match. While in C it is +very easy to detect the beginning of a string construct, or a +single character, it is much more difficult in Perl, as there +are so many ways of writing such literals. So there is no check +for that today. If you need a brace in a double-quoted string, just +quote it (C<\{> or C<\}>). For single-quoted strings, you will need +to make a comment matching it I. +Sorry for the inconvenience. + + { + "{ My string block }". + "\{ My other string block \}". + qq/ My unmatched brace \} /. + # Force the match: { + q/ for my closing brace } / + q/ My opening brace { / + # must be closed: } + } + +All of these constructs should work. + + +In Parse::Yapp, semantic actions are called like normal Perl sub calls, +with their arguments passed in C<@_>, and their semantic value are +their return values. 
+
+$_[1] to $_[n] are the parameters just as $1 to $n in yacc, while
+$_[0] is the parser object itself.
+
+Having $_[0] be the parser object itself allows you to call
+parser methods. That's how the yacc macros are implemented:
+
+    yyerrok      is done by calling $_[0]->YYErrok
+    YYERROR      is done by calling $_[0]->YYError
+    YYACCEPT     is done by calling $_[0]->YYAccept
+    YYABORT      is done by calling $_[0]->YYAbort
+
+All those methods explicitly return I<undef>, for convenience.
+
+    YYRECOVERING is done by calling $_[0]->YYRecovering
+
+Four methods are useful in an error recovery sub:
+
+    $_[0]->YYCurtok
+    $_[0]->YYCurval
+    $_[0]->YYExpect
+    $_[0]->YYLexer
+
+return respectively the current input token that made the parse fail,
+its semantic value (both can be used to modify their values too, but
+I ! See I section for
+an example), a list which contains the tokens the parser expected when
+the failure occurred and a reference to the lexer routine.
+
+Note that if C<$_[0]-E<gt>YYCurtok> is declared as a C<%nonassoc> token,
+it can be included in the C<$_[0]-E<gt>YYExpect> list whenever the input
+tries to use it in an associative way. This is not a bug: the token
+IS expected to report an error if encountered.
+
+To detect such a thing in your error reporting sub, the following
+example should do the trick:
+
+    grep { $_[0]->YYCurtok eq $_ } $_[0]->YYExpect
+        and do {
+            #Non-associative token used in an associative expression
+        };
+
+Accessing semantic values on the left of your reducing rule is done
+through the method
+
+    $_[0]->YYSemval( index )
+
+where index is an integer. Its value being I<1 .. n> returns the same values
+as I<$_[1] .. $_[n]>, but I<-n .. 0> returns values on the left of the rule
+being reduced (it is related to I<$-n .. $0 .. $n> in yacc, but you
+cannot use I<$_[0]> or I<$_[-n]> constructs in Parse::Yapp for obvious reasons).
+
+There is also a provision for a user data area in the parser object,
+accessed by the method:
+
+    $_[0]->YYData
+
+which returns a reference to an anonymous hash, which lets you have
+all of your parsing data held inside the object (see the Calc.yp
+or ParseYapp.yp files in the distribution for some examples).
+That's how you can make your parser module reentrant: all of your
+module states and variables are held inside the parser object.
+
+Note: unfortunately, method calls in Perl have a lot of overhead,
+      and when YYData is used, it may be called a huge number
+      of times. If you are not a *real* purist and efficiency
+      is your concern, you may access the user-space in the
+      object directly: $parser->{USER}, which is a reference
+      to an anonymous hash; and then benchmark.
+
+If no action is specified for a rule, the equivalent of a default
+action is run, which returns the first parameter:
+
+    { $_[1] }
+
+=item C<In rule actions>
+
+It is also possible to embed semantic actions inside of a rule:
+
+    typedef: TYPE { $type = $_[1] } identlist { ... } ;
+
+When the Parse::Yapp parser encounters such an embedded action, it modifies
+the grammar as if you wrote (although @x-1 is not a legal lhs value):
+
+    @x-1: /* empty */ { $type = $_[1] };
+    typedef: TYPE @x-1 identlist { ... } ;
+
+where I<x> is a sequential number incremented for each "in rule" action,
+and I<-1> represents the "dot position" in the rule where the action arises.
+
+In such actions, you can use I<$_[1]..$_[n]> variables, which are the
+semantic values on the left of your action.
+
+Be aware that the way Parse::Yapp modifies your grammar because of
+I<in rule actions> can produce, in some cases, spurious conflicts
+that wouldn't happen otherwise.
+
+=item C
+
+Now that your grammar file is written, you can use yapp on it
+to generate your parser module:
+
+    yapp -v Calc.yp
+
+will create two files: F<Calc.pm>, your parser module, and F<Calc.output>,
+a verbose output of your parser rules, conflicts, warnings, states
+and summary.
+
+What you are missing now is a lexer routine.
+
+=item C
+
+is called each time the parser needs to read the next token.
+
+It is called with only one argument that is the parser object itself,
+so you can access its methods, especially the
+
+    $_[0]->YYData
+
+data area.
+
+It is its duty to return the next token and value to the parser.
+They B<must> be returned as a list of two variables: the first one
+is the token known by the parser (symbolic or literal), the second
+one being anything you want (usually the content of the token, or the
+literal value), from a simple scalar value to any complex reference,
+as the parsing driver never uses it except to call semantic actions:
+
+    ( 'NUMBER', $num )
+or
+    ( '>=', '>=' )
+or
+    ( 'ARRAY', [ @values ] )
+
+When the lexer reaches the end of input, it must return the C<''>
+empty token with an undef value:
+
+    ( '', undef )
+
+Note that your lexer should I<never> return C<'error'> as token
+value: for the driver, this is the error token used for error
+recovery and would lead to odd reactions.
+
+Now that you have your lexer written, maybe you will need to output
+meaningful error messages, instead of the default which is to print
+'Parse error.' on STDERR.
+
+So you will need an Error reporting sub.
+
+=item C
+
+If you want one, write it knowing that it is passed the parser object
+as a parameter. So you can share information with the lexer
+routine quite easily.
+
+You can also use the C<$_[0]-E<gt>YYErrok> method in it, which will
+resume parsing as if no error had occurred. Of course, since the invalid
+token is still invalid, you're supposed to fix the problem by
+yourself.
+
+The method C<$_[0]-E<gt>YYLexer> may help you, as it returns a reference
+to the lexer routine, and can be called as
+
+    ($tok,$val)=&{$_[0]->YYLexer}
+
+to get the next token and semantic value from the input stream. To
+make them current for the parser, use:
+
+    ($_[0]->YYCurtok, $_[0]->YYCurval) = ($tok, $val)
+
+and know what you're doing...
+
+=item C
+
+Now you've got everything to do the parsing.
+
+First, use the parser module:
+
+    use Calc;
+
+Then create the parser object:
+
+    $parser=new Calc;
+
+Now, call the YYParse method, telling it where to find the lexer
+and error report subs:
+
+    $result=$parser->YYParse(yylex => \&Lexer,
+                             yyerror => \&ErrorReport);
+
+(assuming Lexer and ErrorReport subs have been written in your current
+package)
+
+The order in which parameters appear is unimportant.
+
+Et voila.
+
+The YYParse method will do the parse, then return the last semantic
+value returned, or undef if error recovery cannot recover.
+
+If you need to be sure the parse has been successful (in case your
+last returned semantic value I<is> undef) make a call to:
+
+    $parser->YYNberr()
+
+which returns the total number of times the error reporting sub has been called.
+
+=item C
+
+in Parse::Yapp is implemented the same way it is in yacc.
+
+=item C
+
+To debug your parser, you can call the YYParse method with a debug parameter:
+
+    $parser->YYParse( ... , yydebug => value, ...

where value is a bitfield, each bit enabling a specific debug output:

    Bit Value    Outputs
    0x01         Token reading (useful for Lexer debugging)
    0x02         States information
    0x04         Driver actions (shifts, reduces, accept...)
    0x08         Parse Stack dump
    0x10         Error Recovery tracing

To have full debugging output, use

    yydebug => 0x1F

Debugging output is sent to STDERR, and be aware that it can produce
very large outputs.

=item C<Standalone parsers>

By default, the parser modules generated will need the Parse::Yapp
module installed on the system to run. They use the Parse::Yapp::Driver
module, which can be safely shared between parsers in the same script.

In case you'd prefer to have a standalone module generated, use
the C<-s> switch with yapp: this will automagically copy the driver
code into your module so you can use/distribute it without needing
the Parse::Yapp module, making it a truly standalone parser.

If you do so, please remember to include Parse::Yapp's copyright notice
in your main module's copyright, so others can know about the Parse::Yapp
module.

=item C<Source line numbers>

by default will be included in the generated parser module, which will help
to find the guilty line in your source file in case of a syntax error.
You can disable this feature by compiling your grammar with yapp using
the C<-n> switch.

=back

=head1 BUGS AND SUGGESTIONS

If you find bugs, think of anything that could improve Parse::Yapp,
or have any questions related to it, feel free to contact the author.

=head1 AUTHOR

Francois Desarmenien

=head1 SEE ALSO

yapp(1) perl(1) yacc(1) bison(1).

=head1 COPYRIGHT

The Parse::Yapp module and its related modules and shell scripts are
copyright (c) 1998-2001 Francois Desarmenien, France. All rights reserved.

You may use and distribute them under the terms of either
the GNU General Public License or the Artistic License,
as specified in the Perl README file.

If you use the "standalone parser" option so people don't need to install
Parse::Yapp on their systems in order to run your software, this copyright
notice should be included in your software's copyright too, and the copyright
notice in the embedded driver should be left untouched.

=cut
diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/Parse/Yapp/Driver.pm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dummy_foundation/lib/Parse/Yapp/Driver.pm Thu May 28 10:10:03 2009 +0100
@@ -0,0 +1,471 @@
+#
+# Module Parse::Yapp::Driver
+#
+# This module is part of the Parse::Yapp package available on your
+# nearest CPAN
+#
+# Any use of this module in a standalone parser make the included
+# text under the same copyright as the Parse::Yapp module itself.
+#
+# This notice should remain unchanged.
+#
+# (c) Copyright 1998-2001 Francois Desarmenien, all rights reserved.
+# (see the pod text in Parse::Yapp module for use and distribution rights) +# + +package Parse::Yapp::Driver; + +require 5.004; + +use strict; + +use vars qw ( $VERSION $COMPATIBLE $FILENAME ); + +$VERSION = '1.05'; +$COMPATIBLE = '0.07'; +$FILENAME=__FILE__; + +use Carp; + +#Known parameters, all starting with YY (leading YY will be discarded) +my(%params)=(YYLEX => 'CODE', 'YYERROR' => 'CODE', YYVERSION => '', + YYRULES => 'ARRAY', YYSTATES => 'ARRAY', YYDEBUG => ''); +#Mandatory parameters +my(@params)=('LEX','RULES','STATES'); + +sub new { + my($class)=shift; + my($errst,$nberr,$token,$value,$check,$dotpos); + my($self)={ ERROR => \&_Error, + ERRST => \$errst, + NBERR => \$nberr, + TOKEN => \$token, + VALUE => \$value, + DOTPOS => \$dotpos, + STACK => [], + DEBUG => 0, + CHECK => \$check }; + + _CheckParams( [], \%params, \@_, $self ); + + exists($$self{VERSION}) + and $$self{VERSION} < $COMPATIBLE + and croak "Yapp driver version $VERSION ". + "incompatible with version $$self{VERSION}:\n". + "Please recompile parser module."; + + ref($class) + and $class=ref($class); + + bless($self,$class); +} + +sub YYParse { + my($self)=shift; + my($retval); + + _CheckParams( \@params, \%params, \@_, $self ); + + if($$self{DEBUG}) { + _DBLoad(); + $retval = eval '$self->_DBParse()';#Do not create stab entry on compile + $@ and die $@; + } + else { + $retval = $self->_Parse(); + } + $retval +} + +sub YYData { + my($self)=shift; + + exists($$self{USER}) + or $$self{USER}={}; + + $$self{USER}; + +} + +sub YYErrok { + my($self)=shift; + + ${$$self{ERRST}}=0; + undef; +} + +sub YYNberr { + my($self)=shift; + + ${$$self{NBERR}}; +} + +sub YYRecovering { + my($self)=shift; + + ${$$self{ERRST}} != 0; +} + +sub YYAbort { + my($self)=shift; + + ${$$self{CHECK}}='ABORT'; + undef; +} + +sub YYAccept { + my($self)=shift; + + ${$$self{CHECK}}='ACCEPT'; + undef; +} + +sub YYError { + my($self)=shift; + + ${$$self{CHECK}}='ERROR'; + undef; +} + +sub YYSemval { + my($self)=shift; + my($index)= $_[0] - ${$$self{DOTPOS}} - 1; + + $index < 0 + and -$index <= @{$$self{STACK}} + and return $$self{STACK}[$index][1]; + + undef; #Invalid index +} + +sub YYCurtok { + my($self)=shift; + + @_ + and ${$$self{TOKEN}}=$_[0]; + ${$$self{TOKEN}}; +} + +sub YYCurval { + my($self)=shift; + + @_ + and ${$$self{VALUE}}=$_[0]; + ${$$self{VALUE}}; +} + +sub YYExpect { + my($self)=shift; + + keys %{$self->{STATES}[$self->{STACK}[-1][0]]{ACTIONS}} +} + +sub YYLexer { + my($self)=shift; + + $$self{LEX}; +} + + +################# +# Private stuff # +################# + + +sub _CheckParams { + my($mandatory,$checklist,$inarray,$outhash)=@_; + my($prm,$value); + my($prmlst)={}; + + while(($prm,$value)=splice(@$inarray,0,2)) { + $prm=uc($prm); + exists($$checklist{$prm}) + or croak("Unknow parameter '$prm'"); + ref($value) eq $$checklist{$prm} + or croak("Invalid value for parameter '$prm'"); + $prm=unpack('@2A*',$prm); + $$outhash{$prm}=$value; + } + for (@$mandatory) { + exists($$outhash{$_}) + or croak("Missing mandatory parameter '".lc($_)."'"); + } +} + +sub _Error { + print "Parse error.\n"; +} + +sub _DBLoad { + { + no strict 'refs'; + + exists(${__PACKAGE__.'::'}{_DBParse})#Already loaded ? + and return; + } + my($fname)=__FILE__; + my(@drv); + open(DRV,"<$fname") or die "Report this as a BUG: Cannot open $fname"; + while() { + /^\s*sub\s+_Parse\s*{\s*$/ .. 
/^\s*}\s*#\s*_Parse\s*$/ + and do { + s/^#DBG>//; + push(@drv,$_); + } + } + close(DRV); + + $drv[0]=~s/_P/_DBP/; + eval join('',@drv); +} + +#Note that for loading debugging version of the driver, +#this file will be parsed from 'sub _Parse' up to '}#_Parse' inclusive. +#So, DO NOT remove comment at end of sub !!! +sub _Parse { + my($self)=shift; + + my($rules,$states,$lex,$error) + = @$self{ 'RULES', 'STATES', 'LEX', 'ERROR' }; + my($errstatus,$nberror,$token,$value,$stack,$check,$dotpos) + = @$self{ 'ERRST', 'NBERR', 'TOKEN', 'VALUE', 'STACK', 'CHECK', 'DOTPOS' }; + +#DBG> my($debug)=$$self{DEBUG}; +#DBG> my($dbgerror)=0; + +#DBG> my($ShowCurToken) = sub { +#DBG> my($tok)='>'; +#DBG> for (split('',$$token)) { +#DBG> $tok.= (ord($_) < 32 or ord($_) > 126) +#DBG> ? sprintf('<%02X>',ord($_)) +#DBG> : $_; +#DBG> } +#DBG> $tok.='<'; +#DBG> }; + + $$errstatus=0; + $$nberror=0; + ($$token,$$value)=(undef,undef); + @$stack=( [ 0, undef ] ); + $$check=''; + + while(1) { + my($actions,$act,$stateno); + + $stateno=$$stack[-1][0]; + $actions=$$states[$stateno]; + +#DBG> print STDERR ('-' x 40),"\n"; +#DBG> $debug & 0x2 +#DBG> and print STDERR "In state $stateno:\n"; +#DBG> $debug & 0x08 +#DBG> and print STDERR "Stack:[". +#DBG> join(',',map { $$_[0] } @$stack). +#DBG> "]\n"; + + + if (exists($$actions{ACTIONS})) { + + defined($$token) + or do { + ($$token,$$value)=&$lex($self); +#DBG> $debug & 0x01 +#DBG> and print STDERR "Need token. Got ".&$ShowCurToken."\n"; + }; + + $act= exists($$actions{ACTIONS}{$$token}) + ? $$actions{ACTIONS}{$$token} + : exists($$actions{DEFAULT}) + ? $$actions{DEFAULT} + : undef; + } + else { + $act=$$actions{DEFAULT}; +#DBG> $debug & 0x01 +#DBG> and print STDERR "Don't need token.\n"; + } + + defined($act) + and do { + + $act > 0 + and do { #shift + +#DBG> $debug & 0x04 +#DBG> and print STDERR "Shift and go to state $act.\n"; + + $$errstatus + and do { + --$$errstatus; + +#DBG> $debug & 0x10 +#DBG> and $dbgerror +#DBG> and $$errstatus == 0 +#DBG> and do { +#DBG> print STDERR "**End of Error recovery.\n"; +#DBG> $dbgerror=0; +#DBG> }; + }; + + + push(@$stack,[ $act, $$value ]); + + $$token ne '' #Don't eat the eof + and $$token=$$value=undef; + next; + }; + + #reduce + my($lhs,$len,$code,@sempar,$semval); + ($lhs,$len,$code)=@{$$rules[-$act]}; + +#DBG> $debug & 0x04 +#DBG> and $act +#DBG> and print STDERR "Reduce using rule ".-$act." ($lhs,$len): "; + + $act + or $self->YYAccept(); + + $$dotpos=$len; + + unpack('A1',$lhs) eq '@' #In line rule + and do { + $lhs =~ /^\@[0-9]+\-([0-9]+)$/ + or die "In line rule name '$lhs' ill formed: ". + "report it as a BUG.\n"; + $$dotpos = $1; + }; + + @sempar = $$dotpos + ? map { $$_[1] } @$stack[ -$$dotpos .. -1 ] + : (); + + $semval = $code ? &$code( $self, @sempar ) + : @sempar ? 
$sempar[0] : undef; + + splice(@$stack,-$len,$len); + + $$check eq 'ACCEPT' + and do { + +#DBG> $debug & 0x04 +#DBG> and print STDERR "Accept.\n"; + + return($semval); + }; + + $$check eq 'ABORT' + and do { + +#DBG> $debug & 0x04 +#DBG> and print STDERR "Abort.\n"; + + return(undef); + + }; + +#DBG> $debug & 0x04 +#DBG> and print STDERR "Back to state $$stack[-1][0], then "; + + $$check eq 'ERROR' + or do { +#DBG> $debug & 0x04 +#DBG> and print STDERR +#DBG> "go to state $$states[$$stack[-1][0]]{GOTOS}{$lhs}.\n"; + +#DBG> $debug & 0x10 +#DBG> and $dbgerror +#DBG> and $$errstatus == 0 +#DBG> and do { +#DBG> print STDERR "**End of Error recovery.\n"; +#DBG> $dbgerror=0; +#DBG> }; + + push(@$stack, + [ $$states[$$stack[-1][0]]{GOTOS}{$lhs}, $semval ]); + $$check=''; + next; + }; + +#DBG> $debug & 0x04 +#DBG> and print STDERR "Forced Error recovery.\n"; + + $$check=''; + + }; + + #Error + $$errstatus + or do { + + $$errstatus = 1; + &$error($self); + $$errstatus # if 0, then YYErrok has been called + or next; # so continue parsing + +#DBG> $debug & 0x10 +#DBG> and do { +#DBG> print STDERR "**Entering Error recovery.\n"; +#DBG> ++$dbgerror; +#DBG> }; + + ++$$nberror; + + }; + + $$errstatus == 3 #The next token is not valid: discard it + and do { + $$token eq '' # End of input: no hope + and do { +#DBG> $debug & 0x10 +#DBG> and print STDERR "**At eof: aborting.\n"; + return(undef); + }; + +#DBG> $debug & 0x10 +#DBG> and print STDERR "**Dicard invalid token ".&$ShowCurToken.".\n"; + + $$token=$$value=undef; + }; + + $$errstatus=3; + + while( @$stack + and ( not exists($$states[$$stack[-1][0]]{ACTIONS}) + or not exists($$states[$$stack[-1][0]]{ACTIONS}{error}) + or $$states[$$stack[-1][0]]{ACTIONS}{error} <= 0)) { + +#DBG> $debug & 0x10 +#DBG> and print STDERR "**Pop state $$stack[-1][0].\n"; + + pop(@$stack); + } + + @$stack + or do { + +#DBG> $debug & 0x10 +#DBG> and print STDERR "**No state left on stack: aborting.\n"; + + return(undef); + }; + + #shift the error token + +#DBG> $debug & 0x10 +#DBG> and print STDERR "**Shift \$error token and go to state ". +#DBG> $$states[$$stack[-1][0]]{ACTIONS}{error}. +#DBG> ".\n"; + + push(@$stack, [ $$states[$$stack[-1][0]]{ACTIONS}{error}, undef ]); + + } + + #never reached + croak("Error in driver logic. Please, report it as a BUG"); + +}#_Parse +#DO NOT remove comment + +1; + diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/Parse/Yapp/Grammar.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/Parse/Yapp/Grammar.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,381 @@ +# +# Module Parse::Yapp::Grammar +# +# (c) Copyright 1998-2001 Francois Desarmenien, all rights reserved. 
+# (see the pod text in Parse::Yapp module for use and distribution rights) +# +package Parse::Yapp::Grammar; +@ISA=qw( Parse::Yapp::Options ); + +require 5.004; + +use Carp; +use strict; +use Parse::Yapp::Options; +use Parse::Yapp::Parse; + +############### +# Constructor # +############### +sub new { + my($class)=shift; + my($values); + + my($self)=$class->SUPER::new(@_); + + my($parser)=new Parse::Yapp::Parse; + + defined($self->Option('input')) + or croak "No input grammar"; + + $values = $parser->Parse($self->Option('input')); + + undef($parser); + + $$self{GRAMMAR}=_ReduceGrammar($values); + + ref($class) + and $class=ref($class); + + bless($self, $class); +} + +########### +# Methods # +########### +########################## +# Method To View Grammar # +########################## +sub ShowRules { + my($self)=shift; + my($rules)=$$self{GRAMMAR}{RULES}; + my($ruleno)=-1; + my($text); + + for (@$rules) { + my($lhs,$rhs)=@$_; + + $text.=++$ruleno.":\t".$lhs." -> "; + if(@$rhs) { + $text.=join(' ',map { $_ eq chr(0) ? '$end' : $_ } @$rhs); + } + else { + $text.="/* empty */"; + } + $text.="\n"; + } + $text; +} + +########################### +# Method To View Warnings # +########################### +sub Warnings { + my($self)=shift; + my($text); + my($grammar)=$$self{GRAMMAR}; + + exists($$grammar{UUTERM}) + and do { + $text="Unused terminals:\n\n"; + for (@{$$grammar{UUTERM}}) { + $text.="\t$$_[0], declared line $$_[1]\n"; + } + $text.="\n"; + }; + exists($$grammar{UUNTERM}) + and do { + $text.="Useless non-terminals:\n\n"; + for (@{$$grammar{UUNTERM}}) { + $text.="\t$$_[0], declared line $$_[1]\n"; + } + $text.="\n"; + }; + exists($$grammar{UURULES}) + and do { + $text.="Useless rules:\n\n"; + for (@{$$grammar{UURULES}}) { + $text.="\t$$_[0] -> ".join(' ',@{$$_[1]})."\n"; + } + $text.="\n"; + }; + $text; +} + +###################################### +# Method to get summary about parser # +###################################### +sub Summary { + my($self)=shift; + my($text); + + $text ="Number of rules : ". + scalar(@{$$self{GRAMMAR}{RULES}})."\n"; + $text.="Number of terminals : ". + scalar(keys(%{$$self{GRAMMAR}{TERM}}))."\n"; + $text.="Number of non-terminals : ". + scalar(keys(%{$$self{GRAMMAR}{NTERM}}))."\n"; + $text; +} + +############################### +# Method to Ouput rules table # +############################### +sub RulesTable { + my($self)=shift; + my($inputfile)=$self->Option('inputfile'); + my($linenums)=$self->Option('linenumbers'); + my($rules)=$$self{GRAMMAR}{RULES}; + my($ruleno); + my($text); + + defined($inputfile) + or $inputfile = 'unkown'; + + $text="[\n\t"; + + $text.=join(",\n\t", + map { + my($lhs,$rhs,$code)=@$_[0,1,3]; + my($len)=scalar(@$rhs); + my($text); + + $text.="[#Rule ".$ruleno++."\n\t\t '$lhs', $len,"; + if($code) { + $text.= "\nsub". + ( $linenums + ? qq(\n#line $$code[1] "$inputfile"\n) + : " "). 
+ "{$$code[0]}"; + } + else { + $text.=' undef'; + } + $text.="\n\t]"; + + $text; + } @$rules); + + $text.="\n]"; + + $text; +} + +################################ +# Methods to get HEAD and TAIL # +################################ +sub Head { + my($self)=shift; + my($inputfile)=$self->Option('inputfile'); + my($linenums)=$self->Option('linenumbers'); + my($text); + + $$self{GRAMMAR}{HEAD}[0] + or return ''; + + defined($inputfile) + or $inputfile = 'unkown'; + + for (@{$$self{GRAMMAR}{HEAD}}) { + $linenums + and $text.=qq(#line $$_[1] "$inputfile"\n); + $text.=$$_[0]; + } + $text +} + +sub Tail { + my($self)=shift; + my($inputfile)=$self->Option('inputfile'); + my($linenums)=$self->Option('linenumbers'); + my($text); + + $$self{GRAMMAR}{TAIL}[0] + or return ''; + + defined($inputfile) + or $inputfile = 'unkown'; + + $linenums + and $text=qq(#line $$self{GRAMMAR}{TAIL}[1] "$inputfile"\n); + $text.=$$self{GRAMMAR}{TAIL}[0]; + + $text +} + + +################# +# Private Stuff # +################# + +sub _UsefulRules { + my($rules,$nterm) = @_; + my($ufrules,$ufnterm); + my($done); + + $ufrules=pack('b'.@$rules); + $ufnterm={}; + + vec($ufrules,0,1)=1; #start rules IS always useful + + RULE: + for (1..$#$rules) { # Ignore start rule + for my $sym (@{$$rules[$_][1]}) { + exists($$nterm{$sym}) + and next RULE; + } + vec($ufrules,$_,1)=1; + ++$$ufnterm{$$rules[$_][0]}; + } + + do { + $done=1; + + RULE: + for (grep { vec($ufrules,$_,1) == 0 } 1..$#$rules) { + for my $sym (@{$$rules[$_][1]}) { + exists($$nterm{$sym}) + and not exists($$ufnterm{$sym}) + and next RULE; + } + vec($ufrules,$_,1)=1; + exists($$ufnterm{$$rules[$_][0]}) + or do { + $done=0; + ++$$ufnterm{$$rules[$_][0]}; + }; + } + + }until($done); + + ($ufrules,$ufnterm) + +}#_UsefulRules + +sub _Reachable { + my($rules,$nterm,$term,$ufrules,$ufnterm)=@_; + my($reachable); + my(@fifo)=( 0 ); + + $reachable={ '$start' => 1 }; #$start is always reachable + + while(@fifo) { + my($ruleno)=shift(@fifo); + + for my $sym (@{$$rules[$ruleno][1]}) { + + exists($$term{$sym}) + and do { + ++$$reachable{$sym}; + next; + }; + + ( not exists($$ufnterm{$sym}) + or exists($$reachable{$sym}) ) + and next; + + ++$$reachable{$sym}; + push(@fifo, grep { vec($ufrules,$_,1) } @{$$nterm{$sym}}); + } + } + + $reachable + +}#_Reachable + +sub _SetNullable { + my($rules,$term,$nullable) = @_; + my(@nrules); + my($done); + + RULE: + for (@$rules) { + my($lhs,$rhs)=@$_; + + exists($$nullable{$lhs}) + and next; + + for (@$rhs) { + exists($$term{$_}) + and next RULE; + } + push(@nrules,[$lhs,$rhs]); + } + + do { + $done=1; + + RULE: + for (@nrules) { + my($lhs,$rhs)=@$_; + + exists($$nullable{$lhs}) + and next; + + for (@$rhs) { + exists($$nullable{$_}) + or next RULE; + } + $done=0; + ++$$nullable{$lhs}; + } + + }until($done); +} + +sub _ReduceGrammar { + my($values)=@_; + my($ufrules,$ufnterm,$reachable); + my($grammar)={ HEAD => $values->{HEAD}, + TAIL => $values->{TAIL}, + EXPECT => $values->{EXPECT} }; + my($rules,$nterm,$term) = @$values {'RULES', 'NTERM', 'TERM'}; + + ($ufrules,$ufnterm) = _UsefulRules($rules,$nterm); + + exists($$ufnterm{$values->{START}}) + or die "*Fatal* Start symbol $values->{START} derives nothing, at eof\n"; + + $reachable = _Reachable($rules,$nterm,$term,$ufrules,$ufnterm); + + $$grammar{TERM}{chr(0)}=undef; + for my $sym (keys %$term) { + ( exists($$reachable{$sym}) + or exists($values->{PREC}{$sym}) ) + and do { + $$grammar{TERM}{$sym} + = defined($$term{$sym}[0]) ? 
$$term{$sym} : undef; + next; + }; + push(@{$$grammar{UUTERM}},[ $sym, $values->{SYMS}{$sym} ]); + } + + $$grammar{NTERM}{'$start'}=[]; + for my $sym (keys %$nterm) { + exists($$reachable{$sym}) + and do { + exists($values->{NULL}{$sym}) + and ++$$grammar{NULLABLE}{$sym}; + $$grammar{NTERM}{$sym}=[]; + next; + }; + push(@{$$grammar{UUNTERM}},[ $sym, $values->{SYMS}{$sym} ]); + } + + for my $ruleno (0..$#$rules) { + vec($ufrules,$ruleno,1) + and exists($$grammar{NTERM}{$$rules[$ruleno][0]}) + and do { + push(@{$$grammar{RULES}},$$rules[$ruleno]); + push(@{$$grammar{NTERM}{$$rules[$ruleno][0]}},$#{$$grammar{RULES}}); + next; + }; + push(@{$$grammar{UURULES}},[ @{$$rules[$ruleno]}[0,1] ]); + } + + _SetNullable(@$grammar{'RULES', 'TERM', 'NULLABLE'}); + + $grammar; +}#_ReduceGrammar + +1; diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/Parse/Yapp/Lalr.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/Parse/Yapp/Lalr.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,939 @@ +# +# Module Parse::Yapp::Lalr +# +# (c) Copyright 1998-2001 Francois Desarmenien, all rights reserved. +# (see the pod text in Parse::Yapp module for use and distribution rights) +# +package Parse::Yapp::Lalr; +@ISA=qw( Parse::Yapp::Grammar ); + +require 5.004; + +use Parse::Yapp::Grammar; + +=for nobody + +Parse::Yapp::Compile Object Structure: +-------------------------------------- +{ + GRAMMAR => Parse::Yapp::Grammar, + STATES => [ { CORE => [ items... ], + ACTIONS => { term => action } + GOTOS => { nterm => stateno } + }... ] + CONFLICTS=>{ SOLVED => { stateno => [ ruleno, token, solved ] }, + FORCED => { TOTAL => [ nbsr, nbrr ], + DETAIL => { stateno => { TOTAL => [ nbsr, nbrr ] } + LIST => [ ruleno, token ] + } + } + } +} + +'items' are of form: [ ruleno, dotpos ] +'term' in ACTIONS is '' means default action +'action' may be: + undef: explicit error (nonassociativity) + 0 : accept + >0 : shift and go to state 'action' + <0 : reduce using rule -'action' +'solved' may have values of: + 'shift' if solved as Shift + 'reduce' if solved as Reduce + 'error' if solved by discarding both Shift and Reduce (nonassoc) + +SOLVED is a set of states containing Solved conflicts +FORCED are forced conflict resolutions + +nbsr and nbrr are number of shift/reduce and reduce/reduce conflicts + +TOTAL is the total number of SR/RR conflicts for the parser + +DETAIL is the detail of conflicts for each state +TOTAL is the total number of SR/RR conflicts for a state +LIST is the list of discarded reductions (for display purpose only) + + +=cut + +use strict; + +use Carp; + +############### +# Constructor # +############### +sub new { + my($class)=shift; + + ref($class) + and $class=ref($class); + + my($self)=$class->SUPER::new(@_); + $self->_Compile(); + bless($self,$class); +} +########### +# Methods # +########### + +########################### +# Method To View Warnings # +########################### +sub Warnings { + my($self)=shift; + my($text); + my($nbsr,$nbrr)=@{$$self{CONFLICTS}{FORCED}{TOTAL}}; + + $text=$self->SUPER::Warnings(); + + $nbsr != $$self{GRAMMAR}{EXPECT} + and $text.="$nbsr shift/reduce conflict".($nbsr > 1 ? "s" : ""); + + $nbrr + and do { + $nbsr + and $text.=" and "; + $text.="$nbrr reduce/reduce conflict".($nbrr > 1 ? 
"s" : ""); + }; + + ( $nbsr != $$self{GRAMMAR}{EXPECT} + or $nbrr) + and $text.="\n"; + + $text; +} +############################# +# Method To View DFA States # +############################# +sub ShowDfa { + my($self)=shift; + my($text); + my($grammar,$states)=($$self{GRAMMAR}, $$self{STATES}); + + for my $stateno (0..$#$states) { + my(@shifts,@reduces,@errors,$default); + + $text.="State $stateno:\n\n"; + + #Dump Kernel Items + for (sort { $$a[0] <=> $$b[0] + or $$a[1] <=> $$b[1] } @{$$states[$stateno]{'CORE'}}) { + my($ruleno,$pos)=@$_; + my($lhs,$rhs)=@{$$grammar{RULES}[$ruleno]}[0,1]; + my(@rhscopy)=@$rhs; + + $ruleno + or $rhscopy[-1] = '$end'; + + splice(@rhscopy,$pos,0,'.'); + $text.= "\t$lhs -> ".join(' ',@rhscopy)."\t(Rule $ruleno)\n"; + } + + #Prepare Actions + for (keys(%{$$states[$stateno]{ACTIONS}})) { + my($term,$action)=($_,$$states[$stateno]{ACTIONS}{$_}); + + $term eq chr(0) + and $term = '$end'; + + not defined($action) + and do { + push(@errors,$term); + next; + }; + + $action > 0 + and do { + push(@shifts,[ $term, $action ]); + next; + }; + + $action = -$action; + + $term + or do { + $default= [ '$default', $action ]; + next; + }; + + push(@reduces,[ $term, $action ]); + } + + #Dump shifts + @shifts + and do { + $text.="\n"; + for (sort { $$a[0] cmp $$b[0] } @shifts) { + my($term,$shift)=@$_; + + $text.="\t$term\tshift, and go to state $shift\n"; + } + }; + + #Dump errors + @errors + and do { + $text.="\n"; + for my $term (sort { $a cmp $b } @errors) { + $text.="\t$term\terror (nonassociative)\n"; + } + }; + + #Prepare reduces + exists($$self{CONFLICTS}{FORCED}{DETAIL}{$stateno}) + and push(@reduces,@{$$self{CONFLICTS}{FORCED}{DETAIL}{$stateno}{LIST}}); + + @reduces=sort { $$a[0] cmp $$b[0] or $$a[1] <=> $$b[1] } @reduces; + + defined($default) + and push(@reduces,$default); + + #Dump reduces + @reduces + and do { + $text.="\n"; + for (@reduces) { + my($term,$ruleno)=@$_; + my($discard); + + $ruleno < 0 + and do { + ++$discard; + $ruleno = -$ruleno; + }; + + $text.= "\t$term\t".($discard ? "[" : ""); + if($ruleno) { + $text.= "reduce using rule $ruleno ". + "($$grammar{RULES}[$ruleno][0])"; + } + else { + $text.='accept'; + } + $text.=($discard ? "]" : "")."\n"; + } + }; + + #Dump gotos + exists($$states[$stateno]{GOTOS}) + and do { + $text.= "\n"; + for (keys(%{$$states[$stateno]{GOTOS}})) { + $text.= "\t$_\tgo to state $$states[$stateno]{GOTOS}{$_}\n"; + } + }; + + $text.="\n"; + } + $text; +} + +###################################### +# Method to get summary about parser # +###################################### +sub Summary { + my($self)=shift; + my($text); + + $text=$self->SUPER::Summary(); + $text.="Number of states : ". + scalar(@{$$self{STATES}})."\n"; + $text; +} + +####################################### +# Method To Get Infos about conflicts # +####################################### +sub Conflicts { + my($self)=shift; + my($states)=$$self{STATES}; + my($conflicts)=$$self{CONFLICTS}; + my($text); + + for my $stateno ( sort { $a <=> $b } keys(%{$$conflicts{SOLVED}})) { + + for (@{$$conflicts{SOLVED}{$stateno}}) { + my($ruleno,$token,$how)=@$_; + + $token eq chr(0) + and $token = '$end'; + + $text.="Conflict in state $stateno between rule ". + "$ruleno and token $token resolved as $how.\n"; + } + }; + + for my $stateno ( sort { $a <=> $b } keys(%{$$conflicts{FORCED}{DETAIL}})) { + my($nbsr,$nbrr)=@{$$conflicts{FORCED}{DETAIL}{$stateno}{TOTAL}}; + + $text.="State $stateno contains "; + + $nbsr + and $text.="$nbsr shift/reduce conflict". + ($nbsr > 1 ? 
"s" : ""); + + $nbrr + and do { + $nbsr + and $text.=" and "; + + $text.="$nbrr reduce/reduce conflict". + ($nbrr > 1 ? "s" : ""); + }; + $text.="\n"; + }; + + $text; +} + +################################# +# Method to dump parsing tables # +################################# +sub DfaTable { + my($self)=shift; + my($states)=$$self{STATES}; + my($stateno); + my($text); + + $text="[\n\t{"; + + $text.=join("\n\t},\n\t{", + map { + my($state)=$_; + my($text); + + $text="#State ".$stateno++."\n\t\t"; + + ( not exists($$state{ACTIONS}{''}) + or keys(%{$$state{ACTIONS}}) > 1) + and do { + + $text.="ACTIONS => {\n\t\t\t"; + + $text.=join(",\n\t\t\t", + map { + my($term,$action)=($_,$$state{ACTIONS}{$_}); + my($text); + + if(substr($term,0,1) eq "'") { + $term=~s/([\@\$\"])/\\$1/g; + $term=~s/^'|'$/"/g; + } + else { + $term= $term eq chr(0) + ? "''" + : "'$term'"; + } + + if(defined($action)) { + $action=int($action); + } + else { + $action='undef'; + } + + "$term => $action"; + + } grep { $_ } keys(%{$$state{ACTIONS}})); + + $text.="\n\t\t}"; + }; + + exists($$state{ACTIONS}{''}) + and do { + keys(%{$$state{ACTIONS}}) > 1 + and $text.=",\n\t\t"; + + $text.="DEFAULT => $$state{ACTIONS}{''}"; + }; + + exists($$state{GOTOS}) + and do { + $text.=",\n\t\tGOTOS => {\n\t\t\t"; + $text.=join(",\n\t\t\t", + map { + my($nterm,$stateno)=($_,$$state{GOTOS}{$_}); + my($text); + + "'$nterm' => $stateno"; + + } keys(%{$$state{GOTOS}})); + $text.="\n\t\t}"; + }; + + $text; + + }@$states); + + $text.="\n\t}\n]"; + + $text; + +} + + +#################################### +# Method to build Dfa from Grammar # +#################################### +sub _Compile { + my($self)=shift; + my($grammar,$states); + + $grammar=$self->{GRAMMAR}; + + $states = _LR0($grammar); + + $self->{CONFLICTS} = _LALR($grammar,$states); + + $self->{STATES}=$states; +} + +######################### +# LR0 States Generation # +######################### +# +########################### +# General digraph routine # +########################### +sub _Digraph { + my($rel,$F)=@_; + my(%N,@S); + my($infinity)=(~(1<<31)); + my($Traverse); + + $Traverse = sub { + my($x,$d)=@_; + my($y); + + push(@S,$x); + $N{$x}=$d; + + exists($$rel{$x}) + and do { + for $y (keys(%{$$rel{$x}})) { + exists($N{$y}) + or &$Traverse($y,$d+1); + + $N{$y} < $N{$x} + and $N{$x} = $N{$y}; + + $$F{$x}|=$$F{$y}; + } + }; + + $N{$x} == $d + and do { + for(;;) { + $y=pop(@S); + $N{$y}=$infinity; + $y eq $x + and last; + $$F{$y}=$$F{$x}; + } + }; + }; + + for (keys(%$rel)) { + exists($N{$_}) + or &$Traverse($_,1); + } +} +####################### +# Generate LR0 states # +####################### +=for nobody +Formula used for closures: + + CLOSE(A) = DCLOSE(A) u U (CLOSE(B) | A close B) + +where: + + DCLOSE(A) = { [ A -> alpha ] in P } + + A close B iff [ A -> B gamma ] in P + +=cut +sub _SetClosures { + my($grammar)=@_; + my($rel,$closures); + + for my $symbol (keys(%{$$grammar{NTERM}})) { + $closures->{$symbol}=pack('b'.@{$$grammar{RULES}}); + + for my $ruleno (@{$$grammar{NTERM}{$symbol}}) { + my($rhs)=$$grammar{RULES}[$ruleno][1]; + + vec($closures->{$symbol},$ruleno,1)=1; + + @$rhs > 0 + and exists($$grammar{NTERM}{$$rhs[0]}) + and ++$rel->{$symbol}{$$rhs[0]}; + } + } + _Digraph($rel,$closures); + + $closures +} + +sub _Closures { + my($grammar,$core,$closures)=@_; + my($ruleset)=pack('b'.@{$$grammar{RULES}}); + + for (@$core) { + my($ruleno,$pos)=@$_; + my($rhs)=$$grammar{RULES}[$ruleno][1]; + + $pos < @$rhs + and exists($closures->{$$rhs[$pos]}) + and 
$ruleset|=$closures->{$$rhs[$pos]}; + } + [ @$core, map { [ $_, 0 ] } + grep { vec($ruleset,$_,1) } + 0..$#{$$grammar{RULES}} ]; +} + +sub _Transitions { + my($grammar,$cores,$closures,$states,$stateno)=@_; + my($core)=$$states[$stateno]{'CORE'}; + my(%transitions); + + for (@{_Closures($grammar,$core,$closures)}) { + my($ruleno,$pos)=@$_; + my($rhs)=$$grammar{RULES}[$ruleno][1]; + + $pos == @$rhs + and do { + push(@{$$states[$stateno]{ACTIONS}{''}},$ruleno); + next; + }; + push(@{$transitions{$$rhs[$pos]}},[ $ruleno, $pos+1 ]); + } + + for (keys(%transitions)) { + my($symbol,$core)=($_,$transitions{$_}); + my($corekey)=join(',',map { join('.',@$_) } + sort { $$a[0] <=> $$b[0] + or $$a[1] <=> $$b[1] } + @$core); + my($tostateno); + + exists($cores->{$corekey}) + or do { + push(@$states,{ 'CORE' => $core }); + $cores->{$corekey}=$#$states; + }; + + $tostateno=$cores->{$corekey}; + push(@{$$states[$tostateno]{FROM}},$stateno); + + exists($$grammar{TERM}{$_}) + and do { + $$states[$stateno]{ACTIONS}{$_} = [ $tostateno ]; + next; + }; + $$states[$stateno]{GOTOS}{$_} = $tostateno; + } +} + +sub _LR0 { + my($grammar)=@_; + my($states) = []; + my($stateno); + my($closures); #$closures={ nterm => ruleset,... } + my($cores)={}; # { "itemlist" => stateno, ... } + # where "itemlist" has the form: + # "ruleno.pos,ruleno.pos" ordered by ruleno,pos + + $closures = _SetClosures($grammar); + push(@$states,{ 'CORE' => [ [ 0, 0 ] ] }); + for($stateno=0;$stateno<@$states;++$stateno) { + _Transitions($grammar,$cores,$closures,$states,$stateno); + } + + $states +} + +######################################################### +# Add Lookahead tokens where needed to make LALR states # +######################################################### +=for nobody + Compute First sets for non-terminal using the following formula: + + FIRST(A) = { a in T u { epsilon } | A l a } + u + U { FIRST(B) | B in V and A l B } + + where: + + A l x iff [ A -> X1 X2 .. 
Xn x alpha ] in P and Xi =>* epsilon, 1 <= i <= n +=cut +sub _SetFirst { + my($grammar,$termlst,$terminx)=@_; + my($rel,$first)=( {}, {} ); + + for my $symbol (keys(%{$$grammar{NTERM}})) { + $first->{$symbol}=pack('b'.@$termlst); + + RULE: + for my $ruleno (@{$$grammar{NTERM}{$symbol}}) { + my($rhs)=$$grammar{RULES}[$ruleno][1]; + + for (@$rhs) { + exists($terminx->{$_}) + and do { + vec($first->{$symbol},$terminx->{$_},1)=1; + next RULE; + }; + ++$rel->{$symbol}{$_}; + exists($$grammar{NULLABLE}{$_}) + or next RULE; + } + vec($first->{$symbol},0,1)=1; + } + } + _Digraph($rel,$first); + + $first +} + +sub _Preds { + my($states,$stateno,$len)=@_; + my($queue, $preds); + + $len + or return [ $stateno ]; + + $queue=[ [ $stateno, $len ] ]; + while(@$queue) { + my($pred) = shift(@$queue); + my($stateno, $len) = @$pred; + + $len == 1 + and do { + push(@$preds,@{$states->[$stateno]{FROM}}); + next; + }; + + push(@$queue, map { [ $_, $len - 1 ] } + @{$states->[$stateno]{FROM}}); + } + + # Pass @$preds through a hash to ensure unicity + [ keys( %{ +{ map { ($_,1) } @$preds } } ) ]; +} + +sub _FirstSfx { + my($grammar,$firstset,$termlst,$terminx,$ruleno,$pos,$key)=@_; + my($first)=pack('b'.@$termlst); + my($rhs)=$$grammar{RULES}[$ruleno][1]; + + for (;$pos < @$rhs;++$pos) { + exists($terminx->{$$rhs[$pos]}) + and do { + vec($first,$terminx->{$$rhs[$pos]},1)=1; + return($first); + }; + $first|=$firstset->{$$rhs[$pos]}; + + vec($first,0,1) + and vec($first,0,1)=0; + + exists($$grammar{NULLABLE}{$$rhs[$pos]}) + or return($first); + + } + vec($first,0,1)=1; + $first; +} + +=for noboby + Compute Follow sets using following formula: + + FOLLOW(p,A) = READ(p,A) + u + U { FOLLOW(q,B) | (p,A) include (q,B) + + where: + + READ(p,A) = U { FIRST(beta) | [ A -> alpha A . beta ] in KERNEL(GOTO(p,A)) + } - { epsilon } + + (p,a) include (q,B) iff [ B -> alpha A . 
beta ] in KERNEL(GOTO(p,A), + epsilon in FIRST(beta) and + q in PRED(p,alpha) +=cut +sub _ComputeFollows { + my($grammar,$states,$termlst)=@_; + my($firstset,$terminx); + my($inconsistent, $rel, $follows, $sfx)= ( {}, {}, {}, {} ); + + %$terminx= map { ($termlst->[$_],$_) } 0..$#$termlst; + + $firstset=_SetFirst($grammar,$termlst,$terminx); + + for my $stateno (0..$#$states) { + my($state)=$$states[$stateno]; + + exists($$state{ACTIONS}{''}) + and ( @{$$state{ACTIONS}{''}} > 1 + or keys(%{$$state{ACTIONS}}) > 1 ) + and do { + ++$inconsistent->{$stateno}; + + for my $ruleno (@{$$state{ACTIONS}{''}}) { + my($lhs,$rhs)=@{$$grammar{RULES}[$ruleno]}[0,1]; + + for my $predno (@{_Preds($states,$stateno,scalar(@$rhs))}) { + ++$rel->{"$stateno.$ruleno"}{"$predno.$lhs"}; + } + } + }; + + exists($$state{GOTOS}) + or next; + + for my $symbol (keys(%{$$state{GOTOS}})) { + my($tostate)=$$states[$$state{GOTOS}{$symbol}]; + my($goto)="$stateno.$symbol"; + + $follows->{$goto}=pack('b'.@$termlst); + + for my $item (@{$$tostate{'CORE'}}) { + my($ruleno,$pos)=@$item; + my($key)="$ruleno.$pos"; + + exists($sfx->{$key}) + or $sfx->{$key} = _FirstSfx($grammar,$firstset, + $termlst,$terminx, + $ruleno,$pos,$key); + + $follows->{$goto}|=$sfx->{$key}; + + vec($follows->{$goto},0,1) + and do { + my($lhs)=$$grammar{RULES}[$ruleno][0]; + + vec($follows->{$goto},0,1)=0; + + for my $predno (@{_Preds($states,$stateno,$pos-1)}) { + ++$rel->{$goto}{"$predno.$lhs"}; + } + }; + } + } + } + _Digraph($rel,$follows); + + ($follows,$inconsistent) +} + +sub _ComputeLA { + my($grammar,$states)=@_; + my($termlst)= [ '',keys(%{$$grammar{TERM}}) ]; + + my($follows,$inconsistent) = _ComputeFollows($grammar,$states,$termlst); + + for my $stateno ( keys(%$inconsistent ) ) { + my($state)=$$states[$stateno]; + my($conflict); + + #NB the sort is VERY important for conflicts resolution order + for my $ruleno (sort { $a <=> $b } + @{$$state{ACTIONS}{''}}) { + for my $term ( map { $termlst->[$_] } grep { + vec($follows->{"$stateno.$ruleno"},$_,1) } + 0..$#$termlst) { + exists($$state{ACTIONS}{$term}) + and ++$conflict; + push(@{$$state{ACTIONS}{$term}},-$ruleno); + } + } + delete($$state{ACTIONS}{''}); + $conflict + or delete($inconsistent->{$stateno}); + } + + $inconsistent +} + +############################# +# Solve remaining conflicts # +############################# + +sub _SolveConflicts { + my($grammar,$states,$inconsistent)=@_; + my(%rulesprec,$RulePrec); + my($conflicts)={ SOLVED => {}, + FORCED => { TOTAL => [ 0, 0 ], + DETAIL => {} + } + }; + + $RulePrec = sub { + my($ruleno)=@_; + my($rhs,$rprec)=@{$$grammar{RULES}[$ruleno]}[1,2]; + my($lastterm); + + defined($rprec) + and return($rprec); + + exists($rulesprec{$ruleno}) + and return($rulesprec{$ruleno}); + + $lastterm=(grep { exists($$grammar{TERM}{$_}) } @$rhs)[-1]; + + defined($lastterm) + and ref($$grammar{TERM}{$lastterm}) + and do { + $rulesprec{$ruleno}=$$grammar{TERM}{$lastterm}[1]; + return($rulesprec{$ruleno}); + }; + + undef; + }; + + for my $stateno (keys(%$inconsistent)) { + my($state)=$$states[$stateno]; + my($actions)=$$state{ACTIONS}; + my($nbsr,$nbrr); + + for my $term ( keys(%$actions) ) { + my($act)=$$actions{$term}; + + @$act > 1 + or next; + + $$act[0] > 0 + and ref($$grammar{TERM}{$term}) + and do { + my($assoc,$tprec)=@{$$grammar{TERM}{$term}}; + my($k,$error); + + for ($k=1;$k<@$act;++$k) { + my($ruleno)=-$$act[$k]; + my($rprec)=&$RulePrec($ruleno); + + defined($rprec) + or next; + + ( $tprec > $rprec + or ( $tprec == $rprec and $assoc eq 'RIGHT')) + and do 
{ + push(@{$$conflicts{SOLVED}{$stateno}}, + [ $ruleno, $term, 'shift' ]); + splice(@$act,$k--,1); + next; + }; + ( $tprec < $rprec + or $assoc eq 'LEFT') + and do { + push(@{$$conflicts{SOLVED}{$stateno}}, + [ $ruleno, $term, 'reduce' ]); + $$act[0] > 0 + and do { + splice(@$act,0,1); + --$k; + }; + next; + }; + push(@{$$conflicts{SOLVED}{$stateno}}, + [ $ruleno, $term, 'error' ]); + splice(@$act,$k--,1); + $$act[0] > 0 + and do { + splice(@$act,0,1); + ++$error; + --$k; + }; + } + $error + and unshift(@$act,undef); + }; + + @$act > 1 + and do { + $nbrr += @$act - 2; + ($$act[0] > 0 ? $nbsr : $nbrr) += 1; + push(@{$$conflicts{FORCED}{DETAIL}{$stateno}{LIST}}, + map { [ $term, $_ ] } splice(@$act,1)); + }; + } + + $nbsr + and do { + $$conflicts{FORCED}{TOTAL}[0]+=$nbsr; + $$conflicts{FORCED}{DETAIL}{$stateno}{TOTAL}[0]+=$nbsr; + }; + + $nbrr + and do { + $$conflicts{FORCED}{TOTAL}[1]+=$nbrr; + $$conflicts{FORCED}{DETAIL}{$stateno}{TOTAL}[1]+=$nbrr; + }; + + } + + $conflicts +} + +############################### +# Make default reduce actions # +############################### +sub _SetDefaults { + my($states)=@_; + + for my $state (@$states) { + my($actions)=$$state{ACTIONS}; + my(%reduces,$default,$nodefault); + + exists($$actions{''}) + and do { + $$actions{''}[0] = -$$actions{''}[0]; + ++$nodefault; + }; + + #shift error token => no default + exists($$actions{error}) + and $$actions{error}[0] > 0 + and ++$nodefault; + + for my $term (keys(%$actions)) { + + $$actions{$term}=$$actions{$term}[0]; + + ( not defined($$actions{$term}) + or $$actions{$term} > 0 + or $nodefault) + and next; + + push(@{$reduces{$$actions{$term}}},$term); + } + + keys(%reduces) > 0 + or next; + + $default=( map { $$_[0] } + sort { $$b[1] <=> $$a[1] or $$b[0] <=> $$a[0] } + map { [ $_, scalar(@{$reduces{$_}}) ] } + keys(%reduces))[0]; + + delete(@$actions{ @{$reduces{$default}} }); + $$state{ACTIONS}{''}=$default; + } +} + +sub _LALR { + my($grammar,$states) = @_; + my($conflicts,$inconsistent); + + $inconsistent = _ComputeLA($grammar,$states); + + $conflicts = _SolveConflicts($grammar,$states,$inconsistent); + _SetDefaults($states); + + $conflicts +} + + +1; diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/Parse/Yapp/Options.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/Parse/Yapp/Options.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,186 @@ +# +# Module Parse::Yapp::Options +# +# (c) Copyright 1999-2001 Francois Desarmenien, all rights reserved. +# (see the pod text in Parse::Yapp module for use and distribution rights) +# +package Parse::Yapp::Options; + +use strict; +use Carp; + +############################################################################ +#Definitions of options +# +# %known_options allowed options +# +# %default_options default +# +# %actions sub refs to execute if option is set with ($self,$value) +# as parameters +############################################################################ +# +#A value of '' means any value can do +# +my(%known_options)= ( + language => { + perl => "Ouput parser for Perl language", +# for future use... 
+# 'c++' => "Output parser for C++ language", +# c => "Output parser for C language" + }, + linenumbers => { + 0 => "Don't embbed line numbers in parser", + 1 => "Embbed source line numbers in parser" + }, + inputfile => { + '' => "Input file name: will automagically fills input" + }, + classname => { + '' => "Class name of parser object (Perl and C++)" + }, + standalone => { + 0 => "Don't create a standalone parser (Perl and C++)", + 1 => "Create a standalone parser" + }, + input => { + '' => "Input text of grammar" + }, + template => { + '' => "Template text for generating grammar file" + }, +); + +my(%default_options)= ( + language => 'perl', + linenumbers => 1, + inputfile => undef, + classname => 'Parser', + standalone => 0, + input => undef, + template => undef, + shebang => undef, +); + +my(%actions)= ( + inputfile => \&__LoadFile +); + +############################################################################# +# +# Actions +# +# These are NOT a method, although they look like... +# +# They are super-private routines (that's why I prepend __ to their names) +# +############################################################################# +sub __LoadFile { + my($self,$filename)=@_; + + open(IN,"<$filename") + or croak "Cannot open input file '$filename' for reading"; + $self->{OPTIONS}{input}=join('',); + close(IN); +} + +############################################################################# +# +# Private methods +# +############################################################################# + +sub _SetOption { + my($self)=shift; + my($key,$value)=@_; + + $key=lc($key); + + @_ == 2 + or croak "Invalid number of arguments"; + + exists($known_options{$key}) + or croak "Unknown option: '$key'"; + + if(exists($known_options{$key}{lc($value)})) { + $value=lc($value); + } + elsif(not exists($known_options{$key}{''})) { + croak "Invalid value '$value' for option '$key'"; + } + + exists($actions{$key}) + and &{$actions{$key}}($self,$value); + + $self->{OPTIONS}{$key}=$value; +} + +sub _GetOption { + my($self)=shift; + my($key)=map { lc($_) } @_; + + @_ == 1 + or croak "Invalid number of arguments"; + + exists($known_options{$key}) + or croak "Unknown option: '$key'"; + + $self->{OPTIONS}{$key}; +} + +############################################################################# +# +# Public methods +# +############################################################################# + +# +# Constructor +# +sub new { + my($class)=shift; + my($self)={ OPTIONS => { %default_options } }; + + ref($class) + and $class=ref($class); + + bless($self,$class); + + $self->Options(@_); + + $self; +} + +# +# Specify one or more options to set +# +sub Options { + my($self)=shift; + my($key,$value); + + @_ % 2 == 0 + or croak "Invalid number of arguments"; + + while(($key,$value)=splice(@_,0,2)) { + $self->_SetOption($key,$value); + } +} + +# +# Set (2 parameters) or Get (1 parameter) values for one option +# +sub Option { + my($self)=shift; + my($key,$value)=@_; + + @_ == 1 + and return $self->_GetOption($key); + + @_ == 2 + and return $self->_SetOption($key,$value); + + croak "Invalid number of arguments"; + +} + +1; diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/Parse/Yapp/Output.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/Parse/Yapp/Output.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,92 @@ +# +# Module Parse::Yapp::Output +# +# (c) Copyright 1998-2001 Francois Desarmenien, all rights reserved. 
+# (see the pod text in Parse::Yapp module for use and distribution rights) +# +package Parse::Yapp::Output; +@ISA=qw ( Parse::Yapp::Lalr ); + +require 5.004; + +use Parse::Yapp::Lalr; +use Parse::Yapp::Driver; + +use strict; + +use Carp; + +sub _CopyDriver { + my($text)='#Included Parse/Yapp/Driver.pm file'.('-' x 40)."\n"; + open(DRV,$Parse::Yapp::Driver::FILENAME) + or die "BUG: could not open $Parse::Yapp::Driver::FILENAME"; + $text.="{\n".join('',)."}\n"; + close(DRV); + $text.='#End of include'.('-' x 50)."\n"; +} + +sub Output { + my($self)=shift; + + $self->Options(@_); + + my($package)=$self->Option('classname'); + my($head,$states,$rules,$tail,$driver); + my($version)=$Parse::Yapp::Driver::VERSION; + my($datapos); + my($text)=$self->Option('template') ||<<'EOT'; +#################################################################### +# +# This file was generated using Parse::Yapp version <<$version>>. +# +# Don't edit this file, use source file instead. +# +# ANY CHANGE MADE HERE WILL BE LOST ! +# +#################################################################### +package <<$package>>; +use vars qw ( @ISA ); +use strict; + +@ISA= qw ( Parse::Yapp::Driver ); +<<$driver>> + +<<$head>> + +sub new { + my($class)=shift; + ref($class) + and $class=ref($class); + + my($self)=$class->SUPER::new( yyversion => '<<$version>>', + yystates => +<<$states>>, + yyrules => +<<$rules>>, + @_); + bless($self,$class); +} + +<<$tail>> +1; +EOT + + $driver='use Parse::Yapp::Driver;'; + + defined($package) + or $package='Parse::Yapp::Default'; + + $head= $self->Head(); + $rules=$self->RulesTable(); + $states=$self->DfaTable(); + $tail= $self->Tail(); + + $self->Option('standalone') + and $driver=_CopyDriver(); + + $text=~s/<<(\$.+)>>/$1/gee; + + $text; +} + +1; diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/Parse/Yapp/Parse.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/Parse/Yapp/Parse.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,1093 @@ +#################################################################### +# +# This file was generated using Parse::Yapp version 1.05. +# +# Don't edit this file, use source file instead. +# +# ANY CHANGE MADE HERE WILL BE LOST ! +# +#################################################################### +package Parse::Yapp::Parse; +use vars qw ( @ISA ); +use strict; + +@ISA= qw ( Parse::Yapp::Driver ); +use Parse::Yapp::Driver; + +#line 1 "YappParse.yp" +# (c) Copyright Francois Desarmenien 1998-2001, all rights reserved. +# (see COPYRIGHT in Parse::Yapp.pm pod section for use and distribution rights) +# +# Parse/Yapp/Parser.yp: Parse::Yapp::Parser.pm source file +# +# Use: yapp -m 'Parse::Yapp::Parse' -o Parse/Yapp/Parse.pm YappParse.yp +# +# to generate the Parser module. 
+# +#line 12 "YappParse.yp" + +require 5.004; + +use Carp; + +my($input,$lexlevel,@lineno,$nberr,$prec,$labelno); +my($syms,$head,$tail,$token,$term,$nterm,$rules,$precterm,$start,$nullable); +my($expect); + + + +sub new { + my($class)=shift; + ref($class) + and $class=ref($class); + + my($self)=$class->SUPER::new( yyversion => '1.05', + yystates => +[ + {#State 0 + ACTIONS => { + "%%" => -6, + 'HEADCODE' => 3, + 'UNION' => 2, + 'TOKEN' => 5, + 'ASSOC' => 7, + 'START' => 6, + 'error' => 9, + 'TYPE' => 10, + "\n" => 11, + 'EXPECT' => 13 + }, + GOTOS => { + 'head' => 1, + 'decls' => 12, + 'yapp' => 4, + 'decl' => 14, + 'headsec' => 8 + } + }, + {#State 1 + ACTIONS => { + 'error' => 19, + "%%" => 16, + 'IDENT' => 18 + }, + GOTOS => { + 'rules' => 15, + 'rulesec' => 20, + 'body' => 17 + } + }, + {#State 2 + ACTIONS => { + 'CODE' => 21 + } + }, + {#State 3 + ACTIONS => { + "\n" => 22 + } + }, + {#State 4 + ACTIONS => { + '' => 23 + } + }, + {#State 5 + ACTIONS => { + "<" => 25 + }, + DEFAULT => -19, + GOTOS => { + 'typedecl' => 24 + } + }, + {#State 6 + ACTIONS => { + 'IDENT' => 26 + }, + GOTOS => { + 'ident' => 27 + } + }, + {#State 7 + ACTIONS => { + "<" => 25 + }, + DEFAULT => -19, + GOTOS => { + 'typedecl' => 28 + } + }, + {#State 8 + ACTIONS => { + "%%" => 29 + } + }, + {#State 9 + ACTIONS => { + "\n" => 30 + } + }, + {#State 10 + ACTIONS => { + "<" => 25 + }, + DEFAULT => -19, + GOTOS => { + 'typedecl' => 31 + } + }, + {#State 11 + DEFAULT => -10 + }, + {#State 12 + ACTIONS => { + "%%" => -7, + 'HEADCODE' => 3, + 'UNION' => 2, + 'TOKEN' => 5, + 'ASSOC' => 7, + 'START' => 6, + 'error' => 9, + 'TYPE' => 10, + "\n" => 11, + 'EXPECT' => 13 + }, + GOTOS => { + 'decl' => 32 + } + }, + {#State 13 + ACTIONS => { + 'NUMBER' => 33 + } + }, + {#State 14 + DEFAULT => -9 + }, + {#State 15 + DEFAULT => -28 + }, + {#State 16 + DEFAULT => -26 + }, + {#State 17 + ACTIONS => { + 'TAILCODE' => 34 + }, + DEFAULT => -45, + GOTOS => { + 'tail' => 35 + } + }, + {#State 18 + ACTIONS => { + ":" => 36 + } + }, + {#State 19 + ACTIONS => { + ";" => 37 + } + }, + {#State 20 + ACTIONS => { + 'error' => 19, + "%%" => 39, + 'IDENT' => 18 + }, + GOTOS => { + 'rules' => 38 + } + }, + {#State 21 + ACTIONS => { + "\n" => 40 + } + }, + {#State 22 + DEFAULT => -14 + }, + {#State 23 + DEFAULT => -0 + }, + {#State 24 + ACTIONS => { + 'LITERAL' => 41, + 'IDENT' => 26 + }, + GOTOS => { + 'symlist' => 43, + 'ident' => 44, + 'symbol' => 42 + } + }, + {#State 25 + ACTIONS => { + 'IDENT' => 45 + } + }, + {#State 26 + DEFAULT => -4 + }, + {#State 27 + ACTIONS => { + "\n" => 46 + } + }, + {#State 28 + ACTIONS => { + 'LITERAL' => 41, + 'IDENT' => 26 + }, + GOTOS => { + 'symlist' => 47, + 'ident' => 44, + 'symbol' => 42 + } + }, + {#State 29 + DEFAULT => -5 + }, + {#State 30 + DEFAULT => -18 + }, + {#State 31 + ACTIONS => { + 'IDENT' => 26 + }, + GOTOS => { + 'ident' => 48, + 'identlist' => 49 + } + }, + {#State 32 + DEFAULT => -8 + }, + {#State 33 + ACTIONS => { + "\n" => 50 + } + }, + {#State 34 + DEFAULT => -46 + }, + {#State 35 + DEFAULT => -1 + }, + {#State 36 + ACTIONS => { + 'CODE' => 57, + 'LITERAL' => 41, + 'IDENT' => 26 + }, + DEFAULT => -35, + GOTOS => { + 'rhselts' => 56, + 'rule' => 51, + 'code' => 52, + 'rhs' => 53, + 'ident' => 44, + 'rhselt' => 58, + 'rhss' => 55, + 'symbol' => 54 + } + }, + {#State 37 + DEFAULT => -30 + }, + {#State 38 + DEFAULT => -27 + }, + {#State 39 + DEFAULT => -25 + }, + {#State 40 + DEFAULT => -15 + }, + {#State 41 + DEFAULT => -2 + }, + {#State 42 + DEFAULT => -22 + }, + {#State 43 + ACTIONS => { 
+ "\n" => 60, + 'LITERAL' => 41, + 'IDENT' => 26 + }, + GOTOS => { + 'ident' => 44, + 'symbol' => 59 + } + }, + {#State 44 + DEFAULT => -3 + }, + {#State 45 + ACTIONS => { + ">" => 61 + } + }, + {#State 46 + DEFAULT => -13 + }, + {#State 47 + ACTIONS => { + "\n" => 62, + 'LITERAL' => 41, + 'IDENT' => 26 + }, + GOTOS => { + 'ident' => 44, + 'symbol' => 59 + } + }, + {#State 48 + DEFAULT => -24 + }, + {#State 49 + ACTIONS => { + "\n" => 63, + 'IDENT' => 26 + }, + GOTOS => { + 'ident' => 64 + } + }, + {#State 50 + DEFAULT => -17 + }, + {#State 51 + DEFAULT => -32 + }, + {#State 52 + DEFAULT => -40 + }, + {#State 53 + ACTIONS => { + 'PREC' => 66 + }, + DEFAULT => -34, + GOTOS => { + 'prec' => 65 + } + }, + {#State 54 + DEFAULT => -39 + }, + {#State 55 + ACTIONS => { + "|" => 68, + ";" => 67 + } + }, + {#State 56 + ACTIONS => { + 'CODE' => 57, + 'LITERAL' => 41, + 'IDENT' => 26 + }, + DEFAULT => -36, + GOTOS => { + 'code' => 52, + 'ident' => 44, + 'rhselt' => 69, + 'symbol' => 54 + } + }, + {#State 57 + DEFAULT => -44 + }, + {#State 58 + DEFAULT => -38 + }, + {#State 59 + DEFAULT => -21 + }, + {#State 60 + DEFAULT => -11 + }, + {#State 61 + DEFAULT => -20 + }, + {#State 62 + DEFAULT => -12 + }, + {#State 63 + DEFAULT => -16 + }, + {#State 64 + DEFAULT => -23 + }, + {#State 65 + ACTIONS => { + 'CODE' => 57 + }, + DEFAULT => -42, + GOTOS => { + 'code' => 70, + 'epscode' => 71 + } + }, + {#State 66 + ACTIONS => { + 'LITERAL' => 41, + 'IDENT' => 26 + }, + GOTOS => { + 'ident' => 44, + 'symbol' => 72 + } + }, + {#State 67 + DEFAULT => -29 + }, + {#State 68 + ACTIONS => { + 'CODE' => 57, + 'LITERAL' => 41, + 'IDENT' => 26 + }, + DEFAULT => -35, + GOTOS => { + 'rhselts' => 56, + 'rule' => 73, + 'code' => 52, + 'rhs' => 53, + 'ident' => 44, + 'rhselt' => 58, + 'symbol' => 54 + } + }, + {#State 69 + DEFAULT => -37 + }, + {#State 70 + DEFAULT => -43 + }, + {#State 71 + DEFAULT => -33 + }, + {#State 72 + DEFAULT => -41 + }, + {#State 73 + DEFAULT => -31 + } +], + yyrules => +[ + [#Rule 0 + '$start', 2, undef + ], + [#Rule 1 + 'yapp', 3, undef + ], + [#Rule 2 + 'symbol', 1, +sub +#line 30 "YappParse.yp" +{ + exists($$syms{$_[1][0]}) + or do { + $$syms{$_[1][0]} = $_[1][1]; + $$term{$_[1][0]} = undef; + }; + $_[1] + } + ], + [#Rule 3 + 'symbol', 1, undef + ], + [#Rule 4 + 'ident', 1, +sub +#line 41 "YappParse.yp" +{ + exists($$syms{$_[1][0]}) + or do { + $$syms{$_[1][0]} = $_[1][1]; + $$term{$_[1][0]} = undef; + }; + $_[1] + } + ], + [#Rule 5 + 'head', 2, undef + ], + [#Rule 6 + 'headsec', 0, undef + ], + [#Rule 7 + 'headsec', 1, undef + ], + [#Rule 8 + 'decls', 2, undef + ], + [#Rule 9 + 'decls', 1, undef + ], + [#Rule 10 + 'decl', 1, undef + ], + [#Rule 11 + 'decl', 4, +sub +#line 66 "YappParse.yp" +{ + for (@{$_[3]}) { + my($symbol,$lineno)=@$_; + + exists($$token{$symbol}) + and do { + _SyntaxError(0, + "Token $symbol redefined: ". + "Previously defined line $$syms{$symbol}", + $lineno); + next; + }; + $$token{$symbol}=$lineno; + $$term{$symbol} = [ ]; + } + undef + } + ], + [#Rule 12 + 'decl', 4, +sub +#line 84 "YappParse.yp" +{ + for (@{$_[3]}) { + my($symbol,$lineno)=@$_; + + defined($$term{$symbol}[0]) + and do { + _SyntaxError(1, + "Precedence for symbol $symbol redefined: ". 
+ "Previously defined line $$syms{$symbol}", + $lineno); + next; + }; + $$token{$symbol}=$lineno; + $$term{$symbol} = [ $_[1][0], $prec ]; + } + ++$prec; + undef + } + ], + [#Rule 13 + 'decl', 3, +sub +#line 102 "YappParse.yp" +{ $start=$_[2][0]; undef } + ], + [#Rule 14 + 'decl', 2, +sub +#line 103 "YappParse.yp" +{ push(@$head,$_[1]); undef } + ], + [#Rule 15 + 'decl', 3, +sub +#line 104 "YappParse.yp" +{ undef } + ], + [#Rule 16 + 'decl', 4, +sub +#line 106 "YappParse.yp" +{ + for ( @{$_[3]} ) { + my($symbol,$lineno)=@$_; + + exists($$nterm{$symbol}) + and do { + _SyntaxError(0, + "Non-terminal $symbol redefined: ". + "Previously defined line $$syms{$symbol}", + $lineno); + next; + }; + delete($$term{$symbol}); #not a terminal + $$nterm{$symbol}=undef; #is a non-terminal + } + } + ], + [#Rule 17 + 'decl', 3, +sub +#line 122 "YappParse.yp" +{ $expect=$_[2][0]; undef } + ], + [#Rule 18 + 'decl', 2, +sub +#line 123 "YappParse.yp" +{ $_[0]->YYErrok } + ], + [#Rule 19 + 'typedecl', 0, undef + ], + [#Rule 20 + 'typedecl', 3, undef + ], + [#Rule 21 + 'symlist', 2, +sub +#line 130 "YappParse.yp" +{ push(@{$_[1]},$_[2]); $_[1] } + ], + [#Rule 22 + 'symlist', 1, +sub +#line 131 "YappParse.yp" +{ [ $_[1] ] } + ], + [#Rule 23 + 'identlist', 2, +sub +#line 134 "YappParse.yp" +{ push(@{$_[1]},$_[2]); $_[1] } + ], + [#Rule 24 + 'identlist', 1, +sub +#line 135 "YappParse.yp" +{ [ $_[1] ] } + ], + [#Rule 25 + 'body', 2, +sub +#line 140 "YappParse.yp" +{ + $start + or $start=$$rules[1][0]; + + ref($$nterm{$start}) + or _SyntaxError(2,"Start symbol $start not found ". + "in rules section",$_[2][1]); + + $$rules[0]=[ '$start', [ $start, chr(0) ], undef, undef ]; + } + ], + [#Rule 26 + 'body', 1, +sub +#line 150 "YappParse.yp" +{ _SyntaxError(2,"No rules in input grammar",$_[1][1]); } + ], + [#Rule 27 + 'rulesec', 2, undef + ], + [#Rule 28 + 'rulesec', 1, undef + ], + [#Rule 29 + 'rules', 4, +sub +#line 157 "YappParse.yp" +{ _AddRules($_[1],$_[3]); undef } + ], + [#Rule 30 + 'rules', 2, +sub +#line 158 "YappParse.yp" +{ $_[0]->YYErrok } + ], + [#Rule 31 + 'rhss', 3, +sub +#line 161 "YappParse.yp" +{ push(@{$_[1]},$_[3]); $_[1] } + ], + [#Rule 32 + 'rhss', 1, +sub +#line 162 "YappParse.yp" +{ [ $_[1] ] } + ], + [#Rule 33 + 'rule', 3, +sub +#line 165 "YappParse.yp" +{ push(@{$_[1]}, $_[2], $_[3]); $_[1] } + ], + [#Rule 34 + 'rule', 1, +sub +#line 166 "YappParse.yp" +{ + my($code)=undef; + + defined($_[1]) + and $_[1][-1][0] eq 'CODE' + and $code = ${pop(@{$_[1]})}[1]; + + push(@{$_[1]}, undef, $code); + + $_[1] + } + ], + [#Rule 35 + 'rhs', 0, undef + ], + [#Rule 36 + 'rhs', 1, undef + ], + [#Rule 37 + 'rhselts', 2, +sub +#line 183 "YappParse.yp" +{ push(@{$_[1]},$_[2]); $_[1] } + ], + [#Rule 38 + 'rhselts', 1, +sub +#line 184 "YappParse.yp" +{ [ $_[1] ] } + ], + [#Rule 39 + 'rhselt', 1, +sub +#line 187 "YappParse.yp" +{ [ 'SYMB', $_[1] ] } + ], + [#Rule 40 + 'rhselt', 1, +sub +#line 188 "YappParse.yp" +{ [ 'CODE', $_[1] ] } + ], + [#Rule 41 + 'prec', 2, +sub +#line 192 "YappParse.yp" +{ + defined($$term{$_[2][0]}) + or do { + _SyntaxError(1,"No precedence for symbol $_[2][0]", + $_[2][1]); + return undef; + }; + + ++$$precterm{$_[2][0]}; + $$term{$_[2][0]}[1]; + } + ], + [#Rule 42 + 'epscode', 0, +sub +#line 205 "YappParse.yp" +{ undef } + ], + [#Rule 43 + 'epscode', 1, +sub +#line 206 "YappParse.yp" +{ $_[1] } + ], + [#Rule 44 + 'code', 1, +sub +#line 209 "YappParse.yp" +{ $_[1] } + ], + [#Rule 45 + 'tail', 0, undef + ], + [#Rule 46 + 'tail', 1, +sub +#line 215 "YappParse.yp" +{ $tail=$_[1] } + ] +], + @_); 
+ bless($self,$class); +} + +#line 218 "YappParse.yp" + +sub _Error { + my($value)=$_[0]->YYCurval; + + my($what)= $token ? "input: '$$value[0]'" : "end of input"; + + _SyntaxError(1,"Unexpected $what",$$value[1]); +} + +sub _Lexer { + + #At EOF + pos($$input) >= length($$input) + and return('',[ undef, -1 ]); + + #In TAIL section + $lexlevel > 1 + and do { + my($pos)=pos($$input); + + $lineno[0]=$lineno[1]; + $lineno[1]=-1; + pos($$input)=length($$input); + return('TAILCODE',[ substr($$input,$pos), $lineno[0] ]); + }; + + #Skip blanks + $lexlevel == 0 + ? $$input=~m{\G((?: + [\t\ ]+ # Any white space char but \n + | \#[^\n]* # Perl like comments + | /\*.*?\*/ # C like comments + )+)}xsgc + : $$input=~m{\G((?: + \s+ # any white space char + | \#[^\n]* # Perl like comments + | /\*.*?\*/ # C like comments + )+)}xsgc + and do { + my($blanks)=$1; + + #Maybe At EOF + pos($$input) >= length($$input) + and return('',[ undef, -1 ]); + + $lineno[1]+= $blanks=~tr/\n//; + }; + + $lineno[0]=$lineno[1]; + + $$input=~/\G([A-Za-z_][A-Za-z0-9_]*)/gc + and return('IDENT',[ $1, $lineno[0] ]); + + $$input=~/\G('(?:[^'\\]|\\\\|\\'|\\)+?')/gc + and do { + $1 eq "'error'" + and do { + _SyntaxError(0,"Literal 'error' ". + "will be treated as error token",$lineno[0]); + return('IDENT',[ 'error', $lineno[0] ]); + }; + return('LITERAL',[ $1, $lineno[0] ]); + }; + + $$input=~/\G(%%)/gc + and do { + ++$lexlevel; + return($1, [ $1, $lineno[0] ]); + }; + + $$input=~/\G{/gc + and do { + my($level,$from,$code); + + $from=pos($$input); + + $level=1; + while($$input=~/([{}])/gc) { + substr($$input,pos($$input)-1,1) eq '\\' #Quoted + and next; + $level += ($1 eq '{' ? 1 : -1) + or last; + } + $level + and _SyntaxError(2,"Unmatched { opened line $lineno[0]",-1); + $code = substr($$input,$from,pos($$input)-$from-1); + $lineno[1]+= $code=~tr/\n//; + return('CODE',[ $code, $lineno[0] ]); + }; + + if($lexlevel == 0) {# In head section + $$input=~/\G%(left|right|nonassoc)/gc + and return('ASSOC',[ uc($1), $lineno[0] ]); + $$input=~/\G%(start)/gc + and return('START',[ undef, $lineno[0] ]); + $$input=~/\G%(expect)/gc + and return('EXPECT',[ undef, $lineno[0] ]); + $$input=~/\G%{/gc + and do { + my($code); + + $$input=~/\G(.*?)%}/sgc + or _SyntaxError(2,"Unmatched %{ opened line $lineno[0]",-1); + + $code=$1; + $lineno[1]+= $code=~tr/\n//; + return('HEADCODE',[ $code, $lineno[0] ]); + }; + $$input=~/\G%(token)/gc + and return('TOKEN',[ undef, $lineno[0] ]); + $$input=~/\G%(type)/gc + and return('TYPE',[ undef, $lineno[0] ]); + $$input=~/\G%(union)/gc + and return('UNION',[ undef, $lineno[0] ]); + $$input=~/\G([0-9]+)/gc + and return('NUMBER',[ $1, $lineno[0] ]); + + } + else {# In rule section + $$input=~/\G%(prec)/gc + and return('PREC',[ undef, $lineno[0] ]); + } + + #Always return something + $$input=~/\G(.)/sg + or die "Parse::Yapp::Grammar::Parse: Match (.) failed: report as a BUG"; + + $1 eq "\n" + and ++$lineno[1]; + + ( $1 ,[ $1, $lineno[0] ]); + +} + +sub _SyntaxError { + my($level,$message,$lineno)=@_; + + $message= "*". + [ 'Warning', 'Error', 'Fatal' ]->[$level]. + "* $message, at ". + ($lineno < 0 ? "eof" : "line $lineno"). + ".\n"; + + $level > 1 + and die $message; + + warn $message; + + $level > 0 + and ++$nberr; + + $nberr == 20 + and die "*Fatal* Too many errors detected.\n" +} + +sub _AddRules { + my($lhs,$lineno)=@{$_[0]}; + my($rhss)=$_[1]; + + ref($$nterm{$lhs}) + and do { + _SyntaxError(1,"Non-terminal $lhs redefined: ". 
+ "Previously declared line $$syms{$lhs}",$lineno); + return; + }; + + ref($$term{$lhs}) + and do { + my($where) = exists($$token{$lhs}) ? $$token{$lhs} : $$syms{$lhs}; + _SyntaxError(1,"Non-terminal $lhs previously ". + "declared as token line $where",$lineno); + return; + }; + + ref($$nterm{$lhs}) #declared through %type + or do { + $$syms{$lhs}=$lineno; #Say it's declared here + delete($$term{$lhs}); #No more a terminal + }; + $$nterm{$lhs}=[]; #It's a non-terminal now + + my($epsrules)=0; #To issue a warning if more than one epsilon rule + + for my $rhs (@$rhss) { + my($tmprule)=[ $lhs, [ ], splice(@$rhs,-2) ]; #Init rule + + @$rhs + or do { + ++$$nullable{$lhs}; + ++$epsrules; + }; + + for (0..$#$rhs) { + my($what,$value)=@{$$rhs[$_]}; + + $what eq 'CODE' + and do { + my($name)='@'.++$labelno."-$_"; + push(@$rules,[ $name, [], undef, $value ]); + push(@{$$tmprule[1]},$name); + next; + }; + push(@{$$tmprule[1]},$$value[0]); + } + push(@$rules,$tmprule); + push(@{$$nterm{$lhs}},$#$rules); + } + + $epsrules > 1 + and _SyntaxError(0,"More than one empty rule for symbol $lhs",$lineno); +} + +sub Parse { + my($self)=shift; + + @_ > 0 + or croak("No input grammar\n"); + + my($parsed)={}; + + $input=\$_[0]; + + $lexlevel=0; + @lineno=(1,1); + $nberr=0; + $prec=0; + $labelno=0; + + $head=(); + $tail=""; + + $syms={}; + $token={}; + $term={}; + $nterm={}; + $rules=[ undef ]; #reserve slot 0 for start rule + $precterm={}; + + $start=""; + $nullable={}; + $expect=0; + + pos($$input)=0; + + + $self->YYParse(yylex => \&_Lexer, yyerror => \&_Error); + + $nberr + and _SyntaxError(2,"Errors detected: No output",-1); + + @$parsed{ 'HEAD', 'TAIL', 'RULES', 'NTERM', 'TERM', + 'NULL', 'PREC', 'SYMS', 'START', 'EXPECT' } + = ( $head, $tail, $rules, $nterm, $term, + $nullable, $precterm, $syms, $start, $expect); + + undef($input); + undef($lexlevel); + undef(@lineno); + undef($nberr); + undef($prec); + undef($labelno); + + undef($head); + undef($tail); + + undef($syms); + undef($token); + undef($term); + undef($nterm); + undef($rules); + undef($precterm); + + undef($start); + undef($nullable); + undef($expect); + + $parsed +} + + +1; diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Checker.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Checker.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,2006 @@ +# +# +# TO DO +# - update docs regarding PerlSAX interface +# - add current node to error context when checking DOM subtrees +# - add parsed Entity to test XML files +# - free circular references +# - Implied handler? +# - Notation, Entity, Unparsed checks, Default handler? +# - check no root element (it's checked by expat) ? + +package XML::Checker::Term; +use strict; + +sub new +{ + my ($class, %h) = @_; + bless \%h, $class; +} + +sub str +{ + '<' . $_[0]->{C} . $_[0]->{N} . '>' +} + +sub re +{ + $_[0]->{S} +} + +sub rel +{ + my $self = shift; + defined $self->{SL} ? 
@{ $self->{SL} } : ( $self->{S} ); +} + +sub debug +{ + my $t = shift; + my ($c, $n, $s) = ($t->{C}, $t->{N}, $t->{S}); + my @sl = $t->rel; + "{C=$c N=$n S=$s SL=@sl}"; +} + +#------------------------------------------------------------------------- + +package XML::Checker::Context; + +sub new +{ + my ($class) = @_; + my $scalar; + bless \$scalar, $class; +} + +sub Start {} +sub End {} +sub Char {} + +# +# The initial Context when checking an entire XML Document +# +package XML::Checker::DocContext; +use vars qw( @ISA ); +@ISA = qw( XML::Checker::Context ); + +sub new +{ +#??checker not used + my ($class, $checker) = @_; + bless { }, $class; +} + +sub setRootElement +{ + $_[0]->{RootElement} = $_[1]; +} + +sub Start +{ + my ($self, $checker, $tag) = @_; + if (exists $self->{Elem}) + { + my $tags = join (", ", @{$self->{Elem}}); + $checker->fail (155, "more than one root Element [$tags]"); + push @{$self->{Elem}}, $tag; + } + else + { + $self->{Elem} = [ $tag ]; + } + + my $exp_root = $self->{RootElement}; + $checker->fail (156, "unexpected root Element [$tag], expected [$exp_root]") + if defined ($exp_root) and $tag ne $exp_root; +} + +sub debug +{ + my $self = shift; + "DocContext[Count=" . $self->{Count} . ",Root=" . + $self->{RootElement} . "]"; +} + +package XML::Checker::Context::ANY; +use vars qw( @ISA ); +@ISA = qw( XML::Checker::Context ); + +# No overrides, because everything is accepted + +sub debug { "XML::Checker::Context::ANY" } + +package XML::Checker::Context::EMPTY; +use vars qw( @ISA $ALLOW_WHITE_SPACE ); +@ISA = qw( XML::Checker::Context ); + +$ALLOW_WHITE_SPACE = 0; + +sub debug { "XML::Checker::Context::EMPTY" } + +sub Start +{ + my ($self, $checker, $tag) = @_; + $checker->fail (152, "Element should be EMPTY, found Element [$tag]"); +} + +sub Char +{ + my ($self, $checker, $str) = @_; + $checker->fail (153, "Element should be EMPTY, found text [$str]") + unless ($ALLOW_WHITE_SPACE and $checker->isWS ($str)); + + # NOTE: if $ALLOW_WHITE_SPACE = 1, the isWS call does not only check + # whether it is whitespace, but it also informs the checker that this + # might be insignificant whitespace +} + +#?? what about Comments + +package XML::Checker::Context::Children; +use vars qw( @ISA ); +@ISA = qw( XML::Checker::Context ); + +sub new +{ + my ($class, $rule) = @_; + bless { Name => $rule->{Name}, RE => $rule->{RE}, Buf => "", N => 0 }, $class; +} + +sub phash +{ + my $href = shift; + my $str = ""; + for (keys %$href) + { + $str .= ' ' if $str; + $str .= $_ . '=' . $href->{$_}; + } + $str; +} + +sub debug +{ + my $self = shift; + "Context::Children[Name=(" . phash ($self->{Name}) . ",N=" . $self->{N} . + ",RE=" . $self->{RE} . ",Buf=[" . $self->{Buf} . "]"; +} + +sub Start +{ + my ($self, $checker, $tag) = @_; + +#print "Children.Start tag=$tag rule=$checker drule=" . $checker->debug . "\n"; + + if (exists $self->{Name}->{$tag}) + { +#print "Buf=[".$self->{Buf}. "] tag=[" . $self->{Name}->{$tag}->{S} . "]\n"; + $self->{Buf} .= $self->{Name}->{$tag}->{S}; + } + else + { + $checker->fail (157, "unexpected Element [$tag]", + ChildElementIndex => $self->{N}) + } + $self->{N}++; +} + +sub decode +{ + my ($self) = @_; + my $re = $self->{RE}; + my $name = $self->{Name}; + my $buf = $self->{Buf}; + + my %s = (); + while (my ($key, $val) = each %$name) + { + $s{$val->{S}} = $key; + } + + my ($len) = scalar (keys %$name); + $len = length $len; + my $dots = "[^()*+?]" x $len; + + $buf =~ s/($dots)/$s{$1} . ","/ge; + chop $buf; + + $re =~ s/($dots)/"(" . $s{$1} . 
")"/ge; + + "Found=[$buf] RE=[$re]" +} + +sub End +{ + my ($self, $checker) = @_; + my $re = $self->{RE}; + +#print "End " . $self->debug . "\n"; + $checker->fail (154, "bad order of Elements " . $self->decode) + unless $self->{Buf} =~ /^$re$/; +} + +sub Char +{ + my ($self, $checker, $str) = @_; + + # Inform the checker that this might be insignificant whitespace + $checker->isWS ($str); +} + +package XML::Checker::Context::Mixed; +use vars qw( @ISA ); +@ISA = qw( XML::Checker::Context ); + +sub new +{ + my ($class, $rule) = @_; + bless { Name => $rule->{Name}, N => 0 }, $class; +} + +sub debug +{ + my $self = shift; + "Context::Mixed[Name=" . $self->{Name} . ",N=" , $self->{N} . "]"; +} + +sub Start +{ + my ($self, $checker, $tag) = @_; + + $checker->fail (157, "unexpected Element [$tag]", + ChildElementIndex => $self->{N}) + unless exists $self->{Name}->{$tag}; + $self->{N}++; +} + +package XML::Checker::ERule; + +package XML::Checker::ERule::EMPTY; +use vars qw( @ISA ); +@ISA = qw( XML::Checker::ERule ); + +sub new +{ + my ($class) = @_; + bless {}, $class; +} + +my $context = new XML::Checker::Context::EMPTY; +sub context { $context } # share the context + +sub debug { "EMPTY" } + +package XML::Checker::ERule::ANY; +use vars qw( @ISA ); +@ISA = qw( XML::Checker::ERule ); + +sub new +{ + my ($class) = @_; + bless {}, $class; +} + +my $any_context = new XML::Checker::Context::ANY; +sub context { $any_context } # share the context + +sub debug { "ANY" } + +package XML::Checker::ERule::Mixed; +use vars qw( @ISA ); +@ISA = qw( XML::Checker::ERule ); + +sub new +{ + my ($class) = @_; + bless { Name => {} }, $class; +} + +sub context +{ + my ($self) = @_; + new XML::Checker::Context::Mixed ($self); +} + +sub setModel +{ + my ($self, $model) = @_; + my $rule = $model; + + # Mixed := '(' '#PCDATA' ')' '*'? + if ($rule =~ /^\(\s*#PCDATA\s*\)(\*)?$/) + { +#? how do we interpret the '*' ?? + return 1; + } + else # Mixed := '(' '#PCDATA' ('|' Name)* ')*' + { + return 0 unless $rule =~ s/^\(\s*#PCDATA\s*//; + return 0 unless $rule =~ s/\s*\)\*$//; + + my %names = (); + while ($rule =~ s/^\s*\|\s*($XML::RegExp::Name)//) + { + $names{$1} = 1; + } + if ($rule eq "") + { + $self->{Name} = \%names; + return 1; + } + } + return 0; +} + +sub debug +{ + my ($self) = @_; + "Mixed[Names=" . join("|", keys %{$self->{Name}}) . "]"; +} + +package XML::Checker::ERule::Children; +use vars qw( @ISA %_name %_map $_n ); +@ISA = qw( XML::Checker::ERule ); + +sub new +{ + my ($class) = @_; + bless {}, $class; +} + +sub context +{ + my ($self) = @_; + new XML::Checker::Context::Children ($self); +} + +sub _add # static +{ + my $exp = new XML::Checker::Term (@_); + $_map{$exp->{N}} = $exp; + $exp->str; +} + +my $IDS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + +sub _tokenize +{ + my ($self, $rule) = @_; + + # Replace names with Terms of the form "", e.g. "". + # Lookup already used names and store new names in %_name. + # + $$rule =~ s/($XML::RegExp::Name)(?!>)/ + if (exists $_name{$1}) # name already used? + { + $_name{$1}->str; + } + else + { + my $exp = new XML::Checker::Term (C => 'n', N => $_n++, + Name => $1); + $_name{$1} = $_map{$exp->{N}} = $exp; + $exp->str; + } + /eg; + + if ($_n < length $IDS) + { + # Generate regular expression for the name Term, i.e. + # a single character from $IDS + my $i = 0; + for (values %_name) + { + $_->{S} = substr ($IDS, $i++, 1); +#print "tokenized " . $_->{Name} . " num=" . $_->{N} . " to " . $_->{S} . 
"\n"; + } + } + else + { + # Generate RE, convert Term->{N} to hex string a la "(#)", + # e.g. "(03d)". Calculate needed length of hex string first. + my $len = 1; + for (my $n = $_n - 1; ($n >> 4) > 0; $len++) {} + + my $i = 0; + for (values %_name) + { + $_->{S} = sprintf ("(0${len}lx)", $i++); +#print "tokenized " . $_->{Name} . " num=" . $_->{N} . " to " . $_->{S} . "\n"; + } + } +} + +sub setModel +{ + my ($self, $rule) = @_; + + local $_n = 0; + local %_map = (); + local %_name = (); + + $self->_tokenize (\$rule); + +#?? check for single name - die "!ELEMENT contents can't be just a NAME" if $rule =~ /^$XML::RegExp::Name$/; + + for ($rule) + { + my $n = 1; + while ($n) + { + $n = 0; + + # cp := ( name | choice | seq ) ('?' | '*' | '+')? + $n++ while s/<[ncs](\d+)>([?*+]?)/_add + (C => 'a', N => $_n++, + S => ($_map{$1}->re . $2))/eg; + + # choice := '(' ch_l ')' + $n++ while s/\(\s*<[ad](\d+)>\s*\)/_add + (C => 'c', N => $_n++, + S => "(" . join ("|", $_map{$1}->rel) . ")")/eg; + + # ch_l := ( cp | ch_l ) '|' ( cp | ch_l ) + $n++ while s/<[ad](\d+)>\s*\|\s*<[ad](\d+)>/_add + (C => 'd', N => $_n++, + SL => [ $_map{$1}->rel, $_map{$2}->rel ])/eg; + + # seq := '(' (seq_l ')' + $n++ while s/\(\s*<[at](\d+)>\s*\)/_add + (C => 's', N => $_n++, + S => "(".join("", $_map{$1}->rel).")")/eg; + + # seq_l := ( cp | seq_l ) ',' ( cp | seq_l ) + $n++ while s/<[at](\d+)>\s*,\s*<[at](\d+)>/_add + (C => 't', N => $_n++, + SL => [ $_map{$1}->rel, $_map{$2}->rel ])/eg; + } + } + + return 0 if ($rule !~ /^$/); + + $self->{Name} = \%_name; + $self->{RE} = $_map{$1}->re; + + return 1; +} + +sub debug +{ + my ($self) = @_; + "Children[RE=" . $self->{RE} . "]"; +} + + +package XML::Checker::ARule; +use XML::RegExp; + +sub new +{ + my ($class, $elem, $checker) = @_; + bless { Elem => $elem, Checker => $checker, Required => {} }, $class; +} + +sub Attlist +{ + my ($self, $attr, $type, $default, $fixed, $checker) = @_; + my ($c1, $c2); + + if ($self->{Defined}->{$attr}) + { + my $tag = $self->{Elem}; + $self->fail ($attr, 110, "attribute [$attr] of element [$tag] already defined"); + } + else + { + $self->{Defined}->{$attr} = 1; + } + + if ($default =~ /^\#(REQUIRED|IMPLIED)$/) + { + $c1 = $1; + + # Keep list of all required attributes + if ($default eq '#REQUIRED') + { + $self->{Required}->{$attr} = 1; + } + } + else + { + $self->fail ($attr, 122, "invalid default attribute value [$default]") + unless $default =~ /^$XML::RegExp::AttValue$/; + + $default = substr ($default, 1, length($default)-2); + $self->{Default}->{$attr} = $default; + $c1 = 'FIXED' if $fixed; + } + + if ($type eq 'ID') + { + $self->fail ($attr, 123, "invalid default ID [$default], must be #REQUIRED or #IMPLIED") + unless $default =~ /^#(REQUIRED|IMPLIED)$/; + + if (exists ($self->{ID}) && $self->{ID} ne $attr) + { + $self->fail ($attr, 151, "only one ID allowed per ELEMENT " . + "first=[" . $self->{ID} . "]"); + } + else + { + $self->{ID} = $attr; + } + $c2 = 'ID'; + } + elsif ($type =~ /^(IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS)$/) + { + my $def = $self->{Default}->{$attr}; + if (defined $def) + { + my $re = ($type =~ /^[IE]/) ? 
$XML::RegExp::Name : $XML::RegExp::NmToken; + if ($type =~ /S$/) + { + for (split (/\s+/, $def)) + { + $self->fail ($attr, 121, + "invalid default [$_] in $type [$def]") + unless $_ =~ /^$re$/; + } + } + else # singular + { + $self->fail ($attr, 120, "invalid default $type [$def]") + unless $def =~ /^$re$/; + } + } + $c2 = $type; + } + elsif ($type ne 'CDATA') # Enumerated := NotationType | Enumeration + { + if ($type =~ /^\s*NOTATION\s*\(\s*($XML::RegExp::Name(\s*\|\s*$XML::RegExp::Name)*)\s*\)\s*$/) + { + $self->fail ($attr, 135, "empty NOTATION list in ATTLIST") + unless defined $1; + + my @tok = split (/\s*\|\s*/, $1); + for (@tok) + { + $self->fail ($attr, 100, "undefined NOTATION [$_] in ATTLIST") + unless exists $checker->{NOTATION}->{$_}; + } + + my $re = join ("|", @tok); + $self->{NotationRE} = "^($re)\$"; + $c2 = 'NotationType'; + } + elsif ($type =~ /^\s*\(\s*($XML::RegExp::NmToken(\s*\|\s*$XML::RegExp::NmToken)*)\s*\)\s*$/) + { + # Enumeration + + $self->fail ($attr, 136, "empty Enumeration list in ATTLIST") + unless defined $1; + + my @tok = split (/\s*\|\s*/, $1); + for (@tok) + { + $self->fail ($attr, 134, + "invalid Enumeration value [$_] in ATTLIST") + unless $_ =~ /^$XML::RegExp::NmToken$/; + } + $self->{EnumRE}->{$attr} = '^(' . join ("|", @tok) . ')$'; #'; + $c2 = 'Enumeration'; + } + else + { + $self->fail ($attr, 137, "invalid ATTLIST type [$type]"); + } + } + + $self->{Check1}->{$attr} = $c1 if $c1; + $self->{Check2}->{$attr} = $c2 if $c2; +} + +sub fail +{ + my $self = shift; + my $attr = shift; + $self->{Checker}->fail (@_, Element => $self->{Elem}, Attr => $attr); +} + +sub check +{ + my ($self, $attr) = @_; + my $func1 = $self->{Check1}->{$attr}; + my $func2 = $self->{Check2}->{$attr}; +# print "check func1=$func1 func2=$func2 @_\n"; + + if (exists $self->{ReqNotSeen}->{$attr}) + { + delete $self->{ReqNotSeen}->{$attr}; + } + no strict; + + &$func1 (@_) if defined $func1; + &$func2 (@_) if defined $func2; +} + +# Copies the list of all required attributes from $self->{Required} to +# $self->{ReqNotSeen}. +# When check() encounters a required attribute, it is removed from ReqNotSeen. +# In EndAttr we look at which attribute names are still in ReqNotSeen - those +# are the ones that were not specified and are, therefore, in error. +sub StartAttr +{ + my $self = shift; + my %not_seen = %{ $self->{Required} }; + $self->{ReqNotSeen} = \%not_seen; +} + +# Checks which of the #REQUIRED attributes were not specified +sub EndAttr +{ + my $self = shift; + + for my $attr (keys %{ $self->{ReqNotSeen} }) + { + $self->fail ($attr, 159, + "unspecified value for \#REQUIRED attribute [$attr]"); + } +} + +sub FIXED +{ + my ($self, $attr, $val, $specified) = @_; + + my $default = $self->{Default}->{$attr}; + $self->fail ($attr, 150, + "bad \#FIXED attribute value [$val], it should be [$default]") + unless ($val eq $default); +} + +sub IMPLIED +{ + my ($self, $attr, $val, $specified) = @_; + +#?? should #IMPLIED be specified? + $self->fail ($attr, 158, + "unspecified value for \#IMPLIED attribute [$attr]") + unless $specified; + +#?? Implied handler ? +} + +# This is called when an attribute is passed to the check() method by +# XML::Checker::Attr(), i.e. when the attribute was specified explicitly +# or defaulted by the parser (which should never happen), *NOT* when the +# attribute was omitted. 
(The latter is checked by StartAttr/EndAttr) +sub REQUIRED +{ + my ($self, $attr, $val, $specified) = @_; +# print "REQUIRED attr=$attr val=$val spec=$specified\n"; + + $self->fail ($attr, 159, + "unspecified value for \#REQUIRED attribute [$attr]") + unless $specified; +} + +sub ID # must be #IMPLIED or #REQUIRED +{ + my ($self, $attr, $val, $specified) = @_; + + $self->fail ($attr, 131, "invalid ID [$val]") + unless $val =~ /^$XML::RegExp::Name$/; + + $self->fail ($attr, 111, "ID [$val] already defined") + if $self->{Checker}->{ID}->{$val}++; +} + +sub IDREF +{ + my ($self, $attr, $val, $specified) = @_; + + $self->fail ($attr, 132, "invalid IDREF [$val]") + unless $val =~ /^$XML::RegExp::Name$/; + + $self->{Checker}->{IDREF}->{$val}++; +} + +sub IDREFS +{ + my ($self, $attr, $val, $specified) = @_; + for (split /\s+/, $val) + { + $self->IDREF ($attr, $_); + } +} + +sub ENTITY +{ + my ($self, $attr, $val, $specified) = @_; +#?? should it be specified? + + $self->fail ($attr, 133, "invalid ENTITY name [$val]") + unless $val =~ /^$XML::RegExp::Name$/; + + $self->fail ($attr, 102, "undefined unparsed ENTITY [$val]") + unless exists $self->{Checker}->{Unparsed}->{$val}; +} + +sub ENTITIES +{ + my ($self, $attr, $val, $specified) = @_; + for (split /\s+/, $val) + { + $self->ENTITY ($attr, $_); + } +} + +sub NMTOKEN +{ + my ($self, $attr, $val, $specified) = @_; + $self->fail ($attr, 130, "invalid NMTOKEN [$val]") + unless $val =~ /^$XML::RegExp::NmToken$/; +} + +sub NMTOKENS +{ + my ($self, $attr, $val, $specified) = @_; + for (split /\s+/, $val) + { + $self->NMTOKEN ($attr, $_, $specified); + } +} + +sub Enumeration +{ + my ($self, $attr, $val, $specified) = @_; + my $re = $self->{EnumRE}->{$attr}; + + $self->fail ($attr, 160, "invalid Enumeration value [$val]") + unless $val =~ /$re/; +} + +sub NotationType +{ + my ($self, $attr, $val, $specified) = @_; + my $re = $self->{NotationRE}; + + $self->fail ($attr, 161, "invalid NOTATION value [$val]") + unless $val =~ /$re/; + + $self->fail ($attr, 162, "undefined NOTATION [$val]") + unless exists $self->{Checker}->{NOTATION}->{$val}; +} + +package XML::Checker; +use vars qw ( $VERSION $FAIL $INSIGNIF_WS ); + +BEGIN +{ + $VERSION = '0.09'; +} + +$FAIL = \&print_error; + +# Whether the last seen Char data was insignicant whitespace +$INSIGNIF_WS = 0; + +sub new +{ + my ($class, %args) = @_; + + $args{ERule} = {}; + $args{ARule} = {}; + $args{InCDATA} = 0; + +# $args{Debug} = 1; + bless \%args, $class; +} + +# PerlSAX API +sub element_decl +{ + my ($self, $hash) = @_; + $self->Element ($hash->{Name}, $hash->{Model}); +} + +# Same parameter order as the Element handler in XML::Parser module +sub Element +{ + my ($self, $name, $model) = @_; + + if (defined $self->{ERule}->{$name}) + { + $self->fail (115, "ELEMENT [$name] already defined", + Element => $name); + } + + if ($model eq "EMPTY") + { + $self->{ERule}->{$name} = new XML::Checker::ERule::EMPTY; + } + elsif ($model eq "ANY") + { + $self->{ERule}->{$name} = new XML::Checker::ERule::ANY; + } + elsif ($model =~ /#PCDATA/) + { + my $rule = new XML::Checker::ERule::Mixed; + if ($rule->setModel ($model)) + { + $self->{ERule}->{$name} = $rule; + } + else + { + $self->fail (124, "bad model [$model] for ELEMENT [$name]", + Element => $name); + } + } + else + { + my $rule = new XML::Checker::ERule::Children; + if ($rule->setModel ($model)) + { + $self->{ERule}->{$name} = $rule; + } + else + { + $self->fail (124, "bad model [$model] for ELEMENT [$name]", + Element => $name); + } + } + my $rule = 
$self->{ERule}->{$name}; + print "added ELEMENT model for $name: " . $rule->debug . "\n" + if $rule and $self->{Debug}; +} + +# PerlSAX API +sub attlist_decl +{ + my ($self, $hash) = @_; + $self->Attlist ($hash->{ElementName}, $hash->{AttributeName}, + $hash->{Type}, $hash->{Default}, $hash->{Fixed}); +} + +sub Attlist +{ + my ($self, $tag, $attrName, $type, $default, $fixed) = @_; + my $arule = $self->{ARule}->{$tag} ||= + new XML::Checker::ARule ($tag, $self); + + $arule->Attlist ($attrName, $type, $default, $fixed, $self); +} + +# Initializes the context stack to check an XML::DOM::Element +sub InitDomElem +{ + my $self = shift; + + # initialize Context stack + $self->{Context} = [ new XML::Checker::Context::ANY ($self) ]; + $self->{InCDATA} = 0; +} + +# Clears the context stack after checking an XML::DOM::Element +sub FinalDomElem +{ + my $self = shift; + delete $self->{Context}; +} + +# PerlSAX API +sub start_document +{ + shift->Init; +} + +sub Init +{ + my $self = shift; + + # initialize Context stack + $self->{Context} = [ new XML::Checker::DocContext ($self) ]; + $self->{InCDATA} = 0; +} + +# PerlSAX API +sub end_document +{ + shift->Final; +} + +sub Final +{ + my $self = shift; +#?? could add more statistics: unreferenced Unparsed, ID + + for (keys %{ $self->{IDREF} }) + { + my $n = $self->{IDREF}->{$_}; + $self->fail (200, "undefined ID [$_] was referenced [$n] times") + unless defined $self->{ID}->{$_}; + } + + for (keys %{ $self->{ID} }) + { + my $n = $self->{IDREF}->{$_} || 0; + $self->fail (300, "[$n] references to ID [$_]"); + } + + delete $self->{Context}; +} + +sub getRootElement +{ + my $self = shift; +# print "getRoot $self " . $self->{RootElement} . "\n"; + $_[0]->{RootElement}; +} + +# PerlSAX API +sub doctype_decl +{ + my ($self, $hash) = @_; + $self->Doctype ($hash->{Name}, $hash->{SystemId}, + $hash->{PublicId}, $hash->{Internal}); +} + +sub Doctype +{ + my ($self, $name, $sysid, $pubid, $internal) = @_; + $self->{RootElement} = $name; + + my $context = $self->{Context}->[0]; + $context->setRootElement ($name); + +#?? what else +} + +sub Attr +{ + my ($self, $tag, $attr, $val, $specified) = @_; + +#print "Attr for tag=$tag attr=$attr val=$val spec=$specified\n"; + + my $arule = $self->{ARule}->{$tag}; + if (defined $arule && $arule->{Defined}->{$attr}) + { + $arule->check ($attr, $val, $specified); + } + else + { + $self->fail (103, "undefined attribute [$attr]", Element => $tag); + } +} + +sub EndAttr +{ + my $self = shift; + + my $arule = $self->{CurrARule}; + if (defined $arule) + { + $arule->EndAttr; + } +} + +# PerlSAX API +sub start_element +{ + my ($self, $hash) = @_; + my $tag = $hash->{Name}; + my $attr = $hash->{Attributes}; + + $self->Start ($tag); + + if (exists $hash->{AttributeOrder}) + { + my $defaulted = $hash->{Defaulted}; + my @order = @{ $hash->{AttributeOrder} }; + + # Specified attributes + for (my $i = 0; $i < $defaulted; $i++) + { + my $a = $order[$i]; + $self->Attr ($tag, $a, $attr->{$a}, 1); + } + + # Defaulted attributes + for (my $i = $defaulted; $i < @order; $i++) + { + my $attr = $order[$i]; + $self->Attr ($tag, $a, $attr->{$a}, 0); + } + } + else + { + # Assume all attributes were specified + my @attr = %$attr; + my ($key, $val); + while ($key = shift @attr) + { + $val = shift @attr; + + $self->Attr ($tag, $key, $val, 1); + } + } + $self->EndAttr; +} + +sub Start +{ + my ($self, $tag) = @_; +#?? if first tag, check with root element - or does expat check this already? 
+ + my $context = $self->{Context}; + $context->[0]->Start ($self, $tag); + + my $erule = $self->{ERule}->{$tag}; + if (defined $erule) + { + unshift @$context, $erule->context; + } + else + { + # It's not a real error according to the XML Spec. + $self->fail (101, "undefined ELEMENT [$tag]"); + unshift @$context, new XML::Checker::Context::ANY; + } + +#?? what about ARule ?? + my $arule = $self->{ARule}->{$tag}; + if (defined $arule) + { + $self->{CurrARule} = $arule; + $arule->StartAttr; + } +} + +# PerlSAX API +sub end_element +{ + shift->End; +} + +sub End +{ + my ($self) = @_; + my $context = $self->{Context}; + + $context->[0]->End ($self); + shift @$context; +} + +# PerlSAX API +sub characters +{ + my ($self, $hash) = @_; + my $data = $hash->{Data}; + + if ($self->{InCDATA}) + { + $self->CData ($data); + } + else + { + $self->Char ($data); + } +} + +# PerlSAX API +sub start_cdata +{ + $_[0]->{InCDATA} = 1; +} + +# PerlSAX API +sub end_cdata +{ + $_[0]->{InCDATA} = 0; +} + +sub Char +{ + my ($self, $text) = @_; + my $context = $self->{Context}; + + # NOTE: calls to isWS may set this to 1. + $INSIGNIF_WS = 0; + + $context->[0]->Char ($self, $text); +} + +# Treat CDATASection same as Char (Text) +sub CData +{ + my ($self, $cdata) = @_; + my $context = $self->{Context}; + + $context->[0]->Char ($self, $cdata); + + # CDATASection can never be insignificant whitespace + $INSIGNIF_WS = 0; +#?? I'm not sure if this assumption is correct +} + +# PerlSAX API +sub comment +{ + my ($self, $hash) = @_; + $self->Comment ($hash->{Data}); +} + +sub Comment +{ +# ?? what can be checked here? +} + +# PerlSAX API +sub entity_reference +{ + my ($self, $hash) = @_; + $self->EntityRef ($hash->{Name}, 0); +#?? parameter entities (like %par;) are NOT supported! +# PerlSAX::handle_default should be fixed! +} + +sub EntityRef +{ + my ($self, $ref, $isParam) = @_; + + if ($isParam) + { + # expand to "%name;" + print STDERR "XML::Checker::Entity - parameter Entity (%ent;) not implemented\n"; + } + else + { + # Treat same as Char - for now + my $context = $self->{Context}; + $context->[0]->Char ($self, "&$ref;"); + $INSIGNIF_WS = 0; +#?? I could count the number of times each Entity is referenced + } +} + +# PerlSAX API +sub unparsed_entity_decl +{ + my ($self, $hash) = @_; + $self->Unparsed ($hash->{Name}); +#?? what about Base, SytemId, PublicId ? +} + +sub Unparsed +{ + my ($self, $entity) = @_; +# print "ARule::Unparsed $entity\n"; + if ($self->{Unparsed}->{$entity}) + { + $self->fail (112, "unparsed ENTITY [$entity] already defined"); + } + else + { + $self->{Unparsed}->{$entity} = 1; + } +} + +# PerlSAX API +sub notation_decl +{ + my ($self, $hash) = @_; + $self->Notation ($hash->{Name}); +#?? what about Base, SytemId, PublicId ? +} + +sub Notation +{ + my ($self, $notation) = @_; + if ($self->{NOTATION}->{$notation}) + { + $self->fail (113, "NOTATION [$notation] already defined"); + } + else + { + $self->{NOTATION}->{$notation} = 1; + } +} + +# PerlSAX API +sub entity_decl +{ + my ($self, $hash) = @_; + + $self->Entity ($hash->{Name}, $hash->{Value}, $hash->{SystemId}, + $hash->{PublicId}, $hash->{'Notation'}); +} + +sub Entity +{ + my ($self, $name, $val, $sysId, $pubId, $ndata) = @_; + + if (exists $self->{ENTITY}->{$name}) + { + $self->fail (114, "ENTITY [$name] already defined"); + } + else + { + $self->{ENTITY}->{$name} = $val; + } +} + +# PerlSAX API +#sub xml_decl {} $hash=> Version, Encoding, Standalone +# Don't implement resolve_entity() which is called by ExternEnt! 
+#sub processing_instruction {} $hash=> Target, Data + +# Returns whether the Char data is whitespace and also updates the +# $INSIGNIF_WS variable to indicate whether it is insignificant whitespace. +# Note that this method is only called in places where potential whitespace +# can be insignificant (i.e. when the ERule is Children or EMPTY) +sub isWS +{ + $INSIGNIF_WS = ($_[1] =~ /^\s*$/); +} + +sub isInsignifWS +{ + $INSIGNIF_WS; +} + +sub fail +{ + my $self = shift; + &$FAIL (@_); +} + +sub print_error # static +{ + my $str = error_string (@_); + print STDERR $str; +} + +sub error_string # static +{ + my $code = shift; + my $msg = shift; + + my @a = (); + my ($key, $val); + while ($key = shift) + { + $val = shift; + push @a, ("$key " . (defined $val ? $val : "(undef)")); + } + + my $cat = $code >= 200 ? ($code >= 300 ? "INFO" : "WARNING") : "ERROR"; + my $str = join (", ", @a); + $str = length($str) ? "\tContext: $str\n" : ""; + + "XML::Checker $cat-$code: $msg\n$str"; +} + +sub debug +{ + my ($self) = @_; + my $context = $self->{Context}->[0]; + my $c = $context ? $context->debug : "no context"; + my $root = $self->{RootElement}; + + "Checker[$c,RootElement=$root]"; +} + +1; # package return code + +__END__ + +=head1 NAME + +XML::Checker - A perl module for validating XML + +=head1 SYNOPSIS + +L - an L that validates at parse time + +L - an L that validates at parse time + +(Some of the package names may change! This is only an alpha release...) + +=head1 DESCRIPTION + +XML::Checker can be used in different ways to validate XML. See the manual +pages of L and L +for more information. + +This document only describes common topics like error handling +and the XML::Checker class itself. + +WARNING: Not all errors are currently checked. Almost everything is subject to +change. Some reported errors may not be real errors. + +=head1 ERROR HANDLING + +Whenever XML::Checker (or one of the packages that uses XML::Checker) detects a +potential error, the 'fail handler' is called. It is currently also called +to report information, like how many times an Entity was referenced. +(The whole error handling mechanism is subject to change, I'm afraid...) + +The default fail handler is XML::Checker::print_error(), which prints an error +message to STDERR. It does not stop the XML::Checker, so it will continue +looking for other errors. +The error message is created with XML::Checker::error_string(). + +You can define your +own fail handler in two ways, locally and globally. Use a local variable to +temporarily override the fail handler. This way the default fail handler is restored +when the local variable goes out of scope, esp. when exceptions are thrown e.g. + + # Using a local variable to temporarily override the fail handler (preferred) + { # new block - start of local scope + local $XML::Checker::FAIL = \&my_fail; + ... your code here ... + } # end of block - the previous fail handler is restored + +You can also set the error handler globally, risking that your code may not +be reusable or may clash with other modules that use XML::Checker. + + # Globally setting the fail handler (not recommended) + $XML::Checker::FAIL = \&my_fail; + ... rest of your code ... + +The fail handler is called with the following parameters ($code, $msg, @context), +where $code is the error code, $msg is the error description and +@context contains information on where the error occurred. The @context is +a (ordered) list of (key,value) pairs and can easily be turned into a hash. 
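+
+As an illustrative sketch (not part of this module), a fail handler can turn
+the ordered (key, value) pairs of @context into a hash and pick out the fields
+it cares about; the my_fail name and the die message are just examples:
+
+ # Sketch only: convert the (key, value) context list into a hash
+ sub my_fail
+ {
+     my ($code, $msg, @context) = @_;
+     my %ctx = @context;      # e.g. $ctx{Element}, $ctx{line} - keys listed below
+     XML::Checker::print_error ($code, $msg, @context);
+     # codes below 200 are errors; 'line' is only present while parsing
+     die "giving up near line $ctx{line}\n"
+         if $code < 200 && defined $ctx{line};
+ }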
+It contains the following information: + + Element - tag name of Element node (if applicable) + Attr - attribute name (if applicable) + ChildElementIndex - if applicable (see error 157) + line - only when parsing + column - only when parsing + byte - only when parsing (-1 means: end of file) + +Some examples of fail handlers: + + # Don't print info messages + sub my_fail + { + my $code = shift; + print STDERR XML::Checker::error_message ($code, @_) + if $code < 300; + } + + # Die when the first error is encountered - this will stop + # the parsing process. Ignore information messages. + sub my_fail + { + my $code = shift; + die XML::Checker::error_message ($code, @_) if $code < 300; + } + + # Count the number of undefined NOTATION references + # and print the error as usual + sub my_fail + { + my $code = shift; + $count_undef_notations++ if $code == 100; + XML::Checker::print_error ($code, @_); + } + + # Die when an error is encountered. + # Don't die if a warning or info message is encountered, just print a message. + sub my_fail { + my $code = shift; + die XML::Checker::error_string ($code, @_) if $code < 200; + XML::Checker::print_error ($code, @_); + } + +=head1 INSIGNIFICANT WHITESPACE + +XML::Checker keeps track of whether whitespace found in character data +is significant or not. It is considered insignicant if it is found inside +an element that has a ELEMENT rule that is not of type Mixed or of type ANY. +(A Mixed ELEMENT rule does contains the #PCDATA keyword. +An ANY rule contains the ANY keyword. See the XML spec for more info.) + +XML::Checker can not determine whether whitespace is insignificant in those two +cases, because they both allow regular character data to appear within +XML elements and XML::Checker can therefore not deduce whether whitespace +is part of the actual data or was just added for readability of the XML file. + +XML::Checker::Parser and XML::DOM::ValParser both have the option to skip +insignificant whitespace when setting B to 1 in their constructor. +If set, they will not call the Char handler when insignificant whitespace is +encountered. This means that in XML::DOM::ValParser no Text nodes are created +for insignificant whitespace. + +Regardless of whether the SkipInsignifWS options is set, XML::Checker always +keeps track of whether whitespace is insignificant. After making a call to +XML::Checker's Char handler, you can find out if it was insignificant whitespace +by calling the isInsignifWS method. + +When using multiple (nested) XML::Checker instances or when using XML::Checker +without using XML::Checker::Parser or XML::DOM::ValParser (which hardly anybody +probably will), make sure to set a local variable in the scope of your checking +code, e.g. + + { # new block - start of local scope + local $XML::Checker::INSIGNIF_WS = 0; + ... insert your code here ... + } # end of scope + +=head1 ERROR CODES + +There are 3 categories, errors, warnings and info messages. +(The codes are still subject to change, as well the error descriptions.) + +Most errors have a link to the appropriate Validaty Constraint (B) +or other section in the XML specification. + +=head2 ERROR Messages + +=head2 100 - 109 + +=over 4 + +=item * + +B<100> - undefined NOTATION [$notation] in ATTLIST + +The ATTLIST contained a Notation reference that was not defined in a +NOTATION definition. +B L + + +=item * + +B<101> - undefined ELEMENT [$tagName] + +The specified Element was never defined in an ELEMENT definition. +This is not an error according to the XML spec. 
+See L + + +=item * + +B<102> - undefined unparsed ENTITY [$entity] + +The attribute value referenced an undefined unparsed entity. +B L + + +=item * + +B<103> - undefined attribute [$attrName] + +The specified attribute was not defined in an ATTLIST for that Element. +B L + + +=back + +=head2 110 - 119 + +=over 4 + +=item * + +B<110> - attribute [$attrName] of element [$tagName] already defined + +The specified attribute was already defined in this ATTLIST definition or +in a previous one. +This is not an error according to the XML spec. +See L + + +=item * + +B<111> - ID [$value] already defined + +An ID with the specified value was already defined in an attribute +within the same document. +B L + + +=item * + +B<112> - unparsed ENTITY [$entity] already defined + +This is not an error according to the XML spec. +See L + + +=item * + +B<113> - NOTATION [$notation] already defined + + +=item * + +B<114> - ENTITY [$entity] already defined + +This is not an error according to the XML spec. +See L + + +=item * + +B<115> - ELEMENT [$name] already defined +B L + + +=back + +=head2 120 - 129 + +=over 4 + +=item * + +B<120> - invalid default ENTITY [$default] + +(Or IDREF or NMTOKEN instead of ENTITY.) +The ENTITY, IDREF or NMTOKEN reference in the default attribute +value for an attribute with types ENTITY, IDREF or NMTOKEN was not +valid. +B L + + +=item * + +B<121> - invalid default [$token] in ENTITIES [$default] + +(Or IDREFS or NMTOKENS instead of ENTITIES) +One of the ENTITY, IDREF or NMTOKEN references in the default attribute +value for an attribute with types ENTITIES, IDREFS or NMTOKENS was not +valid. +B L + + +=item * + +B<122> - invalid default attribute value [$default] + +The specified default attribute value is not a valid attribute value. +B L + + +=item * + +B<123> - invalid default ID [$default], must be #REQUIRED or #IMPLIED + +The default attribute value for an attribute of type ID has to be +#REQUIRED or #IMPLIED. +B L + + +=item * + +B<124> - bad model [$model] for ELEMENT [$name] + +The model in the ELEMENT definition did not conform to the XML syntax +for Mixed models. +See L + + +=back + +=head2 130 - 139 + +=over 4 + +=item * + +B<130> - invalid NMTOKEN [$attrValue] + +The attribute value is not a valid NmToken token. +B L + + +=item * + +B<131> - invalid ID [$attrValue] + +The specified attribute value is not a valid Name token. +B L + + +=item * + +B<132> - invalid IDREF [$value] + +The specified attribute value is not a valid Name token. +B L + + +=item * + +B<133> - invalid ENTITY name [$name] + +The specified attribute value is not a valid Name token. +B L + + +=item * + +B<134> - invalid Enumeration value [$value] in ATTLIST + +The specified value is not a valid NmToken (see XML spec for def.) +See definition of L + + +=item * + +B<135> - empty NOTATION list in ATTLIST + +The NOTATION list of the ATTLIST definition did not contain any NOTATION +references. +See definition of L + + +=item * + +B<136> - empty Enumeration list in ATTLIST + +The ATTLIST definition of the attribute of type Enumeration did not +contain any values. +See definition of L + + +=item * + +B<137> - invalid ATTLIST type [$type] + +The attribute type has to be one of: ID, IDREF, IDREFS, ENTITY, ENTITIES, +NMTOKEN, NMTOKENS, CDATA, NOTATION or an Enumeration. 
+See definition of L + + +=back + +=head2 150 - 159 + +=over 4 + +=item * + +B<150> - bad #FIXED attribute value [$value], it should be [$default] + +The specified attribute was defined as #FIXED in the ATTLIST definition +and the found attribute $value differs from the specified $default value. +B L + + +=item * + +B<151> - only one ID allowed in ATTLIST per element first=[$attrName] + +The ATTLIST definitions for an Element may contain only one attribute +with the type ID. The specified $attrName is the one that was found first. +B L + + +=item * + +B<152> - Element should be EMPTY, found Element [$tagName] + +The ELEMENT definition for the specified Element said it should be +EMPTY, but a child Element was found. +B L + + +=item * + +B<153> - Element should be EMPTY, found text [$text] + +The ELEMENT definition for the specified Element said it should be +EMPTY, but text was found. Currently, whitespace is not allowed between the +open and close tag. (This may be wrong, please give feedback.) +To allow whitespace (subject to change), set: + + $XML::Checker::Context::EMPTY::ALLOW_WHITE_SPACE = 1; + +B L + + +=item * + +B<154> - bad order of Elements Found=[$found] RE=[$re] + +The child elements of the specified Element did not match the +regular expression found in the ELEMENT definition. $found contains +a comma separated list of all the child element tag names that were found. +$re contains the (decoded) regular expression that was used internally. +B L + + +=item * + +B<155> - more than one root Element [$tags] + +An XML Document may only contain one Element. +$tags is a comma separated list of element tag names encountered sofar. +L (expat) throws 'no element found' exception. +See two_roots.xml for an example. +See definition of L + + +=item * + +B<156> - unexpected root Element [$tagName], expected [$rootTagName] + +The tag name of the root Element of the XML Document differs from the name +specified in the DOCTYPE section. +L (expat) throws 'not well-formed' exception. +See bad_root.xml for an example. +B L + + +=item * + +B<157> - unexpected Element [$tagName] + +The ELEMENT definition for the specified Element does not allow child +Elements with the specified $tagName. +B L + +The error context contains ChildElementIndex which is the index within +its parent Element (counting only Element nodes.) + + +=item * + +B<158> - unspecified value for #IMPLIED attribute [$attrName] + +The ATTLIST for the specified attribute said the attribute was #IMPLIED, +which means the user application should supply a value, but the attribute +value was not specified. (User applications should pass a value and set +$specified to 1 in the Attr handler.) + + +=item * + +B<159> - unspecified value for #REQUIRED attribute [$attrName] + +The ATTLIST for the specified attribute said the attribute was #REQUIRED, +which means that a value should have been specified. +B L + + +=back + +=head2 160 - 169 + +=over 4 + +=item * + +B<160> - invalid Enumeration value [$attrValue] + +The specified attribute value does not match one of the Enumeration values +in the ATTLIST. +B L + + +=item * + +B<161> - invalid NOTATION value [$attrValue] + +The specifed attribute value was not found in the list of possible NOTATION +references as found in the ATTLIST definition. +B L + + +=item * + +B<162> - undefined NOTATION [$attrValue] + +The NOTATION referenced by the specified attribute value was not defined. 
+B L + + +=back + +=head2 WARNING Messages (200 and up) + +=over 4 + +=item * + +B<200> - undefined ID [$id] was referenced [$n] times + +The specified ID was referenced $n times, but never defined in an attribute +value with type ID. +B L + + +=back + +=head2 INFO Messages (300 and up) + +=over 4 + +=item * + +B<300> - [$n] references to ID [$id] + +The specified ID was referenced $n times. + + +=back + +=head2 Not checked + +The following errors are already checked by L (expat) and +are currently not checked by XML::Checker: + +(?? TODO - add more info) + +=over 4 + +=item root element is missing + +L (expat) throws 'no element found' exception. +See no_root.xml for an example. + +=back + +=head1 XML::Checker + +XML::Checker can be easily plugged into your application. +It uses mostly the same style of event handlers (or callbacks) as L. +See L manual page for descriptions of most handlers. + +It also implements PerlSAX style event handlers. See L. + +Currently, the XML::Checker object is a blessed hash with the following +(potentially useful) entries: + + $checker->{RootElement} - root element name as found in the DOCTYPE + $checker->{NOTATION}->{$notation} - is 1 if the NOTATION was defined + $checker->{ENTITY}->{$name} - contains the (first) ENTITY value if defined + $checker->{Unparsed}->{$entity} - is 1 if the unparsed ENTITY was defined + $checker->{ID}->{$id} - is 1 if the ID was defined + $checker->{IDREF}->{$id} - number of times the ID was referenced + + # Less useful: + $checker->{ERule}->{$tag} - the ELEMENT rules by Element tag name + $checker->{ARule}->{$tag} - the ATTLIST rules by Element tag name + $checker->{Context} - context stack used internally + $checker->{CurrARule} - current ATTLIST rule for the current Element + +=head2 XML:Checker methods + +This section is only interesting when using XML::Checker directly. +XML::Checker supports most event handlers that L supports with minor +differences. Note that the XML::Checker event handler methods are +instance methods and not static, so don't forget to call them like this, +without passing $expat (as in the L) handlers: + + $checker->Start($tagName); + +=over 4 + +=item Constructor + + $checker = new XML::Checker; + $checker = new XML::Checker (%user_args); + +User data may be stored by client applications. Only $checker->{User} is +guaranteed not to clash with internal hash keys. + +=item getRootElement () + + $tagName = $checker->getRootElement; + +Returns the root element name as found in the DOCTYPE + +=back + +=head2 Expat interface + +XML::Checker supports what I call the I interface, which is +the collection of methods you normally specify as the callback handlers +when using XML::Parser. + +Only the following L handlers are currently supported: +Init, Final, Char, Start, End, Element, Attlist, Doctype, +Unparsed, Entity, Notation. + +I don't know how to correctly support the Default handler for all L +releases. The Start handler works a little different (see below) and I +added Attr, InitDomElem, FinalDomElem, CDATA and EntityRef handlers. +See L for a description of the handlers that are not listed below. + +Note that this interface may disappear, when the PerlSAX interface stabilizes. + +=over 4 + +=item Start ($tag) + + $checker->Start($tag); + +Call this when an Element with the specified $tag name is encountered. +Different from the Start handler in L, in that no attributes +are passed in (use the Attr handler for those.) 
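+
+As a rough sketch (the element and attribute names here are invented), a
+hand-driven check of a single element combines Start with Init, Final and End
+from the supported handlers listed above, plus the Attr and EndAttr handlers
+described in the following items:
+
+ $checker = new XML::Checker;
+ $checker->Init;                           # set up the context stack
+ $checker->Start ("item");                 # saw <item ...>
+ $checker->Attr ("item", "id", "a1", 1);   # one specified attribute
+ $checker->EndAttr;                        # reports missing #REQUIRED attributes
+ $checker->End;                            # saw </item>
+ $checker->Final;
+ # With no ELEMENT/ATTLIST rules defined, this reports errors 101 and 103.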
+ +=item Attr ($tag, $attrName, $attrValue, $isSpecified) + + $checker->Attr($tag,$attrName,$attrValue,$spec); + +Checks an attribute with the specified $attrName and $attrValue against the +ATTLIST definition of the element with the specified $tag name. +$isSpecified means whether the attribute was specified (1) or defaulted (0). + +=item EndAttr () + + $checker->EndAttr; + +This should be called after all attributes are passed with Attr(). +It will check which of the #REQUIRED attributes were not specified and generate +the appropriate error (159) for each one that is missing. + +=item CDATA ($text) + + $checker->CDATA($text); + +This should be called whenever CDATASections are encountered. +Similar to Char handler (but might perform different checks later...) + +=item EntityRef ($entity, $isParameterEntity) + + $checker->EntityRef($entity,$isParameterEntity); + +Checks the ENTITY reference. Set $isParameterEntity to 1 for +entity references that start with '%'. + +=item InitDomElem () and FinalDomElem () + +Used by XML::DOM::Element::check() to initialize (and cleanup) the +context stack when checking a single element. + +=back + +=head2 PerlSAX interface + +XML::Checker now also supports the PerlSAX interface, so you can use XML::Checker +wherever you use PerlSAX handlers. + +XML::Checker implements the following methods: start_document, end_document, +start_element, end_element, characters, processing_instruction, comment, +start_cdata, end_cdata, entity_reference, notation_decl, unparsed_entity_decl, +entity_decl, element_decl, attlist_decl, doctype_decl, xml_decl + +Not implemented: set_document_locator, ignorable_whitespace + +See PerlSAX.pod for details. (It is called lib/PerlSAX.pod in the libxml-perl +distribution which can be found at CPAN.) + +=head1 CAVEATS + +This is an alpha release. Almost everything is subject to change. + +=head1 AUTHOR + +Send bug reports, hints, tips, suggestions to Enno Derksen at +>. + +=head1 SEE ALSO + +The home page of XML::Checker at L + +The XML spec (Extensible Markup Language 1.0) at L + +The L and L manual pages. + +The other packages that come with XML::Checker: +L, L + +The DOM Level 1 specification at L + +The PerlSAX specification. It is currently in lib/PerlSAX.pod in the +libxml-perl distribution by Ken MacLeod. + +The original SAX specification (Simple API for XML) can be found at +L and L diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Checker/DOM.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Checker/DOM.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,4 @@ +BEGIN +{ + warn "XML::Checker::DOM has been deprecated. The methods have been merged into XML::DOM." +} diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Checker/Parser.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Checker/Parser.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,683 @@ +package XML::Checker::Parser; +use strict; +use XML::Parser; +use XML::Checker; + +use vars qw( @ISA @InterceptedHandlers @SGML_SEARCH_PATH %URI_MAP + $_checker $_prevFAIL + $_Init $_Final $_Char $_Start $_End $_Element $_Attlist + $_Doctype $_Unparsed $_Notation $_Entity $_skipInsignifWS + $_EndOfDoc + ); + +@ISA = qw( XML::Parser ); + +@InterceptedHandlers = qw( Init Final Char Start End Element Attlist + Doctype Unparsed Notation Entity ); + +# Where to search for external DTDs (in local file system) +@SGML_SEARCH_PATH = (); + +# Where to search for external DTDs as referred to by public ID in a +# statement, e.g. 
"-//W3C//DTD HTML 4.0//EN" +# E.g. it could map "-//W3C//DTD HTML 4.0//EN" to "file:/user/html.dtd" +%URI_MAP = (); + +sub new +{ + my ($class, %args) = @_; + + my $super = new XML::Parser (%args); + $super->{Checker} = new XML::Checker (%args); + + my %handlers = %{$super->{Handlers}}; + + # Don't need Comment handler - assuming comments are allowed anywhere +#?? What should Default handler do? +#?? Check XMLDecl, ExternEnt, Proc? No, for now. +#?? Add CdataStart, CdataEnd support? + + for (@InterceptedHandlers) + { + my $func = "XML::Checker::Parser::$_"; + $handlers{$_} = \&$func; + } + + $super->{UserHandlers} = $super->{Handlers}; + $super->{Handlers} = \%handlers; + + bless $super, $class; +} + +sub getChecker +{ + $_[0]->{Checker} +} + +sub parse +{ + my $self = shift; + my $uh = $self->{UserHandlers}; + + local $_checker = $self->{Checker}; + + local $_Init = $uh->{Init}; + local $_Final = $uh->{Final}; + local $_Start = $uh->{Start}; + local $_End = $uh->{End}; + local $_Char = $uh->{Char}; + local $_Element = $uh->{'Element'}; + local $_Attlist = $uh->{'Attlist'}; + local $_Doctype = $uh->{Doctype}; + local $_Unparsed = $uh->{Unparsed}; + local $_Notation = $uh->{Notation}; + local $_Entity = $uh->{Entity}; + + local $_prevFAIL = $XML::Checker::FAIL; + local $XML::Checker::FAIL = \&fail_add_context; + + local $XML::Checker::INSIGNIF_WS = 0; + local $_skipInsignifWS = $self->{SkipInsignifWS}; + + local $_EndOfDoc = 0; + + $self->SUPER::parse (@_); +} + +my $LWP_USER_AGENT; +sub set_LWP_UserAgent # static +{ + $LWP_USER_AGENT = shift; +} + +sub load_URL # static +{ + my ($url, $lwp_user_agent) = @_; + my $result; + + # Read the file from the web with LWP. + # + # Note that we read in the entire file, which may not be ideal + # for large files. LWP::UserAgent also provides a callback style + # request, which we could convert to a stream with a fork()... + + my $response; + eval + { + use LWP::UserAgent; + + my $ua = $lwp_user_agent; + unless (defined $ua) + { + unless (defined $LWP_USER_AGENT) + { + $LWP_USER_AGENT = LWP::UserAgent->new; + + # Load proxy settings from environment variables, i.e.: + # http_proxy, ftp_proxy, no_proxy etc. (see LWP::UserAgent(3)) + # You need these to go thru firewalls. + $LWP_USER_AGENT->env_proxy; + } + $ua = $LWP_USER_AGENT; + } + my $req = new HTTP::Request 'GET', $url; + $response = $LWP_USER_AGENT->request ($req); + $result = $response->content; + }; + if ($@) + { + die "Couldn't load URL [$url] with LWP: $@"; + } + if (!$result) + { + my $message = $response->as_string; + die "Couldn't load URL [$url] with LWP: $message"; + } + return $result; +} + +sub parsefile +{ + my $self = shift; + my $url = shift; + + # Any other URL schemes? 
+ if ($url =~ /^(https?|ftp|wais|gopher|file):/) + { + my $xml = load_URL ($url, $self->{LWP_UserAgent}); + my $result; + eval + { + # Parse the result of the HTTP request + $result = $self->parse ($xml, @_); + }; + if ($@) + { + die "Couldn't parsefile [$url]: $@"; + } + return $result; + } + else + { + return $self->SUPER::parsefile ($url, @_); + } +} + +sub Init +{ + my $expat = shift; + $_checker->{Expat} = $expat; + + $_checker->Init (@_); + &$_Init ($expat) if $_Init; +} + +sub Final +{ + my $expat = shift; + $_EndOfDoc = 1; + + $_checker->Final (@_); + my $result = &$_Final ($expat) if $_Final; + + # Decouple Expat from Checker + delete $_checker->{Expat}; + + # NOTE: Checker is not decoupled + return $result; +} + +sub Start +{ + my ($expat, $tag, @attr) = @_; + + $_checker->Start ($tag); + + my $num_spec = $expat->specified_attr; + for (my $i = 0; $i < @attr; $i++) + { + my $spec = ($i < $num_spec); + my $attr = $attr[$i]; + my $val = $attr[++$i]; + +# print "--- $tag $attr $val $spec\n"; + $_checker->Attr ($tag, $attr, $val, $spec); + } + $_checker->EndAttr; + + &$_Start ($expat, $tag, @attr) if $_Start; +} + +sub End +{ + my $expat = shift; + $_checker->End (@_); + &$_End ($expat, @_) if $_End; +} + +sub Char +{ + my $expat = shift; + $_checker->Char (@_); + &$_Char ($expat, @_) + if $_Char && !($XML::Checker::INSIGNIF_WS && $_skipInsignifWS); + # Skip insignificant whitespace +} + +sub Element +{ + my $expat = shift; + $_checker->Element (@_); + &$_Element ($expat, @_) if $_Element; +} + +sub Attlist +{ + my $expat = shift; + $_checker->Attlist (@_); + &$_Attlist ($expat, @_) if $_Attlist; +} + + +sub Doctype +{ + my $expat = shift; + my ($name, $sysid, $pubid, $internal) = @_; + + my $dtd; + unless ($_checker->{SkipExternalDTD}) + { + if ($sysid) + { + # External DTD... + + #?? I'm not sure if we should die here or keep going? + $dtd = load_DTD ($sysid, $expat->{LWP_UserAgent}); + } + elsif ($pubid) + { + $dtd = load_DTD ($pubid, $expat->{LWP_UserAgent}); + } + } + + if (defined $dtd) + { +#?? what about passing ProtocolEncoding, Namespaces, Stream_Delimiter ? + my $parser = new XML::Parser ( + Checker => $_checker, + ErrorContext => $expat->{ErrorContext}, + Handlers => { + Entity => \&XML::Checker::Parser::ExternalDTD::Entity, + Notation => \&XML::Checker::Parser::ExternalDTD::Notation, + Element => \&XML::Checker::Parser::ExternalDTD::Element, + Attlist => \&XML::Checker::Parser::ExternalDTD::Attlist, + Unparsed => \&XML::Checker::Parser::ExternalDTD::Unparsed, + }); + + eval + { + $parser->parse ("\n<$name/>"); + }; + if ($@) + { + die "Couldn't parse contents of external DTD <$sysid> :$@"; + } + } + $_checker->Doctype (@_); + &$_Doctype ($expat, @_) if $_Doctype; +} + +sub Unparsed +{ + my $expat = shift; + $_checker->Unparsed (@_); + &$_Unparsed ($expat, @_) if $_Unparsed; +} + +sub Entity +{ + my $expat = shift; + $_checker->Entity (@_); + &$_Entity ($expat, @_) if $_Entity; +} + +sub Notation +{ + my $expat = shift; + $_checker->Notation (@_); + &$_Notation ($expat, @_) if $_Notation; +} + +sub Default +{ +#?? what can I check here? +# print "Default handler got[" . join (", ", @_) . "]"; +} + +#sub XMLDecl +#{ +#?? support later? 
+#} + +sub setHandlers +{ + my ($self, %h) = @_; + + for my $name (@InterceptedHandlers) + { + if (exists $h{$name}) + { + eval "\$_$name = \$h{$name}"; + delete $h{$name}; + } + } + + # Pass remaining handlers to the parent class (XML::Parser) + $self->SUPER::setHandlers (%h); +} + +# Add (line, column, byte) to error context (unless it's EOF) +sub fail_add_context # static +{ + my $e = $_checker->{Expat}; + + my $byte = $e->current_byte; # -1 means: end of XML document + if ($byte != -1 && !$_EndOfDoc) + { + push @_, (line => $e->current_line, + column => $e->current_column, + byte => $byte); + } + &$_prevFAIL (@_); +} + +#-------- STATIC METHODS related to External DTDs --------------------------- + +sub load_DTD # static +{ + my ($sysid, $lwp_user_agent) = @_; + + # See if it is defined in the %URI_MAP + # (Public IDs are stored here, e.g. "-//W3C//DTD HTML 4.0//EN") + if (exists $URI_MAP{$sysid}) + { + $sysid = $URI_MAP{$sysid}; + } + elsif ($sysid !~ /^\w+:/) + { + # Prefix the sysid with 'file:' if it has no protocol identifier + unless ($sysid =~ /^\//) + { + # Not an absolute path. See if it's in SGML_SEARCH_PATH. + my $relative_sysid = $sysid; + + $sysid = find_in_sgml_search_path ($sysid); + if (! $sysid) + { + if ($ENV{'SGML_SEARCH_PATH'}) + { + die "Couldn't find external DTD [$relative_sysid] in SGML_SEARCH_PATH ($ENV{'SGML_SEARCH_PATH'})"; + } + else + { + die "Couldn't find external DTD [$relative_sysid], may be you should set SGML_SEARCH_PATH"; + } + } + } + $sysid = "file:$sysid"; + } + + return load_URL ($sysid, $lwp_user_agent); +} + +sub map_uri # static +{ + %URI_MAP = (%URI_MAP, @_); +} + +sub set_sgml_search_path # static +{ + @SGML_SEARCH_PATH = @_; +} + +sub find_in_sgml_search_path # static +{ + my $file = shift; + + my @dirs = @SGML_SEARCH_PATH; + unless (@dirs) + { + my $path = $ENV{SGML_SEARCH_PATH}; + if ($path) + { + @dirs = split (':', $path); + } + else + { + my $home = $ENV{HOME}; + @dirs = (".", "$home/.sgml", "/usr/lib/sgml", "/usr/share/sgml"); + } + } + + for my $directory (@dirs) + { + if (-e "$directory/$file") + { + return "$directory/$file"; + } + } + return undef; +} + +package XML::Checker::Parser::ExternalDTD; + +sub Element { + my $expat = shift; + $expat->{Checker}->Element(@_); +} + +sub Attlist { + my $expat = shift; + $expat->{Checker}->Attlist(@_); +} + +sub Unparsed { + my $expat = shift; + $expat->{Checker}->Unparsed(@_); +} + +sub Notation { + my $expat = shift; + $expat->{Checker}->Notation(@_); +} + +sub Entity { + my $expat = shift; +# print "Entity: $expat\n"; + $expat->{Checker}->Entity(@_); +} + +1; # package return code + +__END__ + +=head1 NAME + +XML::Checker::Parser - an XML::Parser that validates at parse time + +=head1 SYNOPSIS + + use XML::Checker::Parser; + + my %expat_options = (KeepCDATA => 1, + Handlers => [ Unparsed => \&my_Unparsed_handler ]); + my $parser = new XML::Checker::Parser (%expat_options); + + eval { + local $XML::Checker::FAIL = \&my_fail; + $parser->parsefile ("fail.xml"); + }; + if ($@) { + # Either XML::Parser (expat) threw an exception or my_fail() died. + ... your error handling code here ... + } + + # Throws an exception (with die) when an error is encountered, this + # will stop the parsing process. + # Don't die if a warning or info message is encountered, just print a message. 
+ sub my_fail { + my $code = shift; + die XML::Checker::error_string ($code, @_) if $code < 200; + XML::Checker::print_error ($code, @_); + } + +=head1 DESCRIPTION + +XML::Checker::Parser extends L + +I hope the example in the SYNOPSIS says it all, just use +L as if it were an XML::Parser. +See L for the supported (expat) options. + +You can also derive your parser from XML::Checker::Parser instead of +from XML::Parser. All you should have to do is replace: + + package MyParser; + @ISA = qw( XML::Parser ); + +with: + + package MyParser; + @ISA = qw( XML::Checker::Parser ); + +=head1 XML::Checker::Parser constructor + + $parser = new XML::Checker::Parser (SkipExternalDTD => 1, SkipInsignifWS => 1); + +The constructor takes the same parameters as L with the following additions: + +=over 4 + +=item SkipExternalDTD + +By default, it will try to load external DTDs using LWP. You can disable this +by setting SkipExternalDTD to 1. See L for details. + +=item SkipInsignifWS + +By default, it will treat insignificant whitespace as regular Char data. +By setting SkipInsignifWS to 1, the user Char handler will not be called +if insignificant whitespace is encountered. +See L for details. + +=item LWP_UserAgent + +When calling parsefile() with a URL (instead of a filename) or when loading +external DTDs, we use LWP to download the +remote file. By default it will use a L that is created as follows: + + use LWP::UserAgent; + $LWP_USER_AGENT = LWP::UserAgent->new; + $LWP_USER_AGENT->env_proxy; + +Note that L reads proxy settings from your environment variables, +which is what I need to do to get thru our firewall. +If you want to use a different LWP::UserAgent, you can either set +it globally with: + + XML::Checker::Parser::set_LWP_UserAgent ($my_agent); + +or, you can specify it for a specific XML::Checker::Parser by passing it to +the constructor: + + my $parser = new XML::Checker::Parser (LWP_UserAgent => $my_agent); + +Currently, LWP is used when the filename (passed to parsefile) starts with one of +the following URL schemes: http, https, ftp, wais, gopher, or file +(followed by a colon.) If I missed one, please let me know. + +The LWP modules are part of libwww-perl which is available at CPAN. + +=back + +=head1 External DTDs + +XML::Checker::Parser will try to load and parse external DTDs that are +referenced in DOCTYPE definitions unless you set the B +option to 1 (the default setting is 0.) +See L for details on what is not supported by XML::Checker::Parser. + +L (version 2.27 and up) does a much better job at reading external +DTDs, because recently external DTD parsing was added to expat. +Make sure you set the L option B to 1 and the +XML::Checker::Parser option B to 1. +(They can both be set in the XML::Checker::Parser constructor.) + +When external DTDs are parsed by XML::Checker::Parser, they are +located in the following order: + +=over 4 + +=item * + +With the %URI_MAP, which can be set using B. +This hash maps external resource ids (like system ID's and public ID's) +to full path URI's. +It was meant to aid in resolving PUBLIC IDs found in DOCTYPE declarations +after the PUBLIC keyword, e.g. + + + +However, you can also use this to force L to read DTDs from a +different URL than was specified (e.g. from the local file system for +performance reasons.) + +=item * + +on the Internet, if their system identifier starts with a protocol +(like http://...) 
+ +=item * + +on the local disk, if their system identifier starts with a slash +(absolute path) + +=item * + +in the SGML_SEARCH_PATH, if their system identifier is a +relative file name. It will use @SGML_SEARCH_PATH if it was set with +B, or the colon-separated $ENV{SGML_SEARCH_PATH}, +or (if that isn't set) the list (".", "$ENV{'HOME'}/.sgml", "/usr/lib/sgml", +"/usr/share/sgml"), which includes the +current directory, so it should do the right thing in most cases. + +=back + +=head2 Static methods related to External DTDs + +=over 4 + +=item set_sgml_search_path (dir1, dir2, ...) + +External DTDs with relative file paths are looked up using the @SGML_SEARCH_PATH, +which can be set with this method. If @SGML_SEARCH_PATH is never set, it +will use the colon-separated $ENV{SGML_SEARCH_PATH} instead. If neither are set +it uses the list: ".", "$ENV{'HOME'}/.sgml", "/usr/lib/sgml", +"/usr/share/sgml". + +set_sgml_search_path is a static method. + +=item map_uri (pubid => uri, ...) + +To define the location of PUBLIC ids, as found in DOCTYPE declarations +after the PUBLIC keyword, e.g. + + + +call this method, e.g. + + XML::Checker::Parser::map_uri ( + "-//W3C//DTD HTML 4.0//EN" => "file:/user/html.dtd"); + +See L for more info. + +XML::Checker::Parser::map_uri is a static method. + +=back + +=head1 Switching user handlers at parse time + +You should be able to use setHandlers() just as in L. +(Using setHandlers has not been tested yet.) + +=head1 Error handling + +XML::Checker::Parser routes the fail handler through +XML::Checker::Parser::fail_add_context() before calling your fail handler +(i.e. the global fail handler: $XML::Checker::FAIL. +See L.) +It adds the (line, column, byte) information from L to the +error context (unless it was the end of the XML document.) + +=head1 Supported XML::Parser handlers + +Only the following L handlers are currently routed through +L: Init, Final, Char, Start, End, Element, Attlist, Doctype, +Unparsed, Notation. + +=head1 CAVEATS + +When using XML::Checker::Parser to parse external DTDs +(i.e. with SkipExternalDTD => 0), +expect trouble when your external DTD contains parameter entities inside +declarations or conditional sections. The external DTD should probably have +the same encoding as the orignal XML document. + +=head1 AUTHOR + +Send bug reports, hints, tips, suggestions to Enno Derksen at +>. + +=head1 SEE ALSO + +L (L), L diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,5065 @@ +################################################################################ +# +# Perl module: XML::DOM +# +# By Enno Derksen +# +################################################################################ +# +# To do: +# +# * optimize Attr if it only contains 1 Text node to hold the value +# * fix setDocType! +# +# * BUG: setOwnerDocument - does not process default attr values correctly, +# they still point to the old doc. +# * change Exception mechanism +# * maybe: more checking of sysId etc. +# * NoExpand mode (don't know what else is useful) +# * various odds and ends: see comments starting with "??" +# * normalize(1) could also expand CDataSections and EntityReferences +# * parse a DocumentFragment? 
+# * encoding support +# +###################################################################### + +###################################################################### +package XML::DOM; +###################################################################### + +use strict; +use vars qw( $VERSION @ISA @EXPORT + $IgnoreReadOnly $SafeMode $TagStyle + %DefaultEntities %DecodeDefaultEntity + ); +use Carp; +use XML::RegExp; + +BEGIN +{ + require XML::Parser; + $VERSION = '1.27'; + + my $needVersion = '2.23'; + die "need at least XML::Parser version $needVersion (current=${XML::Parser::VERSION})" + unless $XML::Parser::VERSION >= $needVersion; + + @ISA = qw( Exporter ); + + # Constants for XML::DOM Node types + @EXPORT = qw( + UNKNOWN_NODE + ELEMENT_NODE + ATTRIBUTE_NODE + TEXT_NODE + CDATA_SECTION_NODE + ENTITY_REFERENCE_NODE + ENTITY_NODE + PROCESSING_INSTRUCTION_NODE + COMMENT_NODE + DOCUMENT_NODE + DOCUMENT_TYPE_NODE + DOCUMENT_FRAGMENT_NODE + NOTATION_NODE + ELEMENT_DECL_NODE + ATT_DEF_NODE + XML_DECL_NODE + ATTLIST_DECL_NODE + ); +} + +#---- Constant definitions + +# Node types + +sub UNKNOWN_NODE () { 0 } # not in the DOM Spec + +sub ELEMENT_NODE () { 1 } +sub ATTRIBUTE_NODE () { 2 } +sub TEXT_NODE () { 3 } +sub CDATA_SECTION_NODE () { 4 } +sub ENTITY_REFERENCE_NODE () { 5 } +sub ENTITY_NODE () { 6 } +sub PROCESSING_INSTRUCTION_NODE () { 7 } +sub COMMENT_NODE () { 8 } +sub DOCUMENT_NODE () { 9 } +sub DOCUMENT_TYPE_NODE () { 10} +sub DOCUMENT_FRAGMENT_NODE () { 11} +sub NOTATION_NODE () { 12} + +sub ELEMENT_DECL_NODE () { 13 } # not in the DOM Spec +sub ATT_DEF_NODE () { 14 } # not in the DOM Spec +sub XML_DECL_NODE () { 15 } # not in the DOM Spec +sub ATTLIST_DECL_NODE () { 16 } # not in the DOM Spec + +%DefaultEntities = +( + "quot" => '"', + "gt" => ">", + "lt" => "<", + "apos" => "'", + "amp" => "&" +); + +%DecodeDefaultEntity = +( + '"' => """, + ">" => ">", + "<" => "<", + "'" => "'", + "&" => "&" +); + +# +# If you don't want DOM warnings to use 'warn', override this method like this: +# +# { # start block scope +# local *XML::DOM::warning = \&my_warn; +# ... your code here ... +# } # end block scope (old XML::DOM::warning takes effect again) +# +sub warning # static +{ + warn @_; +} + +# +# This method defines several things in the caller's package, so you can use named constants to +# access the array that holds the member data, i.e. $self->[_Data]. It assumes the caller's package +# defines a class that is implemented as a blessed array reference. +# Note that this is very similar to using 'use fields' and 'use base'. +# +# E.g. if $fields eq "Name Model", $parent eq "XML::DOM::Node" and +# XML::DOM::Node had "A B C" as fields and it was called from package "XML::DOM::ElementDecl", +# then this code would basically do the following: +# +# package XML::DOM::ElementDecl; +# +# sub _Name () { 3 } # Note that parent class had three fields +# sub _Model () { 4 } +# +# # Maps constant names (without '_') to constant (int) value +# %HFIELDS = ( %XML::DOM::Node::HFIELDS, Name => _Name, Model => _Model ); +# +# # Define XML:DOM::ElementDecl as a subclass of XML::DOM::Node +# @ISA = qw{ XML::DOM::Node }; +# +# # The following function names can be exported into the user's namespace. 
+# @EXPORT_OK = qw{ _Name _Model }; +# +# # The following function names can be exported into the user's namespace +# # with: import XML::DOM::ElementDecl qw( :Fields ); +# %EXPORT_TAGS = ( Fields => qw{ _Name _Model } ); +# +sub def_fields # static +{ + my ($fields, $parent) = @_; + + my ($pkg) = caller; + + no strict 'refs'; + + my @f = split (/\s+/, $fields); + my $n = 0; + + my %hfields; + if (defined $parent) + { + my %pf = %{"$parent\::HFIELDS"}; + %hfields = %pf; + + $n = scalar (keys %pf); + @{"$pkg\::ISA"} = ( $parent ); + } + + my $i = $n; + for (@f) + { + eval "sub $pkg\::_$_ () { $i }"; + $hfields{$_} = $i; + $i++; + } + %{"$pkg\::HFIELDS"} = %hfields; + @{"$pkg\::EXPORT_OK"} = map { "_$_" } @f; + + ${"$pkg\::EXPORT_TAGS"}{Fields} = [ map { "_$_" } @f ]; +} + +# sub blesh +# { +# my $hashref = shift; +# my $class = shift; +# no strict 'refs'; +# my $self = bless [\%{"$class\::FIELDS"}], $class; +# if (defined $hashref) +# { +# for (keys %$hashref) +# { +# $self->{$_} = $hashref->{$_}; +# } +# } +# $self; +# } + +# sub blesh2 +# { +# my $hashref = shift; +# my $class = shift; +# no strict 'refs'; +# my $self = bless [\%{"$class\::FIELDS"}], $class; +# if (defined $hashref) +# { +# for (keys %$hashref) +# { +# eval { $self->{$_} = $hashref->{$_}; }; +# croak "ERROR in field [$_] $@" if $@; +# } +# } +# $self; +#} + +# +# CDATA section may not contain "]]>" +# +sub encodeCDATA +{ + my ($str) = shift; + $str =~ s/]]>/]]>/go; + $str; +} + +# +# PI may not contain "?>" +# +sub encodeProcessingInstruction +{ + my ($str) = shift; + $str =~ s/\?>/?>/go; + $str; +} + +# +#?? Not sure if this is right - must prevent double minus somehow... +# +sub encodeComment +{ + my ($str) = shift; + return undef unless defined $str; + + $str =~ s/--/--/go; + $str; +} + +# +# For debugging +# +sub toHex +{ + my $str = shift; + my $len = length($str); + my @a = unpack ("C$len", $str); + my $s = ""; + for (@a) + { + $s .= sprintf ("%02x", $_); + } + $s; +} + +# +# 2nd parameter $default: list of Default Entity characters that need to be +# converted (e.g. "&<" for conversion to "&" and "<" resp.) +# +sub encodeText +{ + my ($str, $default) = @_; + return undef unless defined $str; + + $str =~ s/([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)|([$default])|(]]>)/ + defined($1) ? XmlUtf8Decode ($1) : + defined ($2) ? $DecodeDefaultEntity{$2} : "]]>" /egs; + +#?? could there be references that should not be expanded? +# e.g. should not replace &#nn; ¯ and &abc; +# $str =~ s/&(?!($ReName|#[0-9]+|#x[0-9a-fA-F]+);)/&/go; + + $str; +} + +# +# Used by AttDef - default value +# +sub encodeAttrValue +{ + encodeText (shift, '"&<'); +} + +# +# Converts an integer (Unicode - ISO/IEC 10646) to a UTF-8 encoded character +# sequence. +# Used when converting e.g. { or Ͽ to a string value. 
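+# For illustration only (values worked out by hand, not from the original source):
+#   XML::DOM::XmlUtf8Encode (0x41)  returns "A"           (single byte)
+#   XML::DOM::XmlUtf8Encode (0x3FF) returns "\xCF\xBF"    (two-byte UTF-8 sequence)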
+# +# Algorithm borrowed from expat/xmltok.c/XmlUtf8Encode() +# +# not checking for bad characters: < 0, x00-x08, x0B-x0C, x0E-x1F, xFFFE-xFFFF +# +sub XmlUtf8Encode +{ + my $n = shift; + if ($n < 0x80) + { + return chr ($n); + } + elsif ($n < 0x800) + { + return pack ("CC", (($n >> 6) | 0xc0), (($n & 0x3f) | 0x80)); + } + elsif ($n < 0x10000) + { + return pack ("CCC", (($n >> 12) | 0xe0), ((($n >> 6) & 0x3f) | 0x80), + (($n & 0x3f) | 0x80)); + } + elsif ($n < 0x110000) + { + return pack ("CCCC", (($n >> 18) | 0xf0), ((($n >> 12) & 0x3f) | 0x80), + ((($n >> 6) & 0x3f) | 0x80), (($n & 0x3f) | 0x80)); + } + croak "number is too large for Unicode [$n] in &XmlUtf8Encode"; +} + +# +# Opposite of XmlUtf8Decode plus it adds prefix "&#" or "&#x" and suffix ";" +# The 2nd parameter ($hex) indicates whether the result is hex encoded or not. +# +sub XmlUtf8Decode +{ + my ($str, $hex) = @_; + my $len = length ($str); + my $n; + + if ($len == 2) + { + my @n = unpack "C2", $str; + $n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f); + } + elsif ($len == 3) + { + my @n = unpack "C3", $str; + $n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + + ($n[2] & 0x3f); + } + elsif ($len == 4) + { + my @n = unpack "C4", $str; + $n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12) + + (($n[2] & 0x3f) << 6) + ($n[3] & 0x3f); + } + elsif ($len == 1) # just to be complete... + { + $n = ord ($str); + } + else + { + croak "bad value [$str] for XmlUtf8Decode"; + } + $hex ? sprintf ("&#x%x;", $n) : "&#$n;"; +} + +$IgnoreReadOnly = 0; +$SafeMode = 1; + +sub getIgnoreReadOnly +{ + $IgnoreReadOnly; +} + +# +# The global flag $IgnoreReadOnly is set to the specified value and the old +# value of $IgnoreReadOnly is returned. +# +# To temporarily disable read-only related exceptions (i.e. when parsing +# XML or temporarily), do the following: +# +# my $oldIgnore = XML::DOM::ignoreReadOnly (1); +# ... do whatever you want ... +# XML::DOM::ignoreReadOnly ($oldIgnore); +# +sub ignoreReadOnly +{ + my $i = $IgnoreReadOnly; + $IgnoreReadOnly = $_[0]; + return $i; +} + +# +# XML spec seems to break its own rules... (see ENTITY xmlpio) +# +sub forgiving_isValidName +{ + $_[0] =~ /^$XML::RegExp::Name$/o; +} + +# +# Don't allow names starting with xml (either case) +# +sub picky_isValidName +{ + $_[0] =~ /^$XML::RegExp::Name$/o and $_[0] !~ /^xml/i; +} + +# Be forgiving by default, +*isValidName = \&forgiving_isValidName; + +sub allowReservedNames # static +{ + *isValidName = ($_[0] ? \&forgiving_isValidName : \&picky_isValidName); +} + +sub getAllowReservedNames # static +{ + *isValidName == \&forgiving_isValidName; +} + +# +# Always compress empty tags by default +# This is used by Element::print. 
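+# A usage sketch (illustrative, not part of the original module): install a
+# different style callback to print all empty elements in the expanded form,
+# e.g.
+#
+#   XML::DOM::setTagCompression (sub {
+#       my ($tagName, $elem) = @_;
+#       return 2;    # 0: <tag/>   1: <tag></tag>   2: <tag />
+#   });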
+# +$TagStyle = sub { 0 }; + +sub setTagCompression +{ + $TagStyle = shift; +} + +###################################################################### +package XML::DOM::PrintToFileHandle; +###################################################################### + +# +# Used by XML::DOM::Node::printToFileHandle +# + +sub new +{ + my($class, $fn) = @_; + bless $fn, $class; +} + +sub print +{ + my ($self, $str) = @_; + print $self $str; +} + +###################################################################### +package XML::DOM::PrintToString; +###################################################################### + +use vars qw{ $Singleton }; + +# +# Used by XML::DOM::Node::toString to concatenate strings +# + +sub new +{ + my($class) = @_; + my $str = ""; + bless \$str, $class; +} + +sub print +{ + my ($self, $str) = @_; + $$self .= $str; +} + +sub toString +{ + my $self = shift; + $$self; +} + +sub reset +{ + ${$_[0]} = ""; +} + +$Singleton = new XML::DOM::PrintToString; + +###################################################################### +package XML::DOM::DOMImplementation; +###################################################################### + +$XML::DOM::DOMImplementation::Singleton = + bless \$XML::DOM::DOMImplementation::Singleton, 'XML::DOM::DOMImplementation'; + +sub hasFeature +{ + my ($self, $feature, $version) = @_; + + $feature eq 'XML' and $version eq '1.0'; +} + + +###################################################################### +package XML::XQL::Node; # forward declaration +###################################################################### + +###################################################################### +package XML::DOM::Node; +###################################################################### + +use vars qw( @NodeNames @EXPORT @ISA %HFIELDS @EXPORT_OK @EXPORT_TAGS ); + +BEGIN +{ + use XML::DOM::DOMException; + import Carp; + + require FileHandle; + + @ISA = qw( Exporter XML::XQL::Node ); + + # NOTE: SortKey is used in XML::XQL::Node. + # UserData is reserved for users (Hang your data here!) + XML::DOM::def_fields ("C A Doc Parent ReadOnly UsedIn Hidden SortKey UserData"); + + push (@EXPORT, qw( + UNKNOWN_NODE + ELEMENT_NODE + ATTRIBUTE_NODE + TEXT_NODE + CDATA_SECTION_NODE + ENTITY_REFERENCE_NODE + ENTITY_NODE + PROCESSING_INSTRUCTION_NODE + COMMENT_NODE + DOCUMENT_NODE + DOCUMENT_TYPE_NODE + DOCUMENT_FRAGMENT_NODE + NOTATION_NODE + ELEMENT_DECL_NODE + ATT_DEF_NODE + XML_DECL_NODE + ATTLIST_DECL_NODE + )); +} + +#---- Constant definitions + +# Node types + +sub UNKNOWN_NODE () {0;} # not in the DOM Spec + +sub ELEMENT_NODE () {1;} +sub ATTRIBUTE_NODE () {2;} +sub TEXT_NODE () {3;} +sub CDATA_SECTION_NODE () {4;} +sub ENTITY_REFERENCE_NODE () {5;} +sub ENTITY_NODE () {6;} +sub PROCESSING_INSTRUCTION_NODE () {7;} +sub COMMENT_NODE () {8;} +sub DOCUMENT_NODE () {9;} +sub DOCUMENT_TYPE_NODE () {10;} +sub DOCUMENT_FRAGMENT_NODE () {11;} +sub NOTATION_NODE () {12;} + +sub ELEMENT_DECL_NODE () {13;} # not in the DOM Spec +sub ATT_DEF_NODE () {14;} # not in the DOM Spec +sub XML_DECL_NODE () {15;} # not in the DOM Spec +sub ATTLIST_DECL_NODE () {16;} # not in the DOM Spec + +@NodeNames = ( + "UNKNOWN_NODE", # not in the DOM Spec! 
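+    # NOTE: the indices in this array line up with the node type constants
+    # above, so getNodeTypeName() can index it directly with getNodeType().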
+ + "ELEMENT_NODE", + "ATTRIBUTE_NODE", + "TEXT_NODE", + "CDATA_SECTION_NODE", + "ENTITY_REFERENCE_NODE", + "ENTITY_NODE", + "PROCESSING_INSTRUCTION_NODE", + "COMMENT_NODE", + "DOCUMENT_NODE", + "DOCUMENT_TYPE_NODE", + "DOCUMENT_FRAGMENT_NODE", + "NOTATION_NODE", + + "ELEMENT_DECL_NODE", + "ATT_DEF_NODE", + "XML_DECL_NODE", + "ATTLIST_DECL_NODE" + ); + +sub decoupleUsedIn +{ + my $self = shift; + undef $self->[_UsedIn]; # was delete +} + +sub getParentNode +{ + $_[0]->[_Parent]; +} + +sub appendChild +{ + my ($self, $node) = @_; + + # REC 7473 + if ($XML::DOM::SafeMode) + { + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + } + + my $doc = $self->[_Doc]; + + if ($node->isDocumentFragmentNode) + { + if ($XML::DOM::SafeMode) + { + for my $n (@{$node->[_C]}) + { + croak new XML::DOM::DOMException (WRONG_DOCUMENT_ERR, + "nodes belong to different documents") + if $doc != $n->[_Doc]; + + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "node is ancestor of parent node") + if $n->isAncestor ($self); + + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "bad node type") + if $self->rejectChild ($n); + } + } + + my @list = @{$node->[_C]}; # don't try to compress this + for my $n (@list) + { + $n->setParentNode ($self); + } + push @{$self->[_C]}, @list; + } + else + { + if ($XML::DOM::SafeMode) + { + croak new XML::DOM::DOMException (WRONG_DOCUMENT_ERR, + "nodes belong to different documents") + if $doc != $node->[_Doc]; + + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "node is ancestor of parent node") + if $node->isAncestor ($self); + + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "bad node type") + if $self->rejectChild ($node); + } + $node->setParentNode ($self); + push @{$self->[_C]}, $node; + } + $node; +} + +sub getChildNodes +{ + # NOTE: if node can't have children, $self->[_C] is undef. + my $kids = $_[0]->[_C]; + + # Return a list if called in list context. + wantarray ? (defined ($kids) ? @{ $kids } : ()) : + (defined ($kids) ? $kids : $XML::DOM::NodeList::EMPTY); +} + +sub hasChildNodes +{ + my $kids = $_[0]->[_C]; + defined ($kids) && @$kids > 0; +} + +# This method is overriden in Document +sub getOwnerDocument +{ + $_[0]->[_Doc]; +} + +sub getFirstChild +{ + my $kids = $_[0]->[_C]; + defined $kids ? $kids->[0] : undef; +} + +sub getLastChild +{ + my $kids = $_[0]->[_C]; + defined $kids ? 
$kids->[-1] : undef; +} + +sub getPreviousSibling +{ + my $self = shift; + + my $pa = $self->[_Parent]; + return undef unless $pa; + my $index = $pa->getChildIndex ($self); + return undef unless $index; + + $pa->getChildAtIndex ($index - 1); +} + +sub getNextSibling +{ + my $self = shift; + + my $pa = $self->[_Parent]; + return undef unless $pa; + + $pa->getChildAtIndex ($pa->getChildIndex ($self) + 1); +} + +sub insertBefore +{ + my ($self, $node, $refNode) = @_; + + return $self->appendChild ($node) unless $refNode; # append at the end + + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + + my @nodes = ($node); + @nodes = @{$node->[_C]} + if $node->getNodeType == DOCUMENT_FRAGMENT_NODE; + + my $doc = $self->[_Doc]; + + for my $n (@nodes) + { + croak new XML::DOM::DOMException (WRONG_DOCUMENT_ERR, + "nodes belong to different documents") + if $doc != $n->[_Doc]; + + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "node is ancestor of parent node") + if $n->isAncestor ($self); + + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "bad node type") + if $self->rejectChild ($n); + } + my $index = $self->getChildIndex ($refNode); + + croak new XML::DOM::DOMException (NOT_FOUND_ERR, + "reference node not found") + if $index == -1; + + for my $n (@nodes) + { + $n->setParentNode ($self); + } + + splice (@{$self->[_C]}, $index, 0, @nodes); + $node; +} + +sub replaceChild +{ + my ($self, $node, $refNode) = @_; + + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + + my @nodes = ($node); + @nodes = @{$node->[_C]} + if $node->getNodeType == DOCUMENT_FRAGMENT_NODE; + + for my $n (@nodes) + { + croak new XML::DOM::DOMException (WRONG_DOCUMENT_ERR, + "nodes belong to different documents") + if $self->[_Doc] != $n->[_Doc]; + + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "node is ancestor of parent node") + if $n->isAncestor ($self); + + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "bad node type") + if $self->rejectChild ($n); + } + + my $index = $self->getChildIndex ($refNode); + croak new XML::DOM::DOMException (NOT_FOUND_ERR, + "reference node not found") + if $index == -1; + + for my $n (@nodes) + { + $n->setParentNode ($self); + } + splice (@{$self->[_C]}, $index, 1, @nodes); + + $refNode->removeChildHoodMemories; + $refNode; +} + +sub removeChild +{ + my ($self, $node) = @_; + + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + + my $index = $self->getChildIndex ($node); + + croak new XML::DOM::DOMException (NOT_FOUND_ERR, + "reference node not found") + if $index == -1; + + splice (@{$self->[_C]}, $index, 1, ()); + + $node->removeChildHoodMemories; + $node; +} + +# Merge all subsequent Text nodes in this subtree +sub normalize +{ + my ($self) = shift; + my $prev = undef; # previous Text node + + return unless defined $self->[_C]; + + my @nodes = @{$self->[_C]}; + my $i = 0; + my $n = @nodes; + while ($i < $n) + { + my $node = $self->getChildAtIndex($i); + my $type = $node->getNodeType; + + if (defined $prev) + { + # It should not merge CDATASections. Dom Spec says: + # Adjacent CDATASections nodes are not merged by use + # of the Element.normalize() method. 
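+    # Only plain Text nodes are merged into $prev below; any other node
+    # type ends the current run of Text nodes (Elements are normalized
+    # recursively, including their Attr nodes).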
+ if ($type == TEXT_NODE) + { + $prev->appendData ($node->getData); + $self->removeChild ($node); + $i--; + $n--; + } + else + { + $prev = undef; + if ($type == ELEMENT_NODE) + { + $node->normalize; + if (defined $node->[_A]) + { + for my $attr (@{$node->[_A]->getValues}) + { + $attr->normalize; + } + } + } + } + } + else + { + if ($type == TEXT_NODE) + { + $prev = $node; + } + elsif ($type == ELEMENT_NODE) + { + $node->normalize; + if (defined $node->[_A]) + { + for my $attr (@{$node->[_A]->getValues}) + { + $attr->normalize; + } + } + } + } + $i++; + } +} + +# +# Return all Element nodes in the subtree that have the specified tagName. +# If tagName is "*", all Element nodes are returned. +# NOTE: the DOM Spec does not specify a 3rd or 4th parameter +# +sub getElementsByTagName +{ + my ($self, $tagName, $recurse, $list) = @_; + $recurse = 1 unless defined $recurse; + $list = (wantarray ? [] : new XML::DOM::NodeList) unless defined $list; + + return unless defined $self->[_C]; + + # preorder traversal: check parent node first + for my $kid (@{$self->[_C]}) + { + if ($kid->isElementNode) + { + if ($tagName eq "*" || $tagName eq $kid->getTagName) + { + push @{$list}, $kid; + } + $kid->getElementsByTagName ($tagName, $recurse, $list) if $recurse; + } + } + wantarray ? @{ $list } : $list; +} + +sub getNodeValue +{ + undef; +} + +sub setNodeValue +{ + # no-op +} + +# +# Redefined by XML::DOM::Element +# +sub getAttributes +{ + undef; +} + +#------------------------------------------------------------ +# Extra method implementations + +sub setOwnerDocument +{ + my ($self, $doc) = @_; + $self->[_Doc] = $doc; + + return unless defined $self->[_C]; + + for my $kid (@{$self->[_C]}) + { + $kid->setOwnerDocument ($doc); + } +} + +sub cloneChildren +{ + my ($self, $node, $deep) = @_; + return unless $deep; + + return unless defined $self->[_C]; + + local $XML::DOM::IgnoreReadOnly = 1; + + for my $kid (@{$node->[_C]}) + { + my $newNode = $kid->cloneNode ($deep); + push @{$self->[_C]}, $newNode; + $newNode->setParentNode ($self); + } +} + +# +# For internal use only! +# +sub removeChildHoodMemories +{ + my ($self) = @_; + + undef $self->[_Parent]; # was delete +} + +# +# Remove circular dependencies. The Node and its children should +# not be used afterwards. +# +sub dispose +{ + my $self = shift; + + $self->removeChildHoodMemories; + + if (defined $self->[_C]) + { + $self->[_C]->dispose; + undef $self->[_C]; # was delete + } + undef $self->[_Doc]; # was delete +} + +# +# For internal use only! +# +sub setParentNode +{ + my ($self, $parent) = @_; + + # REC 7473 + my $oldParent = $self->[_Parent]; + if (defined $oldParent) + { + # remove from current parent + my $index = $oldParent->getChildIndex ($self); + + # NOTE: we don't have to check if [_C] is defined, + # because were removing a child here! + splice (@{$oldParent->[_C]}, $index, 1, ()); + + $self->removeChildHoodMemories; + } + $self->[_Parent] = $parent; +} + +# +# This function can return 3 values: +# 1: always readOnly +# 0: never readOnly +# undef: depends on parent node +# +# Returns 1 for DocumentType, Notation, Entity, EntityReference, Attlist, +# ElementDecl, AttDef. +# The first 4 are readOnly according to the DOM Spec, the others are always +# children of DocumentType. (Naturally, children of a readOnly node have to be +# readOnly as well...) +# These nodes are always readOnly regardless of who their ancestors are. +# Other nodes, e.g. 
Comment, are readOnly only if their parent is readOnly, +# which basically means that one of its ancestors has to be one of the +# aforementioned node types. +# Document and DocumentFragment return 0 for obvious reasons. +# Attr, Element, CDATASection, Text return 0. The DOM spec says that they can +# be children of an Entity, but I don't think that that's possible +# with the current XML::Parser. +# Attr uses a {ReadOnly} property, which is only set if it's part of a AttDef. +# Always returns 0 if ignoreReadOnly is set. +# +sub isReadOnly +{ + # default implementation for Nodes that are always readOnly + ! $XML::DOM::IgnoreReadOnly; +} + +sub rejectChild +{ + 1; +} + +sub getNodeTypeName +{ + $NodeNames[$_[0]->getNodeType]; +} + +sub getChildIndex +{ + my ($self, $node) = @_; + my $i = 0; + + return -1 unless defined $self->[_C]; + + for my $kid (@{$self->[_C]}) + { + return $i if $kid == $node; + $i++; + } + -1; +} + +sub getChildAtIndex +{ + my $kids = $_[0]->[_C]; + defined ($kids) ? $kids->[$_[1]] : undef; +} + +sub isAncestor +{ + my ($self, $node) = @_; + + do + { + return 1 if $self == $node; + $node = $node->[_Parent]; + } + while (defined $node); + + 0; +} + +# +# Added for optimization. Overriden in XML::DOM::Text +# +sub isTextNode +{ + 0; +} + +# +# Added for optimization. Overriden in XML::DOM::DocumentFragment +# +sub isDocumentFragmentNode +{ + 0; +} + +# +# Added for optimization. Overriden in XML::DOM::Element +# +sub isElementNode +{ + 0; +} + +# +# Add a Text node with the specified value or append the text to the +# previous Node if it is a Text node. +# +sub addText +{ + # REC 9456 (if it was called) + my ($self, $str) = @_; + + my $node = ${$self->[_C]}[-1]; # $self->getLastChild + + if (defined ($node) && $node->isTextNode) + { + # REC 5475 (if it was called) + $node->appendData ($str); + } + else + { + $node = $self->[_Doc]->createTextNode ($str); + $self->appendChild ($node); + } + $node; +} + +# +# Add a CDATASection node with the specified value or append the text to the +# previous Node if it is a CDATASection node. +# +sub addCDATA +{ + my ($self, $str) = @_; + + my $node = ${$self->[_C]}[-1]; # $self->getLastChild + + if (defined ($node) && $node->getNodeType == CDATA_SECTION_NODE) + { + $node->appendData ($str); + } + else + { + $node = $self->[_Doc]->createCDATASection ($str); + $self->appendChild ($node); + } + $node; +} + +sub removeChildNodes +{ + my $self = shift; + + my $cref = $self->[_C]; + return unless defined $cref; + + my $kid; + while ($kid = pop @{$cref}) + { + undef $kid->[_Parent]; # was delete + } +} + +sub toString +{ + my $self = shift; + my $pr = $XML::DOM::PrintToString::Singleton; + $pr->reset; + $self->print ($pr); + $pr->toString; +} + +sub to_sax +{ + my $self = shift; + unshift @_, 'Handler' if (@_ == 1); + my %h = @_; + + my $doch = exists ($h{DocumentHandler}) ? $h{DocumentHandler} + : $h{Handler}; + my $dtdh = exists ($h{DTDHandler}) ? $h{DTDHandler} + : $h{Handler}; + my $enth = exists ($h{EntityResolver}) ? 
$h{EntityResolver} + : $h{Handler}; + + $self->_to_sax ($doch, $dtdh, $enth); +} + +sub printToFile +{ + my ($self, $fileName) = @_; + my $fh = new FileHandle ($fileName, "w") || + croak "printToFile - can't open output file $fileName"; + + $self->print ($fh); + $fh->close; +} + +# +# Use print to print to a FileHandle object (see printToFile code) +# +sub printToFileHandle +{ + my ($self, $FH) = @_; + my $pr = new XML::DOM::PrintToFileHandle ($FH); + $self->print ($pr); +} + +# +# Used by AttDef::setDefault to convert unexpanded default attribute value +# +sub expandEntityRefs +{ + my ($self, $str) = @_; + my $doctype = $self->[_Doc]->getDoctype; + + $str =~ s/&($XML::RegExp::Name|(#([0-9]+)|#x([0-9a-fA-F]+)));/ + defined($2) ? XML::DOM::XmlUtf8Encode ($3 || hex ($4)) + : expandEntityRef ($1, $doctype)/ego; + $str; +} + +sub expandEntityRef +{ + my ($entity, $doctype) = @_; + + my $expanded = $XML::DOM::DefaultEntities{$entity}; + return $expanded if defined $expanded; + + $expanded = $doctype->getEntity ($entity); + return $expanded->getValue if (defined $expanded); + +#?? is this an error? + croak "Could not expand entity reference of [$entity]\n"; +# return "&$entity;"; # entity not found +} + +sub isHidden +{ + $_[0]->[_Hidden]; +} + +###################################################################### +package XML::DOM::Attr; +###################################################################### + +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("Name Specified", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use Carp; + +sub new +{ + my ($class, $doc, $name, $value, $specified) = @_; + + if ($XML::DOM::SafeMode) + { + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad Attr name [$name]") + unless XML::DOM::isValidName ($name); + } + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_C] = new XML::DOM::NodeList; + $self->[_Name] = $name; + + if (defined $value) + { + $self->setValue ($value); + $self->[_Specified] = (defined $specified) ? $specified : 1; + } + else + { + $self->[_Specified] = 0; + } + $self; +} + +sub getNodeType +{ + ATTRIBUTE_NODE; +} + +sub isSpecified +{ + $_[0]->[_Specified]; +} + +sub getName +{ + $_[0]->[_Name]; +} + +sub getValue +{ + my $self = shift; + my $value = ""; + + for my $kid (@{$self->[_C]}) + { + $value .= $kid->getData; + } + $value; +} + +sub setValue +{ + my ($self, $value) = @_; + + # REC 1147 + $self->removeChildNodes; + $self->appendChild ($self->[_Doc]->createTextNode ($value)); + $self->[_Specified] = 1; +} + +sub getNodeName +{ + $_[0]->getName; +} + +sub getNodeValue +{ + $_[0]->getValue; +} + +sub setNodeValue +{ + $_[0]->setValue ($_[1]); +} + +sub cloneNode +{ + my ($self) = @_; # parameter deep is ignored + + my $node = $self->[_Doc]->createAttribute ($self->getName); + $node->[_Specified] = $self->[_Specified]; + $node->[_ReadOnly] = 1 if $self->[_ReadOnly]; + + $node->cloneChildren ($self, 1); + $node; +} + +#------------------------------------------------------------ +# Extra method implementations +# + +sub isReadOnly +{ + # ReadOnly property is set if it's part of a AttDef + ! 
$XML::DOM::IgnoreReadOnly && defined ($_[0]->[_ReadOnly]); +} + +sub print +{ + my ($self, $FILE) = @_; + + my $name = $self->[_Name]; + + $FILE->print ("$name=\""); + for my $kid (@{$self->[_C]}) + { + if ($kid->getNodeType == TEXT_NODE) + { + $FILE->print (XML::DOM::encodeAttrValue ($kid->getData)); + } + else # ENTITY_REFERENCE_NODE + { + $kid->print ($FILE); + } + } + $FILE->print ("\""); +} + +sub rejectChild +{ + my $t = $_[1]->getNodeType; + + $t != TEXT_NODE + && $t != ENTITY_REFERENCE_NODE; +} + +###################################################################### +package XML::DOM::ProcessingInstruction; +###################################################################### + +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("Target Data", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use Carp; + +sub new +{ + my ($class, $doc, $target, $data, $hidden) = @_; + + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad ProcessingInstruction Target [$target]") + unless (XML::DOM::isValidName ($target) && $target !~ /^xml$/io); + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_Target] = $target; + $self->[_Data] = $data; + $self->[_Hidden] = $hidden; + $self; +} + +sub getNodeType +{ + PROCESSING_INSTRUCTION_NODE; +} + +sub getTarget +{ + $_[0]->[_Target]; +} + +sub getData +{ + $_[0]->[_Data]; +} + +sub setData +{ + my ($self, $data) = @_; + + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + + $self->[_Data] = $data; +} + +sub getNodeName +{ + $_[0]->[_Target]; +} + +# +# Same as getData +# +sub getNodeValue +{ + $_[0]->[_Data]; +} + +sub setNodeValue +{ + $_[0]->setData ($_[1]); +} + +sub cloneNode +{ + my $self = shift; + $self->[_Doc]->createProcessingInstruction ($self->getTarget, + $self->getData, + $self->isHidden); +} + +#------------------------------------------------------------ +# Extra method implementations + +sub isReadOnly +{ + return 0 if $XML::DOM::IgnoreReadOnly; + + my $pa = $_[0]->[_Parent]; + defined ($pa) ? 
$pa->isReadOnly : 0; +} + +sub print +{ + my ($self, $FILE) = @_; + + $FILE->print ("print ($self->[_Target]); + $FILE->print (" "); + $FILE->print (XML::DOM::encodeProcessingInstruction ($self->[_Data])); + $FILE->print ("?>"); +} + +###################################################################### +package XML::DOM::Notation; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("Name Base SysId PubId", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use Carp; + +sub new +{ + my ($class, $doc, $name, $base, $sysId, $pubId, $hidden) = @_; + + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad Notation Name [$name]") + unless XML::DOM::isValidName ($name); + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_Name] = $name; + $self->[_Base] = $base; + $self->[_SysId] = $sysId; + $self->[_PubId] = $pubId; + $self->[_Hidden] = $hidden; + $self; +} + +sub getNodeType +{ + NOTATION_NODE; +} + +sub getPubId +{ + $_[0]->[_PubId]; +} + +sub setPubId +{ + $_[0]->[_PubId] = $_[1]; +} + +sub getSysId +{ + $_[0]->[_SysId]; +} + +sub setSysId +{ + $_[0]->[_SysId] = $_[1]; +} + +sub getName +{ + $_[0]->[_Name]; +} + +sub setName +{ + $_[0]->[_Name] = $_[1]; +} + +sub getBase +{ + $_[0]->[_Base]; +} + +sub getNodeName +{ + $_[0]->[_Name]; +} + +sub print +{ + my ($self, $FILE) = @_; + + my $name = $self->[_Name]; + my $sysId = $self->[_SysId]; + my $pubId = $self->[_PubId]; + + $FILE->print ("print (" PUBLIC \"$pubId\""); + } + if (defined $sysId) + { + $FILE->print (" SYSTEM \"$sysId\""); + } + $FILE->print (">"); +} + +sub cloneNode +{ + my ($self) = @_; + $self->[_Doc]->createNotation ($self->[_Name], $self->[_Base], + $self->[_SysId], $self->[_PubId], + $self->[_Hidden]); +} + +sub to_expat +{ + my ($self, $iter) = @_; + $iter->Notation ($self->getName, $self->getBase, + $self->getSysId, $self->getPubId); +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + $dtdh->notation_decl ( { Name => $self->getName, + Base => $self->getBase, + SystemId => $self->getSysId, + PublicId => $self->getPubId }); +} + +###################################################################### +package XML::DOM::Entity; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("NotationName Parameter Value Ndata SysId PubId", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use Carp; + +sub new +{ + my ($class, $doc, $par, $notationName, $value, $sysId, $pubId, $ndata, $hidden) = @_; + + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad Entity Name [$notationName]") + unless XML::DOM::isValidName ($notationName); + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_NotationName] = $notationName; + $self->[_Parameter] = $par; + $self->[_Value] = $value; + $self->[_Ndata] = $ndata; + $self->[_SysId] = $sysId; + $self->[_PubId] = $pubId; + $self->[_Hidden] = $hidden; + $self; +#?? maybe Value should be a Text node +} + +sub getNodeType +{ + ENTITY_NODE; +} + +sub getPubId +{ + $_[0]->[_PubId]; +} + +sub getSysId +{ + $_[0]->[_SysId]; +} + +# Dom Spec says: +# For unparsed entities, the name of the notation for the +# entity. For parsed entities, this is null. + +#?? do we have unparsed entities? 
+sub getNotationName +{ + $_[0]->[_NotationName]; +} + +sub getNodeName +{ + $_[0]->[_NotationName]; +} + +sub cloneNode +{ + my $self = shift; + $self->[_Doc]->createEntity ($self->[_Parameter], + $self->[_NotationName], $self->[_Value], + $self->[_SysId], $self->[_PubId], + $self->[_Ndata], $self->[_Hidden]); +} + +sub rejectChild +{ + return 1; +#?? if value is split over subnodes, recode this section +# also add: C => new XML::DOM::NodeList, + + my $t = $_[1]; + + return $t == TEXT_NODE + || $t == ENTITY_REFERENCE_NODE + || $t == PROCESSING_INSTRUCTION_NODE + || $t == COMMENT_NODE + || $t == CDATA_SECTION_NODE + || $t == ELEMENT_NODE; +} + +sub getValue +{ + $_[0]->[_Value]; +} + +sub isParameterEntity +{ + $_[0]->[_Parameter]; +} + +sub getNdata +{ + $_[0]->[_Ndata]; +} + +sub print +{ + my ($self, $FILE) = @_; + + my $name = $self->[_NotationName]; + + my $par = $self->isParameterEntity ? "% " : ""; + + $FILE->print ("[_Value]; + my $sysId = $self->[_SysId]; + my $pubId = $self->[_PubId]; + my $ndata = $self->[_Ndata]; + + if (defined $value) + { +#?? Not sure what to do if it contains both single and double quote + $value = ($value =~ /\"/) ? "'$value'" : "\"$value\""; + $FILE->print (" $value"); + } + if (defined $pubId) + { + $FILE->print (" PUBLIC \"$pubId\""); + } + elsif (defined $sysId) + { + $FILE->print (" SYSTEM"); + } + + if (defined $sysId) + { + $FILE->print (" \"$sysId\""); + } + $FILE->print (" NDATA $ndata") if defined $ndata; + $FILE->print (">"); +} + +sub to_expat +{ + my ($self, $iter) = @_; + my $name = ($self->isParameterEntity ? '%' : "") . $self->getNotationName; + $iter->Entity ($name, + $self->getValue, $self->getSysId, $self->getPubId, + $self->getNdata); +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + my $name = ($self->isParameterEntity ? '%' : "") . $self->getNotationName; + $dtdh->entity_decl ( { Name => $name, + Value => $self->getValue, + SystemId => $self->getSysId, + PublicId => $self->getPubId, + Notation => $self->getNdata } ); +} + +###################################################################### +package XML::DOM::EntityReference; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("EntityName Parameter", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use Carp; + +sub new +{ + my ($class, $doc, $name, $parameter) = @_; + + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad Entity Name [$name] in EntityReference") + unless XML::DOM::isValidName ($name); + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_EntityName] = $name; + $self->[_Parameter] = ($parameter || 0); + $self; +} + +sub getNodeType +{ + ENTITY_REFERENCE_NODE; +} + +sub getNodeName +{ + $_[0]->[_EntityName]; +} + +#------------------------------------------------------------ +# Extra method implementations + +sub getEntityName +{ + $_[0]->[_EntityName]; +} + +sub isParameterEntity +{ + $_[0]->[_Parameter]; +} + +sub getData +{ + my $self = shift; + my $name = $self->[_EntityName]; + my $parameter = $self->[_Parameter]; + + my $data = $self->[_Doc]->expandEntity ($name, $parameter); + + unless (defined $data) + { +#?? this is probably an error + my $pc = $parameter ? "%" : "&"; + $data = "$pc$name;"; + } + $data; +} + +sub print +{ + my ($self, $FILE) = @_; + + my $name = $self->[_EntityName]; + +#?? or do we expand the entities? + + my $pc = $self->[_Parameter] ? 
"%" : "&"; + $FILE->print ("$pc$name;"); +} + +# Dom Spec says: +# [...] but if such an Entity exists, then +# the child list of the EntityReference node is the same as that of the +# Entity node. +# +# The resolution of the children of the EntityReference (the replacement +# value of the referenced Entity) may be lazily evaluated; actions by the +# user (such as calling the childNodes method on the EntityReference +# node) are assumed to trigger the evaluation. +sub getChildNodes +{ + my $self = shift; + my $entity = $self->[_Doc]->getEntity ($self->[_EntityName]); + defined ($entity) ? $entity->getChildNodes : new XML::DOM::NodeList; +} + +sub cloneNode +{ + my $self = shift; + $self->[_Doc]->createEntityReference ($self->[_EntityName], + $self->[_Parameter]); +} + +sub to_expat +{ + my ($self, $iter) = @_; + $iter->EntityRef ($self->getEntityName, $self->isParameterEntity); +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + my @par = $self->isParameterEntity ? (Parameter => 1) : (); +#?? not supported by PerlSAX: $self->isParameterEntity + + $doch->entity_reference ( { Name => $self->getEntityName, @par } ); +} + +# NOTE: an EntityReference can't really have children, so rejectChild +# is not reimplemented (i.e. it always returns 0.) + +###################################################################### +package XML::DOM::AttDef; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("Name Type Fixed Default Required Implied Quote", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use Carp; + +#------------------------------------------------------------ +# Extra method implementations + +# AttDef is not part of DOM Spec +sub new +{ + my ($class, $doc, $name, $attrType, $default, $fixed, $hidden) = @_; + + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad Attr name in AttDef [$name]") + unless XML::DOM::isValidName ($name); + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_Name] = $name; + $self->[_Type] = $attrType; + + if (defined $default) + { + if ($default eq "#REQUIRED") + { + $self->[_Required] = 1; + } + elsif ($default eq "#IMPLIED") + { + $self->[_Implied] = 1; + } + else + { + # strip off quotes - see Attlist handler in XML::Parser + $default =~ m#^(["'])(.*)['"]$#; + + $self->[_Quote] = $1; # keep track of the quote character + $self->[_Default] = $self->setDefault ($2); + +#?? should default value be decoded - what if it contains e.g. "&" + } + } + $self->[_Fixed] = $fixed if defined $fixed; + $self->[_Hidden] = $hidden if defined $hidden; + + $self; +} + +sub getNodeType +{ + ATT_DEF_NODE; +} + +sub getName +{ + $_[0]->[_Name]; +} + +# So it can be added to a NamedNodeMap +sub getNodeName +{ + $_[0]->[_Name]; +} + +sub getType +{ + $_[0]->[_Type]; +} + +sub setType +{ + $_[0]->[_Type] = $_[1]; +} + +sub getDefault +{ + $_[0]->[_Default]; +} + +sub setDefault +{ + my ($self, $value) = @_; + + # specified=0, it's the default ! + my $attr = $self->[_Doc]->createAttribute ($self->[_Name], undef, 0); + $attr->[_ReadOnly] = 1; + +#?? this should be split over Text and EntityReference nodes, just like other +# Attr nodes - just expand the text for now + $value = $self->expandEntityRefs ($value); + $attr->addText ($value); +#?? reimplement in NoExpand mode! 
+ + $attr; +} + +sub isFixed +{ + $_[0]->[_Fixed] || 0; +} + +sub isRequired +{ + $_[0]->[_Required] || 0; +} + +sub isImplied +{ + $_[0]->[_Implied] || 0; +} + +sub print +{ + my ($self, $FILE) = @_; + + my $name = $self->[_Name]; + my $type = $self->[_Type]; + my $fixed = $self->[_Fixed]; + my $default = $self->[_Default]; + + $FILE->print ("$name $type"); + $FILE->print (" #FIXED") if defined $fixed; + + if ($self->[_Required]) + { + $FILE->print (" #REQUIRED"); + } + elsif ($self->[_Implied]) + { + $FILE->print (" #IMPLIED"); + } + elsif (defined ($default)) + { + my $quote = $self->[_Quote]; + $FILE->print (" $quote"); + for my $kid (@{$default->[_C]}) + { + $kid->print ($FILE); + } + $FILE->print ($quote); + } +} + +sub getDefaultString +{ + my $self = shift; + my $default; + + if ($self->[_Required]) + { + return "#REQUIRED"; + } + elsif ($self->[_Implied]) + { + return "#IMPLIED"; + } + elsif (defined ($default = $self->[_Default])) + { + my $quote = $self->[_Quote]; + $default = $default->toString; + return "$quote$default$quote"; + } + undef; +} + +sub cloneNode +{ + my $self = shift; + my $node = new XML::DOM::AttDef ($self->[_Doc], $self->[_Name], $self->[_Type], + undef, $self->[_Fixed]); + + $node->[_Required] = 1 if $self->[_Required]; + $node->[_Implied] = 1 if $self->[_Implied]; + $node->[_Fixed] = $self->[_Fixed] if defined $self->[_Fixed]; + $node->[_Hidden] = $self->[_Hidden] if defined $self->[_Hidden]; + + if (defined $self->[_Default]) + { + $node->[_Default] = $self->[_Default]->cloneNode(1); + } + $node->[_Quote] = $self->[_Quote]; + + $node; +} + +sub setOwnerDocument +{ + my ($self, $doc) = @_; + $self->SUPER::setOwnerDocument ($doc); + + if (defined $self->[_Default]) + { + $self->[_Default]->setOwnerDocument ($doc); + } +} + +###################################################################### +package XML::DOM::AttlistDecl; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + import XML::DOM::AttDef qw{ :Fields }; + + XML::DOM::def_fields ("ElementName", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use Carp; + +#------------------------------------------------------------ +# Extra method implementations + +# AttlistDecl is not part of the DOM Spec +sub new +{ + my ($class, $doc, $name) = @_; + + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad Element TagName [$name] in AttlistDecl") + unless XML::DOM::isValidName ($name); + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_C] = new XML::DOM::NodeList; + $self->[_ReadOnly] = 1; + $self->[_ElementName] = $name; + + $self->[_A] = new XML::DOM::NamedNodeMap (Doc => $doc, + ReadOnly => 1, + Parent => $self); + + $self; +} + +sub getNodeType +{ + ATTLIST_DECL_NODE; +} + +sub getName +{ + $_[0]->[_ElementName]; +} + +sub getNodeName +{ + $_[0]->[_ElementName]; +} + +sub getAttDef +{ + my ($self, $attrName) = @_; + $self->[_A]->getNamedItem ($attrName); +} + +sub addAttDef +{ + my ($self, $attrName, $type, $default, $fixed, $hidden) = @_; + my $node = $self->getAttDef ($attrName); + + if (defined $node) + { + # data will be ignored if already defined + my $elemName = $self->getName; + XML::DOM::warning ("multiple definitions of attribute $attrName for element $elemName, only first one is recognized"); + } + else + { + $node = new XML::DOM::AttDef ($self->[_Doc], $attrName, $type, + $default, $fixed, $hidden); + $self->[_A]->setNamedItem 
($node); + } + $node; +} + +sub getDefaultAttrValue +{ + my ($self, $attr) = @_; + my $attrNode = $self->getAttDef ($attr); + (defined $attrNode) ? $attrNode->getDefault : undef; +} + +sub cloneNode +{ + my ($self, $deep) = @_; + my $node = $self->[_Doc]->createAttlistDecl ($self->[_ElementName]); + + $node->[_A] = $self->[_A]->cloneNode ($deep); + $node; +} + +sub setOwnerDocument +{ + my ($self, $doc) = @_; + $self->SUPER::setOwnerDocument ($doc); + + $self->[_A]->setOwnerDocument ($doc); +} + +sub print +{ + my ($self, $FILE) = @_; + + my $name = $self->getName; + my @attlist = @{$self->[_A]->getValues}; + + my $hidden = 1; + for my $att (@attlist) + { + unless ($att->[_Hidden]) + { + $hidden = 0; + last; + } + } + + unless ($hidden) + { + $FILE->print ("print (" "); + $attlist[0]->print ($FILE); + } + else + { + for my $attr (@attlist) + { + next if $attr->[_Hidden]; + + $FILE->print ("\x0A "); + $attr->print ($FILE); + } + } + $FILE->print (">"); + } +} + +sub to_expat +{ + my ($self, $iter) = @_; + my $tag = $self->getName; + for my $a ($self->[_A]->getValues) + { + my $default = $a->isImplied ? '#IMPLIED' : + ($a->isRequired ? '#REQUIRED' : + ($a->[_Quote] . $a->getDefault->getValue . $a->[_Quote])); + + $iter->Attlist ($tag, $a->getName, $a->getType, $default, $a->isFixed); + } +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + my $tag = $self->getName; + for my $a ($self->[_A]->getValues) + { + my $default = $a->isImplied ? '#IMPLIED' : + ($a->isRequired ? '#REQUIRED' : + ($a->[_Quote] . $a->getDefault->getValue . $a->[_Quote])); + + $dtdh->attlist_decl ({ ElementName => $tag, + AttributeName => $a->getName, + Type => $a->[_Type], + Default => $default, + Fixed => $a->isFixed }); + } +} + +###################################################################### +package XML::DOM::ElementDecl; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("Name Model", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use Carp; + + +#------------------------------------------------------------ +# Extra method implementations + +# ElementDecl is not part of the DOM Spec +sub new +{ + my ($class, $doc, $name, $model, $hidden) = @_; + + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad Element TagName [$name] in ElementDecl") + unless XML::DOM::isValidName ($name); + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_Name] = $name; + $self->[_ReadOnly] = 1; + $self->[_Model] = $model; + $self->[_Hidden] = $hidden; + $self; +} + +sub getNodeType +{ + ELEMENT_DECL_NODE; +} + +sub getName +{ + $_[0]->[_Name]; +} + +sub getNodeName +{ + $_[0]->[_Name]; +} + +sub getModel +{ + $_[0]->[_Model]; +} + +sub setModel +{ + my ($self, $model) = @_; + + $self->[_Model] = $model; +} + +sub print +{ + my ($self, $FILE) = @_; + + my $name = $self->[_Name]; + my $model = $self->[_Model]; + + $FILE->print ("") + unless $self->[_Hidden]; +} + +sub cloneNode +{ + my $self = shift; + $self->[_Doc]->createElementDecl ($self->[_Name], $self->[_Model], + $self->[_Hidden]); +} + +sub to_expat +{ +#?? add support for Hidden?? (allover, also in _to_sax!!) 
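+# Replays this <!ELEMENT ...> declaration to an Expat-style handler
+# (e.g. XML::Checker) through its Element() callback.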
+ + my ($self, $iter) = @_; + $iter->Element ($self->getName, $self->getModel); +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + $dtdh->element_decl ( { Name => $self->getName, + Model => $self->getModel } ); +} + +###################################################################### +package XML::DOM::Element; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("TagName", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use XML::DOM::NamedNodeMap; +use Carp; + +sub new +{ + my ($class, $doc, $tagName) = @_; + + if ($XML::DOM::SafeMode) + { + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad Element TagName [$tagName]") + unless XML::DOM::isValidName ($tagName); + } + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_C] = new XML::DOM::NodeList; + $self->[_TagName] = $tagName; + +# Now we're creating the NamedNodeMap only when needed (REC 2313 => 1147) +# $self->[_A] = new XML::DOM::NamedNodeMap (Doc => $doc, +# Parent => $self); + + $self; +} + +sub getNodeType +{ + ELEMENT_NODE; +} + +sub getTagName +{ + $_[0]->[_TagName]; +} + +sub getNodeName +{ + $_[0]->[_TagName]; +} + +sub getAttributeNode +{ + my ($self, $name) = @_; + return undef unless defined $self->[_A]; + + $self->getAttributes->{$name}; +} + +sub getAttribute +{ + my ($self, $name) = @_; + my $attr = $self->getAttributeNode ($name); + (defined $attr) ? $attr->getValue : ""; +} + +sub setAttribute +{ + my ($self, $name, $val) = @_; + + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad Attr Name [$name]") + unless XML::DOM::isValidName ($name); + + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + + my $node = $self->getAttributes->{$name}; + if (defined $node) + { + $node->setValue ($val); + } + else + { + $node = $self->[_Doc]->createAttribute ($name, $val); + $self->[_A]->setNamedItem ($node); + } +} + +sub setAttributeNode +{ + my ($self, $node) = @_; + my $attr = $self->getAttributes; + my $name = $node->getNodeName; + + # REC 1147 + if ($XML::DOM::SafeMode) + { + croak new XML::DOM::DOMException (WRONG_DOCUMENT_ERR, + "nodes belong to different documents") + if $self->[_Doc] != $node->[_Doc]; + + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + + my $attrParent = $node->[_UsedIn]; + croak new XML::DOM::DOMException (INUSE_ATTRIBUTE_ERR, + "Attr is already used by another Element") + if (defined ($attrParent) && $attrParent != $attr); + } + + my $other = $attr->{$name}; + $attr->removeNamedItem ($name) if defined $other; + + $attr->setNamedItem ($node); + + $other; +} + +sub removeAttributeNode +{ + my ($self, $node) = @_; + + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + + my $attr = $self->[_A]; + unless (defined $attr) + { + croak new XML::DOM::DOMException (NOT_FOUND_ERR); + return undef; + } + + my $name = $node->getNodeName; + my $attrNode = $attr->getNamedItem ($name); + +#?? should it croak if it's the default value? 
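+# (The croak below fires when the given Attr is not the node currently
+# stored under that name; un-specified default values are handled further on.)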
+ croak new XML::DOM::DOMException (NOT_FOUND_ERR) + unless $node == $attrNode; + + # Not removing anything if it's the default value already + return undef unless $node->isSpecified; + + $attr->removeNamedItem ($name); + + # Substitute with default value if it's defined + my $default = $self->getDefaultAttrValue ($name); + if (defined $default) + { + local $XML::DOM::IgnoreReadOnly = 1; + + $default = $default->cloneNode (1); + $attr->setNamedItem ($default); + } + $node; +} + +sub removeAttribute +{ + my ($self, $name) = @_; + my $attr = $self->[_A]; + unless (defined $attr) + { + croak new XML::DOM::DOMException (NOT_FOUND_ERR); + return; + } + + my $node = $attr->getNamedItem ($name); + if (defined $node) + { +#?? could use dispose() to remove circular references for gc, but what if +#?? somebody is referencing it? + $self->removeAttributeNode ($node); + } +} + +sub cloneNode +{ + my ($self, $deep) = @_; + my $node = $self->[_Doc]->createElement ($self->getTagName); + + # Always clone the Attr nodes, even if $deep == 0 + if (defined $self->[_A]) + { + $node->[_A] = $self->[_A]->cloneNode (1); # deep=1 + $node->[_A]->setParentNode ($node); + } + + $node->cloneChildren ($self, $deep); + $node; +} + +sub getAttributes +{ + $_[0]->[_A] ||= XML::DOM::NamedNodeMap->new (Doc => $_[0]->[_Doc], + Parent => $_[0]); +} + +#------------------------------------------------------------ +# Extra method implementations + +# Added for convenience +sub setTagName +{ + my ($self, $tagName) = @_; + + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "bad Element TagName [$tagName]") + unless XML::DOM::isValidName ($tagName); + + $self->[_TagName] = $tagName; +} + +sub isReadOnly +{ + 0; +} + +# Added for optimization. +sub isElementNode +{ + 1; +} + +sub rejectChild +{ + my $t = $_[1]->getNodeType; + + $t != TEXT_NODE + && $t != ENTITY_REFERENCE_NODE + && $t != PROCESSING_INSTRUCTION_NODE + && $t != COMMENT_NODE + && $t != CDATA_SECTION_NODE + && $t != ELEMENT_NODE; +} + +sub getDefaultAttrValue +{ + my ($self, $attr) = @_; + $self->[_Doc]->getDefaultAttrValue ($self->[_TagName], $attr); +} + +sub dispose +{ + my $self = shift; + + $self->[_A]->dispose if defined $self->[_A]; + $self->SUPER::dispose; +} + +sub setOwnerDocument +{ + my ($self, $doc) = @_; + $self->SUPER::setOwnerDocument ($doc); + + $self->[_A]->setOwnerDocument ($doc) if defined $self->[_A]; +} + +sub print +{ + my ($self, $FILE) = @_; + + my $name = $self->[_TagName]; + + $FILE->print ("<$name"); + + if (defined $self->[_A]) + { + for my $att (@{$self->[_A]->getValues}) + { + # skip un-specified (default) Attr nodes + if ($att->isSpecified) + { + $FILE->print (" "); + $att->print ($FILE); + } + } + } + + my @kids = @{$self->[_C]}; + if (@kids > 0) + { + $FILE->print (">"); + for my $kid (@kids) + { + $kid->print ($FILE); + } + $FILE->print (""); + } + else + { + my $style = &$XML::DOM::TagStyle ($name, $self); + if ($style == 0) + { + $FILE->print ("/>"); + } + elsif ($style == 1) + { + $FILE->print (">"); + } + else + { + $FILE->print (" />"); + } + } +} + +sub check +{ + my ($self, $checker) = @_; + die "Usage: \$xml_dom_elem->check (\$checker)" unless $checker; + + $checker->InitDomElem; + $self->to_expat ($checker); + $checker->FinalDomElem; +} + +sub to_expat +{ + my ($self, $iter) = @_; + + my $tag = $self->getTagName; + $iter->Start ($tag); + + if (defined $self->[_A]) + { + for my $attr ($self->[_A]->getValues) + { + $iter->Attr ($tag, $attr->getName, $attr->getValue, $attr->isSpecified); + } + } + + $iter->EndAttr; + + 
for my $kid ($self->getChildNodes) + { + $kid->to_expat ($iter); + } + + $iter->End; +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + + my $tag = $self->getTagName; + + my @attr = (); + my $attrOrder; + my $attrDefaulted; + + if (defined $self->[_A]) + { + my @spec = (); # names of specified attributes + my @unspec = (); # names of defaulted attributes + + for my $attr ($self->[_A]->getValues) + { + my $attrName = $attr->getName; + push @attr, $attrName, $attr->getValue; + if ($attr->isSpecified) + { + push @spec, $attrName; + } + else + { + push @unspec, $attrName; + } + } + $attrOrder = [ @spec, @unspec ]; + $attrDefaulted = @spec; + } + $doch->start_element (defined $attrOrder ? + { Name => $tag, + Attributes => { @attr }, + AttributeOrder => $attrOrder, + Defaulted => $attrDefaulted + } : + { Name => $tag, + Attributes => { @attr } + } + ); + + for my $kid ($self->getChildNodes) + { + $kid->_to_sax ($doch, $dtdh, $enth); + } + + $doch->end_element ( { Name => $tag } ); +} + +###################################################################### +package XML::DOM::CharacterData; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("Data", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use Carp; + + +# +# CharacterData nodes should never be created directly, only subclassed! +# +sub new +{ + my ($class, $doc, $data) = @_; + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_Data] = $data; + $self; +} + +sub appendData +{ + my ($self, $data) = @_; + + if ($XML::DOM::SafeMode) + { + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + } + $self->[_Data] .= $data; +} + +sub deleteData +{ + my ($self, $offset, $count) = @_; + + croak new XML::DOM::DOMException (INDEX_SIZE_ERR, + "bad offset [$offset]") + if ($offset < 0 || $offset >= length ($self->[_Data])); +#?? DOM Spec says >, but >= makes more sense! + + croak new XML::DOM::DOMException (INDEX_SIZE_ERR, + "negative count [$count]") + if $count < 0; + + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + + substr ($self->[_Data], $offset, $count) = ""; +} + +sub getData +{ + $_[0]->[_Data]; +} + +sub getLength +{ + length $_[0]->[_Data]; +} + +sub insertData +{ + my ($self, $offset, $data) = @_; + + croak new XML::DOM::DOMException (INDEX_SIZE_ERR, + "bad offset [$offset]") + if ($offset < 0 || $offset >= length ($self->[_Data])); +#?? DOM Spec says >, but >= makes more sense! + + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + + substr ($self->[_Data], $offset, 0) = $data; +} + +sub replaceData +{ + my ($self, $offset, $count, $data) = @_; + + croak new XML::DOM::DOMException (INDEX_SIZE_ERR, + "bad offset [$offset]") + if ($offset < 0 || $offset >= length ($self->[_Data])); +#?? DOM Spec says >, but >= makes more sense! 
+
+    croak new XML::DOM::DOMException (INDEX_SIZE_ERR,
+                                      "negative count [$count]")
+        if $count < 0;
+
+    croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR,
+                                      "node is ReadOnly")
+        if $self->isReadOnly;
+
+    substr ($self->[_Data], $offset, $count) = $data;
+}
+
+sub setData
+{
+    my ($self, $data) = @_;
+
+    croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR,
+                                      "node is ReadOnly")
+        if $self->isReadOnly;
+
+    $self->[_Data] = $data;
+}
+
+sub substringData
+{
+    my ($self, $offset, $count) = @_;
+    my $data = $self->[_Data];
+
+    croak new XML::DOM::DOMException (INDEX_SIZE_ERR,
+                                      "bad offset [$offset]")
+        if ($offset < 0 || $offset >= length ($data));
+#?? DOM Spec says >, but >= makes more sense!
+
+    croak new XML::DOM::DOMException (INDEX_SIZE_ERR,
+                                      "negative count [$count]")
+        if $count < 0;
+
+    substr ($data, $offset, $count);
+}
+
+sub getNodeValue
+{
+    $_[0]->getData;
+}
+
+sub setNodeValue
+{
+    $_[0]->setData ($_[1]);
+}
+
+######################################################################
+package XML::DOM::CDATASection;
+######################################################################
+use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS };
+
+BEGIN
+{
+    import XML::DOM::CharacterData qw( :DEFAULT :Fields );
+    import XML::DOM::Node qw( :DEFAULT :Fields );
+    XML::DOM::def_fields ("", "XML::DOM::CharacterData");
+}
+
+use XML::DOM::DOMException;
+
+sub getNodeName
+{
+    "#cdata-section";
+}
+
+sub getNodeType
+{
+    CDATA_SECTION_NODE;
+}
+
+sub cloneNode
+{
+    my $self = shift;
+    $self->[_Doc]->createCDATASection ($self->getData);
+}
+
+#------------------------------------------------------------
+# Extra method implementations
+
+sub isReadOnly
+{
+    0;
+}
+
+sub print
+{
+    my ($self, $FILE) = @_;
+    $FILE->print ("<![CDATA[");
+    $FILE->print (XML::DOM::encodeCDATA ($self->getData));
+    $FILE->print ("]]>");
+}
+
+sub to_expat
+{
+    my ($self, $iter) = @_;
+    $iter->CData ($self->getData);
+}
+
+sub _to_sax
+{
+    my ($self, $doch, $dtdh, $enth) = @_;
+    $doch->start_cdata;
+    $doch->characters ( { Data => $self->getData } );
+    $doch->end_cdata;
+}
+
+######################################################################
+package XML::DOM::Comment;
+######################################################################
+use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS };
+
+BEGIN
+{
+    import XML::DOM::CharacterData qw( :DEFAULT :Fields );
+    import XML::DOM::Node qw( :DEFAULT :Fields );
+    XML::DOM::def_fields ("", "XML::DOM::CharacterData");
+}
+
+use XML::DOM::DOMException;
+use Carp;
+
+#?? setData - could check comment for double minus
+
+sub getNodeType
+{
+    COMMENT_NODE;
+}
+
+sub getNodeName
+{
+    "#comment";
+}
+
+sub cloneNode
+{
+    my $self = shift;
+    $self->[_Doc]->createComment ($self->getData);
+}
+
+#------------------------------------------------------------
+# Extra method implementations
+
+sub isReadOnly
+{
+    return 0 if $XML::DOM::IgnoreReadOnly;
+
+    my $pa = $_[0]->[_Parent];
+    defined ($pa) ?
$pa->isReadOnly : 0; +} + +sub print +{ + my ($self, $FILE) = @_; + my $comment = XML::DOM::encodeComment ($self->[_Data]); + + $FILE->print (""); +} + +sub to_expat +{ + my ($self, $iter) = @_; + $iter->Comment ($self->getData); +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + $doch->Comment ( { Data => $self->getData }); +} + +###################################################################### +package XML::DOM::Text; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::CharacterData qw( :DEFAULT :Fields ); + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("", "XML::DOM::CharacterData"); +} + +use XML::DOM::DOMException; +use Carp; + +sub getNodeType +{ + TEXT_NODE; +} + +sub getNodeName +{ + "#text"; +} + +sub splitText +{ + my ($self, $offset) = @_; + + my $data = $self->getData; + croak new XML::DOM::DOMException (INDEX_SIZE_ERR, + "bad offset [$offset]") + if ($offset < 0 || $offset >= length ($data)); +#?? DOM Spec says >, but >= makes more sense! + + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR, + "node is ReadOnly") + if $self->isReadOnly; + + my $rest = substring ($data, $offset); + + $self->setData (substring ($data, 0, $offset)); + my $node = $self->[_Doc]->createTextNode ($rest); + + # insert new node after this node + $self->[_Parent]->insertAfter ($node, $self); + + $node; +} + +sub cloneNode +{ + my $self = shift; + $self->[_Doc]->createTextNode ($self->getData); +} + +#------------------------------------------------------------ +# Extra method implementations + +sub isReadOnly +{ + 0; +} + +sub print +{ + my ($self, $FILE) = @_; + $FILE->print (XML::DOM::encodeText ($self->getData, "<&")); +} + +sub isTextNode +{ + 1; +} + +sub to_expat +{ + my ($self, $iter) = @_; + $iter->Char ($self->getData); +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + $doch->characters ( { Data => $self->getData } ); +} + +###################################################################### +package XML::DOM::XMLDecl; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("Version Encoding Standalone", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; + + +#------------------------------------------------------------ +# Extra method implementations + +# XMLDecl is not part of the DOM Spec +sub new +{ + my ($class, $doc, $version, $encoding, $standalone) = @_; + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_Version] = $version if defined $version; + $self->[_Encoding] = $encoding if defined $encoding; + $self->[_Standalone] = $standalone if defined $standalone; + + $self; +} + +sub setVersion +{ + if (defined $_[1]) + { + $_[0]->[_Version] = $_[1]; + } + else + { + undef $_[0]->[_Version]; # was delete + } +} + +sub getVersion +{ + $_[0]->[_Version]; +} + +sub setEncoding +{ + if (defined $_[1]) + { + $_[0]->[_Encoding] = $_[1]; + } + else + { + undef $_[0]->[_Encoding]; # was delete + } +} + +sub getEncoding +{ + $_[0]->[_Encoding]; +} + +sub setStandalone +{ + if (defined $_[1]) + { + $_[0]->[_Standalone] = $_[1]; + } + else + { + undef $_[0]->[_Standalone]; # was delete + } +} + +sub getStandalone +{ + $_[0]->[_Standalone]; +} + +sub getNodeType +{ + XML_DECL_NODE; +} + +sub cloneNode +{ + my $self = shift; + + new XML::DOM::XMLDecl 
($self->[_Doc], $self->[_Version], + $self->[_Encoding], $self->[_Standalone]); +} + +sub print +{ + my ($self, $FILE) = @_; + + my $version = $self->[_Version]; + my $encoding = $self->[_Encoding]; + my $standalone = $self->[_Standalone]; + $standalone = ($standalone ? "yes" : "no") if defined $standalone; + + $FILE->print ("print (" version=\"$version\"") if defined $version; + $FILE->print (" encoding=\"$encoding\"") if defined $encoding; + $FILE->print (" standalone=\"$standalone\"") if defined $standalone; + $FILE->print ("?>"); +} + +sub to_expat +{ + my ($self, $iter) = @_; + $iter->XMLDecl ($self->getVersion, $self->getEncoding, $self->getStandalone); +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + $dtdh->xml_decl ( { Version => $self->getVersion, + Encoding => $self->getEncoding, + Standalone => $self->getStandalone } ); +} + +###################################################################### +package XML::DOM::DocumentFragment; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; + +sub new +{ + my ($class, $doc) = @_; + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_C] = new XML::DOM::NodeList; + $self; +} + +sub getNodeType +{ + DOCUMENT_FRAGMENT_NODE; +} + +sub getNodeName +{ + "#document-fragment"; +} + +sub cloneNode +{ + my ($self, $deep) = @_; + my $node = $self->[_Doc]->createDocumentFragment; + + $node->cloneChildren ($self, $deep); + $node; +} + +#------------------------------------------------------------ +# Extra method implementations + +sub isReadOnly +{ + 0; +} + +sub print +{ + my ($self, $FILE) = @_; + + for my $node (@{$self->[_C]}) + { + $node->print ($FILE); + } +} + +sub rejectChild +{ + my $t = $_[1]->getNodeType; + + $t != TEXT_NODE + && $t != ENTITY_REFERENCE_NODE + && $t != PROCESSING_INSTRUCTION_NODE + && $t != COMMENT_NODE + && $t != CDATA_SECTION_NODE + && $t != ELEMENT_NODE; +} + +sub isDocumentFragmentNode +{ + 1; +} + +###################################################################### +package XML::DOM::Document; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + XML::DOM::def_fields ("Doctype XmlDecl", "XML::DOM::Node"); +} + +use Carp; +use XML::DOM::NodeList; +use XML::DOM::DOMException; + +sub new +{ + my ($class) = @_; + my $self = bless [], $class; + + # keep Doc pointer, even though getOwnerDocument returns undef + $self->[_Doc] = $self; + $self->[_C] = new XML::DOM::NodeList; + $self; +} + +sub getNodeType +{ + DOCUMENT_NODE; +} + +sub getNodeName +{ + "#document"; +} + +#?? not sure about keeping a fixed order of these nodes.... 
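+
+# NOTE (added commentary, not part of the original XML::DOM source): the
+# Document node doubles as the factory for every other node type; the
+# create* methods further down simply delegate to the individual class
+# constructors.  A minimal usage sketch (the element name is illustrative
+# only):
+#
+#   my $doc  = new XML::DOM::Document;
+#   my $root = $doc->createElement ("root");
+#   $doc->appendChild ($root);     # appendChild/insertBefore below enforce
+#                                  # the "only one root Element" rule
+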
+sub getDoctype +{ + $_[0]->[_Doctype]; +} + +sub getDocumentElement +{ + my ($self) = @_; + for my $kid (@{$self->[_C]}) + { + return $kid if $kid->isElementNode; + } + undef; +} + +sub getOwnerDocument +{ + undef; +} + +sub getImplementation +{ + $XML::DOM::DOMImplementation::Singleton; +} + +# +# Added extra parameters ($val, $specified) that are passed straight to the +# Attr constructor +# +sub createAttribute +{ + new XML::DOM::Attr (@_); +} + +sub createCDATASection +{ + new XML::DOM::CDATASection (@_); +} + +sub createComment +{ + new XML::DOM::Comment (@_); + +} + +sub createElement +{ + new XML::DOM::Element (@_); +} + +sub createTextNode +{ + new XML::DOM::Text (@_); +} + +sub createProcessingInstruction +{ + new XML::DOM::ProcessingInstruction (@_); +} + +sub createEntityReference +{ + new XML::DOM::EntityReference (@_); +} + +sub createDocumentFragment +{ + new XML::DOM::DocumentFragment (@_); +} + +sub createDocumentType +{ + new XML::DOM::DocumentType (@_); +} + +sub cloneNode +{ + my ($self, $deep) = @_; + my $node = new XML::DOM::Document; + + $node->cloneChildren ($self, $deep); + + my $xmlDecl = $self->[_XmlDecl]; + $node->[_XmlDecl] = $xmlDecl->cloneNode ($deep) if defined $xmlDecl; + + $node; +} + +sub appendChild +{ + my ($self, $node) = @_; + + # Extra check: make sure we don't end up with more than one Element. + # Don't worry about multiple DocType nodes, because DocumentFragment + # can't contain DocType nodes. + + my @nodes = ($node); + @nodes = @{$node->[_C]} + if $node->getNodeType == DOCUMENT_FRAGMENT_NODE; + + my $elem = 0; + for my $n (@nodes) + { + $elem++ if $n->isElementNode; + } + + if ($elem > 0 && defined ($self->getDocumentElement)) + { + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "document can have only one Element"); + } + $self->SUPER::appendChild ($node); +} + +sub insertBefore +{ + my ($self, $node, $refNode) = @_; + + # Extra check: make sure sure we don't end up with more than 1 Elements. + # Don't worry about multiple DocType nodes, because DocumentFragment + # can't contain DocType nodes. + + my @nodes = ($node); + @nodes = @{$node->[_C]} + if $node->getNodeType == DOCUMENT_FRAGMENT_NODE; + + my $elem = 0; + for my $n (@nodes) + { + $elem++ if $n->isElementNode; + } + + if ($elem > 0 && defined ($self->getDocumentElement)) + { + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "document can have only one Element"); + } + $self->SUPER::insertBefore ($node, $refNode); +} + +sub replaceChild +{ + my ($self, $node, $refNode) = @_; + + # Extra check: make sure sure we don't end up with more than 1 Elements. + # Don't worry about multiple DocType nodes, because DocumentFragment + # can't contain DocType nodes. 
+ + my @nodes = ($node); + @nodes = @{$node->[_C]} + if $node->getNodeType == DOCUMENT_FRAGMENT_NODE; + + my $elem = 0; + $elem-- if $refNode->isElementNode; + + for my $n (@nodes) + { + $elem++ if $n->isElementNode; + } + + if ($elem > 0 && defined ($self->getDocumentElement)) + { + croak new XML::DOM::DOMException (HIERARCHY_REQUEST_ERR, + "document can have only one Element"); + } + $self->SUPER::appendChild ($node, $refNode); +} + +#------------------------------------------------------------ +# Extra method implementations + +sub isReadOnly +{ + 0; +} + +sub print +{ + my ($self, $FILE) = @_; + + my $xmlDecl = $self->getXMLDecl; + if (defined $xmlDecl) + { + $xmlDecl->print ($FILE); + $FILE->print ("\x0A"); + } + + for my $node (@{$self->[_C]}) + { + $node->print ($FILE); + $FILE->print ("\x0A"); + } +} + +sub setDoctype +{ + my ($self, $doctype) = @_; + my $oldDoctype = $self->[_Doctype]; + if (defined $oldDoctype) + { + $self->replaceChild ($doctype, $oldDoctype); + } + else + { +#?? before root element, but after XmlDecl ! + $self->appendChild ($doctype); + } + $_[0]->[_Doctype] = $_[1]; +} + +sub removeDoctype +{ + my $self = shift; + my $doctype = $self->removeChild ($self->[_Doctype]); + + undef $self->[_Doctype]; # was delete + $doctype; +} + +sub rejectChild +{ + my $t = $_[1]->getNodeType; + $t != ELEMENT_NODE + && $t != PROCESSING_INSTRUCTION_NODE + && $t != COMMENT_NODE + && $t != DOCUMENT_TYPE_NODE; +} + +sub expandEntity +{ + my ($self, $ent, $param) = @_; + my $doctype = $self->getDoctype; + + (defined $doctype) ? $doctype->expandEntity ($ent, $param) : undef; +} + +sub getDefaultAttrValue +{ + my ($self, $elem, $attr) = @_; + + my $doctype = $self->getDoctype; + + (defined $doctype) ? $doctype->getDefaultAttrValue ($elem, $attr) : undef; +} + +sub getEntity +{ + my ($self, $entity) = @_; + + my $doctype = $self->getDoctype; + + (defined $doctype) ? $doctype->getEntity ($entity) : undef; +} + +sub dispose +{ + my $self = shift; + + $self->[_XmlDecl]->dispose if defined $self->[_XmlDecl]; + undef $self->[_XmlDecl]; # was delete + undef $self->[_Doctype]; # was delete + $self->SUPER::dispose; +} + +sub setOwnerDocument +{ + # Do nothing, you can't change the owner document! +#?? could throw exception... 
+} + +sub getXMLDecl +{ + $_[0]->[_XmlDecl]; +} + +sub setXMLDecl +{ + $_[0]->[_XmlDecl] = $_[1]; +} + +sub createXMLDecl +{ + new XML::DOM::XMLDecl (@_); +} + +sub createNotation +{ + new XML::DOM::Notation (@_); +} + +sub createElementDecl +{ + new XML::DOM::ElementDecl (@_); +} + +sub createAttlistDecl +{ + new XML::DOM::AttlistDecl (@_); +} + +sub createEntity +{ + new XML::DOM::Entity (@_); +} + +sub createChecker +{ + my $self = shift; + my $checker = XML::Checker->new; + + $checker->Init; + my $doctype = $self->getDoctype; + $doctype->to_expat ($checker) if $doctype; + $checker->Final; + + $checker; +} + +sub check +{ + my ($self, $checker) = @_; + $checker ||= XML::Checker->new; + + $self->to_expat ($checker); +} + +sub to_expat +{ + my ($self, $iter) = @_; + + $iter->Init; + + for my $kid ($self->getChildNodes) + { + $kid->to_expat ($iter); + } + $iter->Final; +} + +sub check_sax +{ + my ($self, $checker) = @_; + $checker ||= XML::Checker->new; + + $self->to_sax (Handler => $checker); +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + + $doch->start_document; + + for my $kid ($self->getChildNodes) + { + $kid->_to_sax ($doch, $dtdh, $enth); + } + $doch->end_document; +} + +###################################################################### +package XML::DOM::DocumentType; +###################################################################### +use vars qw{ @ISA @EXPORT_OK %EXPORT_TAGS %HFIELDS }; + +BEGIN +{ + import XML::DOM::Node qw( :DEFAULT :Fields ); + import XML::DOM::Document qw( :Fields ); + XML::DOM::def_fields ("Entities Notations Name SysId PubId Internal", "XML::DOM::Node"); +} + +use XML::DOM::DOMException; +use XML::DOM::NamedNodeMap; + +sub new +{ + my $class = shift; + my $doc = shift; + + my $self = bless [], $class; + + $self->[_Doc] = $doc; + $self->[_ReadOnly] = 1; + $self->[_C] = new XML::DOM::NodeList; + + $self->[_Entities] = new XML::DOM::NamedNodeMap (Doc => $doc, + Parent => $self, + ReadOnly => 1); + $self->[_Notations] = new XML::DOM::NamedNodeMap (Doc => $doc, + Parent => $self, + ReadOnly => 1); + $self->setParams (@_); + $self; +} + +sub getNodeType +{ + DOCUMENT_TYPE_NODE; +} + +sub getNodeName +{ + $_[0]->[_Name]; +} + +sub getName +{ + $_[0]->[_Name]; +} + +sub getEntities +{ + $_[0]->[_Entities]; +} + +sub getNotations +{ + $_[0]->[_Notations]; +} + +sub setParentNode +{ + my ($self, $parent) = @_; + $self->SUPER::setParentNode ($parent); + + $parent->[_Doctype] = $self + if $parent->getNodeType == DOCUMENT_NODE; +} + +sub cloneNode +{ + my ($self, $deep) = @_; + + my $node = new XML::DOM::DocumentType ($self->[_Doc], $self->[_Name], + $self->[_SysId], $self->[_PubId], + $self->[_Internal]); + +#?? does it make sense to make a shallow copy? 
+ + # clone the NamedNodeMaps + $node->[_Entities] = $self->[_Entities]->cloneNode ($deep); + + $node->[_Notations] = $self->[_Notations]->cloneNode ($deep); + + $node->cloneChildren ($self, $deep); + + $node; +} + +#------------------------------------------------------------ +# Extra method implementations + +sub getSysId +{ + $_[0]->[_SysId]; +} + +sub getPubId +{ + $_[0]->[_PubId]; +} + +sub getInternal +{ + $_[0]->[_Internal]; +} + +sub setSysId +{ + $_[0]->[_SysId] = $_[1]; +} + +sub setPubId +{ + $_[0]->[_PubId] = $_[1]; +} + +sub setInternal +{ + $_[0]->[_Internal] = $_[1]; +} + +sub setName +{ + $_[0]->[_Name] = $_[1]; +} + +sub removeChildHoodMemories +{ + my ($self, $dontWipeReadOnly) = @_; + + my $parent = $self->[_Parent]; + if (defined $parent && $parent->getNodeType == DOCUMENT_NODE) + { + undef $parent->[_Doctype]; # was delete + } + $self->SUPER::removeChildHoodMemories; +} + +sub dispose +{ + my $self = shift; + + $self->[_Entities]->dispose; + $self->[_Notations]->dispose; + $self->SUPER::dispose; +} + +sub setOwnerDocument +{ + my ($self, $doc) = @_; + $self->SUPER::setOwnerDocument ($doc); + + $self->[_Entities]->setOwnerDocument ($doc); + $self->[_Notations]->setOwnerDocument ($doc); +} + +sub expandEntity +{ + my ($self, $ent, $param) = @_; + + my $kid = $self->[_Entities]->getNamedItem ($ent); + return $kid->getValue + if (defined ($kid) && $param == $kid->isParameterEntity); + + undef; # entity not found +} + +sub getAttlistDecl +{ + my ($self, $elemName) = @_; + for my $kid (@{$_[0]->[_C]}) + { + return $kid if ($kid->getNodeType == ATTLIST_DECL_NODE && + $kid->getName eq $elemName); + } + undef; # not found +} + +sub getElementDecl +{ + my ($self, $elemName) = @_; + for my $kid (@{$_[0]->[_C]}) + { + return $kid if ($kid->getNodeType == ELEMENT_DECL_NODE && + $kid->getName eq $elemName); + } + undef; # not found +} + +sub addElementDecl +{ + my ($self, $name, $model, $hidden) = @_; + my $node = $self->getElementDecl ($name); + +#?? could warn + unless (defined $node) + { + $node = $self->[_Doc]->createElementDecl ($name, $model, $hidden); + $self->appendChild ($node); + } + $node; +} + +sub addAttlistDecl +{ + my ($self, $name) = @_; + my $node = $self->getAttlistDecl ($name); + + unless (defined $node) + { + $node = $self->[_Doc]->createAttlistDecl ($name); + $self->appendChild ($node); + } + $node; +} + +sub addNotation +{ + my $self = shift; + my $node = $self->[_Doc]->createNotation (@_); + $self->[_Notations]->setNamedItem ($node); + $node; +} + +sub addEntity +{ + my $self = shift; + my $node = $self->[_Doc]->createEntity (@_); + + $self->[_Entities]->setNamedItem ($node); + $node; +} + +# All AttDefs for a certain Element are merged into a single ATTLIST +sub addAttDef +{ + my $self = shift; + my $elemName = shift; + + # create the AttlistDecl if it doesn't exist yet + my $attListDecl = $self->addAttlistDecl ($elemName); + $attListDecl->addAttDef (@_); +} + +sub getDefaultAttrValue +{ + my ($self, $elem, $attr) = @_; + my $elemNode = $self->getAttlistDecl ($elem); + (defined $elemNode) ? $elemNode->getDefaultAttrValue ($attr) : undef; +} + +sub getEntity +{ + my ($self, $entity) = @_; + $self->[_Entities]->getNamedItem ($entity); +} + +sub setParams +{ + my ($self, $name, $sysid, $pubid, $internal) = @_; + + $self->[_Name] = $name; + +#?? not sure if we need to hold on to these... 
+ $self->[_SysId] = $sysid if defined $sysid; + $self->[_PubId] = $pubid if defined $pubid; + $self->[_Internal] = $internal if defined $internal; + + $self; +} + +sub rejectChild +{ + # DOM Spec says: DocumentType -- no children + not $XML::DOM::IgnoreReadOnly; +} + +sub print +{ + my ($self, $FILE) = @_; + + my $name = $self->[_Name]; + + my $sysId = $self->[_SysId]; + my $pubId = $self->[_PubId]; + + $FILE->print ("print (" PUBLIC \"$pubId\" \"$sysId\""); + } + elsif (defined $sysId) + { + $FILE->print (" SYSTEM \"$sysId\""); + } + + my @entities = @{$self->[_Entities]->getValues}; + my @notations = @{$self->[_Notations]->getValues}; + my @kids = @{$self->[_C]}; + + if (@entities || @notations || @kids) + { + $FILE->print (" [\x0A"); + + for my $kid (@entities) + { + next if $kid->[_Hidden]; + + $FILE->print (" "); + $kid->print ($FILE); + $FILE->print ("\x0A"); + } + + for my $kid (@notations) + { + next if $kid->[_Hidden]; + + $FILE->print (" "); + $kid->print ($FILE); + $FILE->print ("\x0A"); + } + + for my $kid (@kids) + { + next if $kid->[_Hidden]; + + $FILE->print (" "); + $kid->print ($FILE); + $FILE->print ("\x0A"); + } + $FILE->print ("]"); + } + $FILE->print (">"); +} + +sub to_expat +{ + my ($self, $iter) = @_; + + $iter->Doctype ($self->getName, $self->getSysId, $self->getPubId, $self->getInternal); + + for my $ent ($self->getEntities->getValues) + { + next if $ent->[_Hidden]; + $ent->to_expat ($iter); + } + + for my $nota ($self->getNotations->getValues) + { + next if $nota->[_Hidden]; + $nota->to_expat ($iter); + } + + for my $kid ($self->getChildNodes) + { + next if $kid->[_Hidden]; + $kid->to_expat ($iter); + } +} + +sub _to_sax +{ + my ($self, $doch, $dtdh, $enth) = @_; + + $dtdh->doctype_decl ( { Name => $self->getName, + SystemId => $self->getSysId, + PublicId => $self->getPubId, + Internal => $self->getInternal }); + + for my $ent ($self->getEntities->getValues) + { + next if $ent->[_Hidden]; + $ent->_to_sax ($doch, $dtdh, $enth); + } + + for my $nota ($self->getNotations->getValues) + { + next if $nota->[_Hidden]; + $nota->_to_sax ($doch, $dtdh, $enth); + } + + for my $kid ($self->getChildNodes) + { + next if $kid->[_Hidden]; + $kid->_to_sax ($doch, $dtdh, $enth); + } +} + +###################################################################### +package XML::DOM::Parser; +###################################################################### +use vars qw ( @ISA ); +@ISA = qw( XML::Parser ); + +sub new +{ + my ($class, %args) = @_; + + $args{Style} = 'Dom'; + $class->SUPER::new (%args); +} + +# This method needed to be overriden so we can restore some global +# variables when an exception is thrown +sub parse +{ + my $self = shift; + + local $XML::Parser::Dom::_DP_doc; + local $XML::Parser::Dom::_DP_elem; + local $XML::Parser::Dom::_DP_doctype; + local $XML::Parser::Dom::_DP_in_prolog; + local $XML::Parser::Dom::_DP_end_doc; + local $XML::Parser::Dom::_DP_saw_doctype; + local $XML::Parser::Dom::_DP_in_CDATA; + local $XML::Parser::Dom::_DP_keep_CDATA; + local $XML::Parser::Dom::_DP_last_text; + + + # Temporarily disable checks that Expat already does (for performance) + local $XML::DOM::SafeMode = 0; + # Temporarily disable ReadOnly checks + local $XML::DOM::IgnoreReadOnly = 1; + + my $ret; + eval { + $ret = $self->SUPER::parse (@_); + }; + my $err = $@; + + if ($err) + { + my $doc = $XML::Parser::Dom::_DP_doc; + if ($doc) + { + $doc->dispose; + } + die $err; + } + + $ret; +} + +my $LWP_USER_AGENT; +sub set_LWP_UserAgent +{ + $LWP_USER_AGENT = shift; +} + +sub 
parsefile +{ + my $self = shift; + my $url = shift; + + # Any other URL schemes? + if ($url =~ /^(https?|ftp|wais|gopher|file):/) + { + # Read the file from the web with LWP. + # + # Note that we read in the entire file, which may not be ideal + # for large files. LWP::UserAgent also provides a callback style + # request, which we could convert to a stream with a fork()... + + my $result; + eval + { + use LWP::UserAgent; + + my $ua = $self->{LWP_UserAgent}; + unless (defined $ua) + { + unless (defined $LWP_USER_AGENT) + { + $LWP_USER_AGENT = LWP::UserAgent->new; + + # Load proxy settings from environment variables, i.e.: + # http_proxy, ftp_proxy, no_proxy etc. (see LWP::UserAgent(3)) + # You need these to go thru firewalls. + $LWP_USER_AGENT->env_proxy; + } + $ua = $LWP_USER_AGENT; + } + my $req = new HTTP::Request 'GET', $url; + my $response = $LWP_USER_AGENT->request ($req); + + # Parse the result of the HTTP request + $result = $self->parse ($response->content, @_); + }; + if ($@) + { + die "Couldn't parsefile [$url] with LWP: $@"; + } + return $result; + } + else + { + return $self->SUPER::parsefile ($url, @_); + } +} + +###################################################################### +package XML::Parser::Dom; +###################################################################### + +BEGIN +{ + import XML::DOM::Node qw( :Fields ); + import XML::DOM::CharacterData qw( :Fields ); +} + +use vars qw( $_DP_doc + $_DP_elem + $_DP_doctype + $_DP_in_prolog + $_DP_end_doc + $_DP_saw_doctype + $_DP_in_CDATA + $_DP_keep_CDATA + $_DP_last_text + $_DP_level + $_DP_expand_pent + ); + +# This adds a new Style to the XML::Parser class. +# From now on you can say: $parser = new XML::Parser ('Style' => 'Dom' ); +# but that is *NOT* how a regular user should use it! +$XML::Parser::Built_In_Styles{Dom} = 1; + +sub Init +{ + $_DP_elem = $_DP_doc = new XML::DOM::Document(); + $_DP_doctype = new XML::DOM::DocumentType ($_DP_doc); + $_DP_doc->setDoctype ($_DP_doctype); + $_DP_keep_CDATA = $_[0]->{KeepCDATA}; + + # Prepare for document prolog + $_DP_in_prolog = 1; + + # We haven't passed the root element yet + $_DP_end_doc = 0; + + # Expand parameter entities in the DTD by default + + $_DP_expand_pent = defined $_[0]->{ExpandParamEnt} ? + $_[0]->{ExpandParamEnt} : 1; + if ($_DP_expand_pent) + { + $_[0]->{DOM_Entity} = {}; + } + + $_DP_level = 0; + + undef $_DP_last_text; +} + +sub Final +{ + unless ($_DP_saw_doctype) + { + my $doctype = $_DP_doc->removeDoctype; + $doctype->dispose; + } + $_DP_doc; +} + +sub Char +{ + my $str = $_[1]; + + if ($_DP_in_CDATA && $_DP_keep_CDATA) + { + undef $_DP_last_text; + # Merge text with previous node if possible + $_DP_elem->addCDATA ($str); + } + else + { + # Merge text with previous node if possible + # Used to be: $expat->{DOM_Element}->addText ($str); + if ($_DP_last_text) + { + $_DP_last_text->[_Data] .= $str; + } + else + { + $_DP_last_text = $_DP_doc->createTextNode ($str); + $_DP_last_text->[_Parent] = $_DP_elem; + push @{$_DP_elem->[_C]}, $_DP_last_text; + } + } +} + +sub Start +{ + my ($expat, $elem, @attr) = @_; + my $parent = $_DP_elem; + my $doc = $_DP_doc; + + if ($parent == $doc) + { + # End of document prolog, i.e. 
start of first Element + $_DP_in_prolog = 0; + } + + undef $_DP_last_text; + my $node = $doc->createElement ($elem); + $_DP_elem = $node; + $parent->appendChild ($node); + + my $n = @attr; + return unless $n; + + # Add attributes + my $first_default = $expat->specified_attr; + my $i = 0; + while ($i < $n) + { + my $specified = $i < $first_default; + my $name = $attr[$i++]; + undef $_DP_last_text; + my $attr = $doc->createAttribute ($name, $attr[$i++], $specified); + $node->setAttributeNode ($attr); + } +} + +sub End +{ + $_DP_elem = $_DP_elem->[_Parent]; + undef $_DP_last_text; + + # Check for end of root element + $_DP_end_doc = 1 if ($_DP_elem == $_DP_doc); +} + +# Called at end of file, i.e. whitespace following last closing tag +# Also for Entity references +# May also be called at other times... +sub Default +{ + my ($expat, $str) = @_; + +# shift; deb ("Default", @_); + + if ($_DP_in_prolog) # still processing Document prolog... + { +#?? could try to store this text later +#?? I've only seen whitespace here so far + } + elsif (!$_DP_end_doc) # ignore whitespace at end of Document + { +# if ($expat->{NoExpand}) +# { + $str =~ /^&(.+);$/os; + return unless defined ($1); + # Got a TextDecl () from an external entity here once + + $_DP_elem->appendChild ( + $_DP_doc->createEntityReference ($1)); + undef $_DP_last_text; +# } +# else +# { +# $expat->{DOM_Element}->addText ($str); +# } + } +} + +# XML::Parser 2.19 added support for CdataStart and CdataEnd handlers +# If they are not defined, the Default handler is called instead +# with the text "createComment ($_[1]); + $_DP_elem->appendChild ($comment); + } +} + +sub deb +{ +# return; + + my $name = shift; + print "$name (" . join(",", map {defined($_)?$_ : "(undef)"} @_) . ")\n"; +} + +sub Doctype +{ + my $expat = shift; +# deb ("Doctype", @_); + + $_DP_doctype->setParams (@_); + $_DP_saw_doctype = 1; +} + +sub Attlist +{ + my $expat = shift; +# deb ("Attlist", @_); + + $_[5] = "Hidden" unless $_DP_expand_pent || $_DP_level == 0; + $_DP_doctype->addAttDef (@_); +} + +sub XMLDecl +{ + my $expat = shift; +# deb ("XMLDecl", @_); + + undef $_DP_last_text; + $_DP_doc->setXMLDecl (new XML::DOM::XMLDecl ($_DP_doc, @_)); +} + +sub Entity +{ + my $expat = shift; +# deb ("Entity", @_); + + # Parameter Entities names are passed starting with '%' + my $parameter = 0; + if ($_[0] =~ /^%(.*)/s) + { + $_[0] = $1; + $parameter = 1; + + if (defined $_[2]) # was sysid specified? + { + # Store the Entity mapping for use in ExternEnt + if (exists $expat->{DOM_Entity}->{$_[2]}) + { + # If this ever happens, the name of entity may be the wrong one + # when writing out the Document. + XML::DOM::warning ("Entity $_[2] is known as %$_[0] and %" . + $expat->{DOM_Entity}->{$_[2]}); + } + else + { + $expat->{DOM_Entity}->{$_[2]} = $_[0]; + } + #?? 
remove this block when XML::Parser has better support + } + } + + undef $_DP_last_text; + + $_[5] = "Hidden" unless $_DP_expand_pent || $_DP_level == 0; + $_DP_doctype->addEntity ($parameter, @_); +} + +# +# Unparsed is called when it encounters e.g: +# +# +# +sub Unparsed +{ + Entity (@_); # same as regular ENTITY, as far as DOM is concerned +} + +sub Element +{ + shift; +# deb ("Element", @_); + + undef $_DP_last_text; + push @_, "Hidden" unless $_DP_expand_pent || $_DP_level == 0; + $_DP_doctype->addElementDecl (@_); +} + +sub Notation +{ + shift; +# deb ("Notation", @_); + + undef $_DP_last_text; + $_[4] = "Hidden" unless $_DP_expand_pent || $_DP_level == 0; + $_DP_doctype->addNotation (@_); +} + +sub Proc +{ + shift; +# deb ("Proc", @_); + + undef $_DP_last_text; + push @_, "Hidden" unless $_DP_expand_pent || $_DP_level == 0; + $_DP_elem->appendChild ($_DP_doc->createProcessingInstruction (@_)); +} + +# +# ExternEnt is called when an external entity, such as: +# +# +# +# is referenced in the document, e.g. with: &externalEntity; +# If ExternEnt is not specified, the entity reference is passed to the Default +# handler as e.g. "&externalEntity;", where an EntityReference object is added. +# +# Also for %externalEntity; references in the DTD itself. +# +# It can also be called when XML::Parser parses the DOCTYPE header +# (just before calling the DocType handler), when it contains a +# reference like "docbook.dtd" below: +# +# {DOM_Entity}->{$sysid}; + if (defined $entname) + { + $_DP_doctype->appendChild ($_DP_doc->createEntityReference ($entname, 1)); + # Wrap the contents in special comments, so we know when we reach the + # end of parsing the entity. This way we can omit the contents from + # the DTD, when ExpandParamEnt is set to 0. + + return "" . + $content . ""; + } + else + { + # We either read the entity ref'd by the system id in the + # header, or the entity was undefined. + # In either case, don't bother with maintaining the entity + # reference, just expand the contents. + return "" . + $content . ""; + } + } +} + +1; # module return code + +__END__ + +=head1 NAME + +XML::DOM - A perl module for building DOM Level 1 compliant document structures + +=head1 SYNOPSIS + + use XML::DOM; + + my $parser = new XML::DOM::Parser; + my $doc = $parser->parsefile ("file.xml"); + + # print all HREF attributes of all CODEBASE elements + my $nodes = $doc->getElementsByTagName ("CODEBASE"); + my $n = $nodes->getLength; + + for (my $i = 0; $i < $n; $i++) + { + my $node = $nodes->item ($i); + my $href = $node->getAttributeNode ("HREF"); + print $href->getValue . "\n"; + } + + # Print doc file + $doc->printToFile ("out.xml"); + + # Print to string + print $doc->toString; + + # Avoid memory leaks - cleanup circular references for garbage collection + $doc->dispose; + +=head1 DESCRIPTION + +This module extends the XML::Parser module by Clark Cooper. +The XML::Parser module is built on top of XML::Parser::Expat, +which is a lower level interface to James Clark's expat library. + +XML::DOM::Parser is derived from XML::Parser. It parses XML strings or files +and builds a data structure that conforms to the API of the Document Object +Model as described at http://www.w3.org/TR/REC-DOM-Level-1. +See the XML::Parser manpage for other available features of the +XML::DOM::Parser class. +Note that the 'Style' property should not be used (it is set internally.) 
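+
+A typical parse/modify/print round trip might look like the following
+minimal sketch (the element and attribute names are purely illustrative):
+
+  use XML::DOM;
+
+  my $parser = new XML::DOM::Parser;
+  my $doc = $parser->parse ('<list><item name="first"/></list>');
+
+  # add a second item element with a text child
+  my $root = $doc->getDocumentElement;
+  my $item = $doc->createElement ("item");
+  $item->setAttribute ("name", "second");
+  $item->appendChild ($doc->createTextNode ("more text"));
+  $root->appendChild ($item);
+
+  print $doc->toString;    # or $doc->printToFile ("out.xml");
+
+  $doc->dispose;           # break circular references when done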
+ +The XML::Parser I option is more or less supported, in that it will +generate EntityReference objects whenever an entity reference is encountered +in character data. I'm not sure how useful this is. Any comments are welcome. + +As described in the synopsis, when you create an XML::DOM::Parser object, +the parse and parsefile methods create an I object +from the specified input. This Document object can then be examined, modified and +written back out to a file or converted to a string. + +When using XML::DOM with XML::Parser version 2.19 and up, setting the +XML::DOM::Parser option I to 1 will store CDATASections in +CDATASection nodes, instead of converting them to Text nodes. +Subsequent CDATASection nodes will be merged into one. Let me know if this +is a problem. + +When using XML::Parser 2.27 and above, you can suppress expansion of +parameter entity references (e.g. %pent;) in the DTD, by setting I +to 1 and I to 0. See L for details. + +A Document has a tree structure consisting of I objects. A Node may contain +other nodes, depending on its type. +A Document may have Element, Text, Comment, and CDATASection nodes. +Element nodes may have Attr, Element, Text, Comment, and CDATASection nodes. +The other nodes may not have any child nodes. + +This module adds several node types that are not part of the DOM spec (yet.) +These are: ElementDecl (for declarations), AttlistDecl (for + declarations), XMLDecl (for declarations) and AttDef +(for attribute definitions in an AttlistDecl.) + +=head1 XML::DOM Classes + +The XML::DOM module stores XML documents in a tree structure with a root node +of type XML::DOM::Document. Different nodes in tree represent different +parts of the XML file. The DOM Level 1 Specification defines the following +node types: + +=over 4 + +=item * L - Super class of all node types + +=item * L - The root of the XML document + +=item * L - Describes the document structure: + +=item * L - An XML element: ... + +=item * L - An XML element attribute: name="value" + +=item * L - Super class of Text, Comment and CDATASection + +=item * L - Text in an XML element + +=item * L - Escaped block of text: + +=item * L - An XML comment: + +=item * L - Refers to an ENTITY: &ent; or %ent; + +=item * L - An ENTITY definition: + +=item * L - + +=item * L - Lightweight node for cut & paste + +=item * L - An NOTATION definition: + +=back + +In addition, the XML::DOM module contains the following nodes that are not part +of the DOM Level 1 Specification: + +=over 4 + +=item * L - Defines an element: + +=item * L - Defines one or more attributes in an + +=item * L - Defines one attribute in an + +=item * L - An XML declaration: + +=back + +Other classes that are part of the DOM Level 1 Spec: + +=over 4 + +=item * L - Provides information about this implementation. Currently it doesn't do much. + +=item * L - Used internally to store a node's child nodes. Also returned by getElementsByTagName. + +=item * L - Used internally to store an element's attributes. + +=back + +Other classes that are not part of the DOM Level 1 Spec: + +=over 4 + +=item * L - An non-validating XML parser that creates XML::DOM::Documents + +=item * L - A validating XML parser that creates XML::DOM::Documents. It uses L to check against the DocumentType (DTD) + +=item * L - A PerlSAX handler that creates XML::DOM::Documents. + +=back + +=head1 XML::DOM package + +=over 4 + +=item Constant definitions + +The following predefined constants indicate which type of node it is. 
+ +=back + + UNKNOWN_NODE (0) The node type is unknown (not part of DOM) + + ELEMENT_NODE (1) The node is an Element. + ATTRIBUTE_NODE (2) The node is an Attr. + TEXT_NODE (3) The node is a Text node. + CDATA_SECTION_NODE (4) The node is a CDATASection. + ENTITY_REFERENCE_NODE (5) The node is an EntityReference. + ENTITY_NODE (6) The node is an Entity. + PROCESSING_INSTRUCTION_NODE (7) The node is a ProcessingInstruction. + COMMENT_NODE (8) The node is a Comment. + DOCUMENT_NODE (9) The node is a Document. + DOCUMENT_TYPE_NODE (10) The node is a DocumentType. + DOCUMENT_FRAGMENT_NODE (11) The node is a DocumentFragment. + NOTATION_NODE (12) The node is a Notation. + + ELEMENT_DECL_NODE (13) The node is an ElementDecl (not part of DOM) + ATT_DEF_NODE (14) The node is an AttDef (not part of DOM) + XML_DECL_NODE (15) The node is an XMLDecl (not part of DOM) + ATTLIST_DECL_NODE (16) The node is an AttlistDecl (not part of DOM) + + Usage: + + if ($node->getNodeType == ELEMENT_NODE) + { + print "It's an Element"; + } + +B: The DOM Spec does not mention UNKNOWN_NODE and, +quite frankly, you should never encounter it. The last 4 node types were added +to support the 4 added node classes. + +=head2 Global Variables + +=over 4 + +=item $VERSION + +The variable $XML::DOM::VERSION contains the version number of this +implementation, e.g. "1.07". + +=back + +=head2 METHODS + +These methods are not part of the DOM Level 1 Specification. + +=over 4 + +=item getIgnoreReadOnly and ignoreReadOnly (readOnly) + +The DOM Level 1 Spec does not allow you to edit certain sections of the document, +e.g. the DocumentType, so by default this implementation throws DOMExceptions +(i.e. NO_MODIFICATION_ALLOWED_ERR) when you try to edit a readonly node. +These readonly checks can be disabled by (temporarily) setting the global +IgnoreReadOnly flag. + +The ignoreReadOnly method sets the global IgnoreReadOnly flag and returns its +previous value. The getIgnoreReadOnly method simply returns its current value. + + my $oldIgnore = XML::DOM::ignoreReadOnly (1); + eval { + ... do whatever you want, catching any other exceptions ... + }; + XML::DOM::ignoreReadOnly ($oldIgnore); # restore previous value + +Another way to do it, using a local variable: + + { # start new scope + local $XML::DOM::IgnoreReadOnly = 1; + ... do whatever you want, don't worry about exceptions ... + } # end of scope ($IgnoreReadOnly is set back to its previous value) + + +=item isValidName (name) + +Whether the specified name is a valid "Name" as specified in the XML spec. +Characters with Unicode values > 127 are now also supported. + +=item getAllowReservedNames and allowReservedNames (boolean) + +The first method returns whether reserved names are allowed. +The second takes a boolean argument and sets whether reserved names are allowed. +The initial value is 1 (i.e. allow reserved names.) + +The XML spec states that "Names" starting with (X|x)(M|m)(L|l) +are reserved for future use. (Amusingly enough, the XML version of the XML spec +(REC-xml-19980210.xml) breaks that very rule by defining an ENTITY with the name +'xmlpio'.) +A "Name" in this context means the Name token as found in the BNF rules in the +XML spec. + +XML::DOM only checks for errors when you modify the DOM tree, not when the +DOM tree is built by the XML::DOM::Parser. + +=item setTagCompression (funcref) + +There are 3 possible styles for printing empty Element tags: + +=over 4 + +=item Style 0 + + or + +XML::DOM uses this style by default for all Elements. 
+ +=item Style 1 + + or + +=item Style 2 + + or + +This style is sometimes desired when using XHTML. +(Note the extra space before the slash "/") +See L Appendix C for more details. + +=back + +By default XML::DOM compresses all empty Element tags (style 0.) +You can control which style is used for a particular Element by calling +XML::DOM::setTagCompression with a reference to a function that takes +2 arguments. The first is the tag name of the Element, the second is the +XML::DOM::Element that is being printed. +The function should return 0, 1 or 2 to indicate which style should be used to +print the empty tag. E.g. + + XML::DOM::setTagCompression (\&my_tag_compression); + + sub my_tag_compression + { + my ($tag, $elem) = @_; + + # Print empty br, hr and img tags like this:
+ return 2 if $tag =~ /^(br|hr|img)$/; + + # Print other empty tags like this: + return 1; + } + +=back + +=head1 IMPLEMENTATION DETAILS + +=over 4 + +=item * Perl Mappings + +The value undef was used when the DOM Spec said null. + +The DOM Spec says: Applications must encode DOMString using UTF-16 (defined in +Appendix C.3 of [UNICODE] and Amendment 1 of [ISO-10646]). +In this implementation we use plain old Perl strings encoded in UTF-8 instead of +UTF-16. + +=item * Text and CDATASection nodes + +The Expat parser expands EntityReferences and CDataSection sections to +raw strings and does not indicate where it was found. +This implementation does therefore convert both to Text nodes at parse time. +CDATASection and EntityReference nodes that are added to an existing Document +(by the user) will be preserved. + +Also, subsequent Text nodes are always merged at parse time. Text nodes that are +added later can be merged with the normalize method. Consider using the addText +method when adding Text nodes. + +=item * Printing and toString + +When printing (and converting an XML Document to a string) the strings have to +encoded differently depending on where they occur. E.g. in a CDATASection all +substrings are allowed except for "]]>". In regular text, certain characters are +not allowed, e.g. ">" has to be converted to ">". +These routines should be verified by someone who knows the details. + +=item * Quotes + +Certain sections in XML are quoted, like attribute values in an Element. +XML::Parser strips these quotes and the print methods in this implementation +always uses double quotes, so when parsing and printing a document, single quotes +may be converted to double quotes. The default value of an attribute definition +(AttDef) in an AttlistDecl, however, will maintain its quotes. + +=item * AttlistDecl + +Attribute declarations for a certain Element are always merged into a single +AttlistDecl object. + +=item * Comments + +Comments in the DOCTYPE section are not kept in the right place. They will become +child nodes of the Document. + +=item * Hidden Nodes + +Previous versions of XML::DOM would expand parameter entity references +(like B<%pent;>), so when printing the DTD, it would print the contents +of the external entity, instead of the parameter entity reference. +With this release (1.27), you can prevent this by setting the XML::DOM::Parser +options ParseParamEnt => 1 and ExpandParamEnt => 0. + +When it is parsing the contents of the external entities, it *DOES* still add +the nodes to the DocumentType, but it marks these nodes by setting +the 'Hidden' property. In addition, it adds an EntityReference node to the +DocumentType node. + +When printing the DocumentType node (or when using to_expat() or to_sax()), +the 'Hidden' nodes are suppressed, so you will see the parameter entity +reference instead of the contents of the external entities. See test case +t/dom_extent.t for an example. + +The reason for adding the 'Hidden' nodes to the DocumentType node, is that +the nodes may contain definitions that are referenced further +in the document. (Simply not adding the nodes to the DocumentType could +cause such entity references to be expanded incorrectly.) + +Note that you need XML::Parser 2.27 or higher for this to work correctly. + +=back + +=head1 SEE ALSO + +The Japanese version of this document by Takanori Kawai (Hippo2000) +at L + +The DOM Level 1 specification at L + +The XML spec (Extensible Markup Language 1.0) at L + +The L and L manual pages. 
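+
+As a concrete illustration of the Hidden Nodes behaviour described above,
+parameter entity expansion is selected when the parser is constructed; the
+file names in this minimal sketch are illustrative only (remember that
+XML::Parser 2.27 or higher is required):
+
+  use XML::DOM;
+
+  my $parser = new XML::DOM::Parser (ParseParamEnt  => 1,
+                                     ExpandParamEnt => 0);
+  my $doc = $parser->parsefile ("doc_with_external_dtd.xml");
+
+  # %pent; references in the DTD are written out as references,
+  # not expanded to the contents of the external entities
+  $doc->printToFile ("out.xml");
+  $doc->dispose;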
+ +=head1 CAVEATS + +The method getElementsByTagName() does not return a "live" NodeList. +Whether this is an actual caveat is debatable, but a few people on the +www-dom mailing list seemed to think so. I haven't decided yet. It's a pain +to implement, it slows things down and the benefits seem marginal. +Let me know what you think. + +(To subscribe to the www-dom mailing list send an email with the subject +"subscribe" to www-dom-request@w3.org. I only look here occasionally, so don't +send bug reports or suggestions about XML::DOM to this list, send them +to enno@att.com instead.) + +=head1 AUTHOR + +Send bug reports, hints, tips, suggestions to Enno Derksen at +>. + +Thanks to Clark Cooper for his help with the initial version. + +=cut diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/AttDef.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/AttDef.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,36 @@ +=head1 NAME + +XML::DOM::AttDef - A single XML attribute definition in an ATTLIST in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::AttDef extends L, but is not part of the DOM Level 1 +specification. + +Each object of this class represents one attribute definition in an AttlistDecl. + +=head2 METHODS + +=over 4 + +=item getName + +Returns the attribute name. + +=item getDefault + +Returns the default value, or undef. + +=item isFixed + +Whether the attribute value is fixed (see #FIXED keyword.) + +=item isRequired + +Whether the attribute value is required (see #REQUIRED keyword.) + +=item isImplied + +Whether the attribute value is implied (see #IMPLIED keyword.) + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/AttlistDecl.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/AttlistDecl.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,45 @@ +=head1 NAME + +XML::DOM::AttlistDecl - An XML ATTLIST declaration in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::AttlistDecl extends L but is not part of the +DOM Level 1 specification. + +This node represents an ATTLIST declaration, e.g. + + + +Each attribute definition is stored a separate AttDef node. The AttDef nodes can +be retrieved with getAttDef and added with addAttDef. +(The AttDef nodes are stored in a NamedNodeMap internally.) + +=head2 METHODS + +=over 4 + +=item getName + +Returns the Element tagName. + +=item getAttDef (attrName) + +Returns the AttDef node for the attribute with the specified name. + +=item addAttDef (attrName, type, default, [ fixed ]) + +Adds a AttDef node for the attribute with the specified name. + +Parameters: + I the attribute name. + I the attribute type (e.g. "CDATA" or "(male|female)".) + I the default value enclosed in quotes (!), the string #IMPLIED or + the string #REQUIRED. + I whether the attribute is '#FIXED' (default is 0.) + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/Attr.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/Attr.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,67 @@ +=head1 NAME + +XML::DOM::Attr - An XML attribute in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::Attr extends L. + +The Attr nodes built by the XML::DOM::Parser always have one child node +which is a Text node containing the expanded string value (i.e. EntityReferences +are always expanded.) EntityReferences may be added when modifying or creating +a new Document. + +The Attr interface represents an attribute in an Element object. 
+Typically the allowable values for the attribute are defined in a +document type definition. + +Attr objects inherit the Node interface, but since they are not +actually child nodes of the element they describe, the DOM does not +consider them part of the document tree. Thus, the Node attributes +parentNode, previousSibling, and nextSibling have a undef value for Attr +objects. The DOM takes the view that attributes are properties of +elements rather than having a separate identity from the elements they +are associated with; this should make it more efficient to implement +such features as default attributes associated with all elements of a +given type. Furthermore, Attr nodes may not be immediate children of a +DocumentFragment. However, they can be associated with Element nodes +contained within a DocumentFragment. In short, users and implementors +of the DOM need to be aware that Attr nodes have some things in common +with other objects inheriting the Node interface, but they also are +quite distinct. + +The attribute's effective value is determined as follows: if this +attribute has been explicitly assigned any value, that value is the +attribute's effective value; otherwise, if there is a declaration for +this attribute, and that declaration includes a default value, then +that default value is the attribute's effective value; otherwise, the +attribute does not exist on this element in the structure model until +it has been explicitly added. Note that the nodeValue attribute on the +Attr instance can also be used to retrieve the string version of the +attribute's value(s). + +In XML, where the value of an attribute can contain entity references, +the child nodes of the Attr node provide a representation in which +entity references are not expanded. These child nodes may be either +Text or EntityReference nodes. Because the attribute type may be +unknown, there are no tokenized attribute values. + +=head2 METHODS + +=over 4 + +=item getValue + +On retrieval, the value of the attribute is returned as a string. +Character and general entity references are replaced with their values. + +=item setValue (str) + +DOM Spec: On setting, this creates a Text node with the unparsed contents of the +string. + +=item getName + +Returns the name of this attribute. + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/CDATASection.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/CDATASection.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,31 @@ +=head1 NAME + +XML::DOM::CDATASection - Escaping XML text blocks in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::CDATASection extends L which extends +L. + +CDATA sections are used to escape blocks of text containing characters +that would otherwise be regarded as markup. The only delimiter that is +recognized in a CDATA section is the "]]>" string that ends the CDATA +section. CDATA sections can not be nested. The primary purpose is for +including material such as XML fragments, without needing to escape all +the delimiters. + +The DOMString attribute of the Text node holds the text that is +contained by the CDATA section. Note that this may contain characters +that need to be escaped outside of CDATA sections and that, depending +on the character encoding ("charset") chosen for serialization, it may +be impossible to write out some characters as part of a CDATA section. + +The CDATASection interface inherits the CharacterData interface through +the Text interface. 
Adjacent CDATASections nodes are not merged by use +of the Element.normalize() method. + +B XML::DOM::Parser and XML::DOM::ValParser convert all CDATASections +to regular text by default. +To preserve CDATASections, set the parser option KeepCDATA to 1. + + diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/CharacterData.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/CharacterData.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,87 @@ +=head1 NAME + +XML::DOM::CharacterData - Common interface for Text, CDATASections and Comments + +=head1 DESCRIPTION + +XML::DOM::CharacterData extends L + +The CharacterData interface extends Node with a set of attributes and +methods for accessing character data in the DOM. For clarity this set +is defined here rather than on each object that uses these attributes +and methods. No DOM objects correspond directly to CharacterData, +though Text, Comment and CDATASection do inherit the interface from it. +All offsets in this interface start from 0. + +=head2 METHODS + +=over 4 + +=item getData and setData (data) + +The character data of the node that implements this +interface. The DOM implementation may not put arbitrary +limits on the amount of data that may be stored in a +CharacterData node. However, implementation limits may mean +that the entirety of a node's data may not fit into a single +DOMString. In such cases, the user may call substringData to +retrieve the data in appropriately sized pieces. + +=item getLength + +The number of characters that are available through data and +the substringData method below. This may have the value zero, +i.e., CharacterData nodes may be empty. + +=item substringData (offset, count) + +Extracts a range of data from the node. + +Parameters: + I Start offset of substring to extract. + I The number of characters to extract. + +Return Value: The specified substring. If the sum of offset and count +exceeds the length, then all characters to the end of +the data are returned. + +=item appendData (str) + +Appends the string to the end of the character data of the +node. Upon success, data provides access to the concatenation +of data and the DOMString specified. + +=item insertData (offset, arg) + +Inserts a string at the specified character offset. + +Parameters: + I The character offset at which to insert. + I The DOMString to insert. + +=item deleteData (offset, count) + +Removes a range of characters from the node. +Upon success, data and length reflect the change. +If the sum of offset and count exceeds length then all characters +from offset to the end of the data are deleted. + +Parameters: + I The offset from which to remove characters. + I The number of characters to delete. + +=item replaceData (offset, count, arg) + +Replaces the characters starting at the specified character +offset with the specified string. + +Parameters: + I The offset from which to start replacing. + I The number of characters to replace. + I The DOMString with which the range must be replaced. + +If the sum of offset and count exceeds length, then all characters to the end of +the data are replaced (i.e., the effect is the same as a remove method call with +the same range, followed by an append method invocation). 
+ +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/Comment.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/Comment.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,14 @@ +=head1 NAME + +XML::DOM::Comment - An XML comment in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::Comment extends L which extends +L. + +This node represents the content of a comment, i.e., all the characters +between the starting ''. Note that this is the +definition of a comment in XML, and, in practice, HTML, although some +HTML tools may implement the full SGML comment structure. + diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/DOMException.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/DOMException.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,88 @@ +###################################################################### +package XML::DOM::DOMException; +###################################################################### + +use Exporter; + +use overload '""' => \&stringify; +use vars qw ( @ISA @EXPORT @ErrorNames ); + +BEGIN +{ + @ISA = qw( Exporter ); + @EXPORT = qw( INDEX_SIZE_ERR + DOMSTRING_SIZE_ERR + HIERARCHY_REQUEST_ERR + WRONG_DOCUMENT_ERR + INVALID_CHARACTER_ERR + NO_DATA_ALLOWED_ERR + NO_MODIFICATION_ALLOWED_ERR + NOT_FOUND_ERR + NOT_SUPPORTED_ERR + INUSE_ATTRIBUTE_ERR + ); +} + +sub UNKNOWN_ERR () {0;} # not in the DOM Spec! +sub INDEX_SIZE_ERR () {1;} +sub DOMSTRING_SIZE_ERR () {2;} +sub HIERARCHY_REQUEST_ERR () {3;} +sub WRONG_DOCUMENT_ERR () {4;} +sub INVALID_CHARACTER_ERR () {5;} +sub NO_DATA_ALLOWED_ERR () {6;} +sub NO_MODIFICATION_ALLOWED_ERR () {7;} +sub NOT_FOUND_ERR () {8;} +sub NOT_SUPPORTED_ERR () {9;} +sub INUSE_ATTRIBUTE_ERR () {10;} + +@ErrorNames = ( + "UNKNOWN_ERR", + "INDEX_SIZE_ERR", + "DOMSTRING_SIZE_ERR", + "HIERARCHY_REQUEST_ERR", + "WRONG_DOCUMENT_ERR", + "INVALID_CHARACTER_ERR", + "NO_DATA_ALLOWED_ERR", + "NO_MODIFICATION_ALLOWED_ERR", + "NOT_FOUND_ERR", + "NOT_SUPPORTED_ERR", + "INUSE_ATTRIBUTE_ERR" + ); +sub new +{ + my ($type, $code, $msg) = @_; + my $self = bless {Code => $code}, $type; + + $self->{Message} = $msg if defined $msg; + +# print "=> Exception: " . $self->stringify . "\n"; + $self; +} + +sub getCode +{ + $_[0]->{Code}; +} + +#------------------------------------------------------------ +# Extra method implementations + +sub getName +{ + $ErrorNames[$_[0]->{Code}]; +} + +sub getMessage +{ + $_[0]->{Message}; +} + +sub stringify +{ + my $self = shift; + + "XML::DOM::DOMException(Code=" . $self->getCode . ", Name=" . + $self->getName . ", Message=" . $self->getMessage . ")"; +} + +1; # package return code diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/DOMImplementation.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/DOMImplementation.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,24 @@ +=head1 NAME + +XML::DOM::DOMImplementation - Information about XML::DOM implementation + +=head1 DESCRIPTION + +The DOMImplementation interface provides a number of methods for +performing operations that are independent of any particular instance +of the document object model. + +The DOM Level 1 does not specify a way of creating a document instance, +and hence document creation is an operation specific to an +implementation. Future Levels of the DOM specification are expected to +provide methods for creating documents directly. 
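+
+In this implementation, Document objects are normally created by the
+parser (see L<XML::DOM::Parser>) rather than through the
+DOMImplementation interface. The following minimal sketch is for
+illustration only; the XML string is invented:
+
+  use XML::DOM;
+
+  my $parser = new XML::DOM::Parser;
+  my $doc    = $parser->parse ('<doc><item/></doc>');
+
+  # The DOMImplementation object that handles this document
+  my $impl = $doc->getImplementation;
+  print "XML 1.0 is supported\n" if $impl->hasFeature ("XML", "1.0");
+
+  $doc->dispose;   # break circular references when done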
+ +=head2 METHODS + +=over 4 + +=item hasFeature (feature, version) + +Returns 1 if and only if feature equals "XML" and version equals "1.0". + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/Document.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/Document.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,220 @@ +=head1 NAME + +XML::DOM::Document - An XML document node in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::Document extends L. + +It is the main root of the XML document structure as returned by +XML::DOM::Parser::parse and XML::DOM::Parser::parsefile. + +Since elements, text nodes, comments, processing instructions, etc. +cannot exist outside the context of a Document, the Document interface +also contains the factory methods needed to create these objects. The +Node objects created have a getOwnerDocument method which associates +them with the Document within whose context they were created. + +=head2 METHODS + +=over 4 + +=item getDocumentElement + +This is a convenience method that allows direct access to +the child node that is the root Element of the document. + +=item getDoctype + +The Document Type Declaration (see DocumentType) associated +with this document. For HTML documents as well as XML +documents without a document type declaration this returns +undef. The DOM Level 1 does not support editing the Document +Type Declaration. + +B: This implementation allows editing the doctype. +See I for details. + +=item getImplementation + +The DOMImplementation object that handles this document. A +DOM application may use objects from multiple implementations. + +=item createElement (tagName) + +Creates an element of the type specified. Note that the +instance returned implements the Element interface, so +attributes can be specified directly on the returned object. + +DOMExceptions: + +=over 4 + +=item * INVALID_CHARACTER_ERR + +Raised if the tagName does not conform to the XML spec. + +=back + +=item createTextNode (data) + +Creates a Text node given the specified string. + +=item createComment (data) + +Creates a Comment node given the specified string. + +=item createCDATASection (data) + +Creates a CDATASection node given the specified string. + +=item createAttribute (name [, value [, specified ]]) + +Creates an Attr of the given name. Note that the Attr +instance can then be set on an Element using the setAttribute method. + +B: The DOM Spec does not allow passing the value or the +specified property in this method. In this implementation they are optional. + +Parameters: + I The attribute's value. See Attr::setValue for details. + If the value is not supplied, the specified property is set to 0. + I Whether the attribute value was specified or whether the default + value was used. If not supplied, it's assumed to be 1. + +DOMExceptions: + +=over 4 + +=item * INVALID_CHARACTER_ERR + +Raised if the name does not conform to the XML spec. + +=back + +=item createProcessingInstruction (target, data) + +Creates a ProcessingInstruction node given the specified name and data strings. + +Parameters: + I The target part of the processing instruction. + I The data for the node. + +DOMExceptions: + +=over 4 + +=item * INVALID_CHARACTER_ERR + +Raised if the target does not conform to the XML spec. + +=back + +=item createDocumentFragment + +Creates an empty DocumentFragment object. + +=item createEntityReference (name) + +Creates an EntityReference object. 
+ +=back + +=head2 Additional methods not in the DOM Spec + +=over 4 + +=item getXMLDecl and setXMLDecl (xmlDecl) + +Returns the XMLDecl for this Document or undef if none was specified. +Note that XMLDecl is not part of the list of child nodes. + +=item setDoctype (doctype) + +Sets or replaces the DocumentType. +B: Don't use appendChild or insertBefore to set the DocumentType. +Even though doctype will be part of the list of child nodes, it is handled +specially. + +=item getDefaultAttrValue (elem, attr) + +Returns the default attribute value as a string or undef, if none is available. + +Parameters: + I The element tagName. + I The attribute name. + +=item getEntity (name) + +Returns the Entity with the specified name. + +=item createXMLDecl (version, encoding, standalone) + +Creates an XMLDecl object. All parameters may be undefined. + +=item createDocumentType (name, sysId, pubId) + +Creates a DocumentType object. SysId and pubId may be undefined. + +=item createNotation (name, base, sysId, pubId) + +Creates a new Notation object. Consider using +XML::DOM::DocumentType::addNotation! + +=item createEntity (parameter, notationName, value, sysId, pubId, ndata) + +Creates an Entity object. Consider using XML::DOM::DocumentType::addEntity! + +=item createElementDecl (name, model) + +Creates an ElementDecl object. + +DOMExceptions: + +=over 4 + +=item * INVALID_CHARACTER_ERR + +Raised if the element name (tagName) does not conform to the XML spec. + +=back + +=item createAttlistDecl (name) + +Creates an AttlistDecl object. + +DOMExceptions: + +=over 4 + +=item * INVALID_CHARACTER_ERR + +Raised if the element name (tagName) does not conform to the XML spec. + +=back + +=item expandEntity (entity [, parameter]) + +Expands the specified entity or parameter entity (if parameter=1) and returns +its value as a string, or undef if the entity does not exist. +(The entity name should not contain the '%', '&' or ';' delimiters.) + +=item check ( [$checker] ) + +Uses the specified L to validate the document. +If no XML::Checker is supplied, a new XML::Checker is created. +See L for details. + +=item check_sax ( [$checker] ) + +Similar to check() except it uses the SAX interface to XML::Checker instead of +the expat interface. This method may disappear or replace check() at some time. + +=item createChecker () + +Creates an XML::Checker based on the document's DTD. +The $checker can be reused to check any elements within the document. +Create a new L whenever the DOCTYPE section of the document +is altered! + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/DocumentFragment.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/DocumentFragment.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,40 @@ +=head1 NAME + +XML::DOM::DocumentFragment - Facilitates cut & paste in XML::DOM documents + +=head1 DESCRIPTION + +XML::DOM::DocumentFragment extends L + +DocumentFragment is a "lightweight" or "minimal" Document object. It is +very common to want to be able to extract a portion of a document's +tree or to create a new fragment of a document. Imagine implementing a +user command like cut or rearranging a document by moving fragments +around. It is desirable to have an object which can hold such fragments +and it is quite natural to use a Node for this purpose. While it is +true that a Document object could fulfil this role, a Document object +can potentially be a heavyweight object, depending on the underlying +implementation. 
What is really needed for this is a very lightweight +object. DocumentFragment is such an object. + +Furthermore, various operations -- such as inserting nodes as children +of another Node -- may take DocumentFragment objects as arguments; this +results in all the child nodes of the DocumentFragment being moved to +the child list of this node. + +The children of a DocumentFragment node are zero or more nodes +representing the tops of any sub-trees defining the structure of the +document. DocumentFragment nodes do not need to be well-formed XML +documents (although they do need to follow the rules imposed upon +well-formed XML parsed entities, which can have multiple top nodes). +For example, a DocumentFragment might have only one child and that +child node could be a Text node. Such a structure model represents +neither an HTML document nor a well-formed XML document. + +When a DocumentFragment is inserted into a Document (or indeed any +other Node that may take children) the children of the DocumentFragment +and not the DocumentFragment itself are inserted into the Node. This +makes the DocumentFragment very useful when the user wishes to create +nodes that are siblings; the DocumentFragment acts as the parent of +these nodes so that the user can use the standard methods from the Node +interface, such as insertBefore() and appendChild(). diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/DocumentType.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/DocumentType.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,182 @@ +=head1 NAME + +XML::DOM::DocumentType - An XML document type (DTD) in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::DocumentType extends L. + +Each Document has a doctype attribute whose value is either null or a +DocumentType object. The DocumentType interface in the DOM Level 1 Core +provides an interface to the list of entities that are defined for the +document, and little else because the effect of namespaces and the +various XML scheme efforts on DTD representation are not clearly +understood as of this writing. +The DOM Level 1 doesn't support editing DocumentType nodes. + +B: This implementation has added a lot of extra +functionality to the DOM Level 1 interface. +To allow editing of the DocumentType nodes, see XML::DOM::ignoreReadOnly. + +=head2 METHODS + +=over 4 + +=item getName + +Returns the name of the DTD, i.e. the name immediately following the +DOCTYPE keyword. + +=item getEntities + +A NamedNodeMap containing the general entities, both external +and internal, declared in the DTD. Duplicates are discarded. +For example in: + + + + + ]> + + +the interface provides access to foo and bar but not baz. +Every node in this map also implements the Entity interface. + +The DOM Level 1 does not support editing entities, therefore +entities cannot be altered in any way. + +B: See XML::DOM::ignoreReadOnly to edit the DocumentType etc. + +=item getNotations + +A NamedNodeMap containing the notations declared in the DTD. +Duplicates are discarded. Every node in this map also +implements the Notation interface. + +The DOM Level 1 does not support editing notations, therefore +notations cannot be altered in any way. + +B: See XML::DOM::ignoreReadOnly to edit the DocumentType etc. 
+ +=head2 Additional methods not in the DOM Spec + +=item Creating and setting the DocumentType + +A new DocumentType can be created with: + + $doctype = $doc->createDocumentType ($name, $sysId, $pubId, $internal); + +To set (or replace) the DocumentType for a particular document, use: + + $doc->setDocType ($doctype); + +=item getSysId and setSysId (sysId) + +Returns or sets the system id. + +=item getPubId and setPubId (pudId) + +Returns or sets the public id. + +=item setName (name) + +Sets the name of the DTD, i.e. the name immediately following the +DOCTYPE keyword. Note that this should always be the same as the element +tag name of the root element. + +=item getAttlistDecl (elemName) + +Returns the AttlistDecl for the Element with the specified name, or undef. + +=item getElementDecl (elemName) + +Returns the ElementDecl for the Element with the specified name, or undef. + +=item getEntity (entityName) + +Returns the Entity with the specified name, or undef. + +=item addAttlistDecl (elemName) + +Adds a new AttDecl node with the specified elemName if one doesn't exist yet. +Returns the AttlistDecl (new or existing) node. + +=item addElementDecl (elemName, model) + +Adds a new ElementDecl node with the specified elemName and model if one doesn't +exist yet. +Returns the AttlistDecl (new or existing) node. The model is ignored if one +already existed. + +=item addEntity (parameter, notationName, value, sysId, pubId, ndata) + +Adds a new Entity node. Don't use createEntity and appendChild, because it should +be added to the internal NamedNodeMap containing the entities. + +Parameters: + I whether it is a parameter entity (%ent;) or not (&ent;). + I the entity name. + I the entity value. + I the system id (if any.) + I the public id (if any.) + I the NDATA declaration (if any, for general unparsed entities.) + +SysId, pubId and ndata may be undefined. + +DOMExceptions: + +=over 4 + +=item * INVALID_CHARACTER_ERR + +Raised if the notationName does not conform to the XML spec. + +=back + +=item addNotation (name, base, sysId, pubId) + +Adds a new Notation object. + +Parameters: + I the notation name. + I the base to be used for resolving a relative URI. + I the system id. + I the public id. + +Base, sysId, and pubId may all be undefined. +(These parameters are passed by the XML::Parser Notation handler.) + +DOMExceptions: + +=over 4 + +=item * INVALID_CHARACTER_ERR + +Raised if the notationName does not conform to the XML spec. + +=back + +=item addAttDef (elemName, attrName, type, default, fixed) + +Adds a new attribute definition. It will add the AttDef node to the AttlistDecl +if it exists. If an AttDef with the specified attrName already exists for the +given elemName, this function only generates a warning. + +See XML::DOM::AttDef::new for the other parameters. + +=item getDefaultAttrValue (elem, attr) + +Returns the default attribute value as a string or undef, if none is available. + +Parameters: + I The element tagName. + I The attribute name. + +=item expandEntity (entity [, parameter]) + +Expands the specified entity or parameter entity (if parameter=1) and returns +its value as a string, or undef if the entity does not exist. +(The entity name should not contain the '%', '&' or ';' delimiters.) 
+ +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/Element.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/Element.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,189 @@ +=head1 NAME + +XML::DOM::Element - An XML element node in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::Element extends L. + +By far the vast majority of objects (apart from text) that authors +encounter when traversing a document are Element nodes. Assume the +following XML document: + + + + + + +When represented using DOM, the top node is an Element node for +"elementExample", which contains two child Element nodes, one for +"subelement1" and one for "subelement2". "subelement1" contains no +child nodes. + +Elements may have attributes associated with them; since the Element +interface inherits from Node, the generic Node interface method +getAttributes may be used to retrieve the set of all attributes for an +element. There are methods on the Element interface to retrieve either +an Attr object by name or an attribute value by name. In XML, where an +attribute value may contain entity references, an Attr object should be +retrieved to examine the possibly fairly complex sub-tree representing +the attribute value. On the other hand, in HTML, where all attributes +have simple string values, methods to directly access an attribute +value can safely be used as a convenience. + +=head2 METHODS + +=over 4 + +=item getTagName + +The name of the element. For example, in: + + + ... + + +tagName has the value "elementExample". Note that this is +case-preserving in XML, as are all of the operations of the +DOM. + +=item getAttribute (name) + +Retrieves an attribute value by name. + +Return Value: The Attr value as a string, or the empty string if that +attribute does not have a specified or default value. + +=item setAttribute (name, value) + +Adds a new attribute. If an attribute with that name is +already present in the element, its value is changed to be +that of the value parameter. This value is a simple string, +it is not parsed as it is being set. So any markup (such as +syntax to be recognized as an entity reference) is treated as +literal text, and needs to be appropriately escaped by the +implementation when it is written out. In order to assign an +attribute value that contains entity references, the user +must create an Attr node plus any Text and EntityReference +nodes, build the appropriate subtree, and use +setAttributeNode to assign it as the value of an attribute. + + +DOMExceptions: + +=over 4 + +=item * INVALID_CHARACTER_ERR + +Raised if the specified name contains an invalid character. + +=item * NO_MODIFICATION_ALLOWED_ERR + +Raised if this node is readonly. + +=back + +=item removeAttribute (name) + +Removes an attribute by name. If the removed attribute has a +default value it is immediately replaced. + +DOMExceptions: + +=over 4 + +=item * NO_MODIFICATION_ALLOWED_ERR + +Raised if this node is readonly. + +=back + +=item getAttributeNode + +Retrieves an Attr node by name. + +Return Value: The Attr node with the specified attribute name or undef +if there is no such attribute. + +=item setAttributeNode (attr) + +Adds a new attribute. If an attribute with that name is +already present in the element, it is replaced by the new one. + +Return Value: If the newAttr attribute replaces an existing attribute +with the same name, the previously existing Attr node is +returned, otherwise undef is returned. 
+ +DOMExceptions: + +=over 4 + +=item * WRONG_DOCUMENT_ERR + +Raised if newAttr was created from a different document than the one that created +the element. + +=item * NO_MODIFICATION_ALLOWED_ERR + +Raised if this node is readonly. + +=item * INUSE_ATTRIBUTE_ERR + +Raised if newAttr is already an attribute of another Element object. The DOM +user must explicitly clone Attr nodes to re-use them in other elements. + +=back + +=item removeAttributeNode (oldAttr) + +Removes the specified attribute. If the removed Attr has a default value it is +immediately replaced. If the Attr already is the default value, nothing happens +and nothing is returned. + +Parameters: + I The Attr node to remove from the attribute list. + +Return Value: The Attr node that was removed. + +DOMExceptions: + +=over 4 + +=item * NO_MODIFICATION_ALLOWED_ERR + +Raised if this node is readonly. + +=item * NOT_FOUND_ERR + +Raised if oldAttr is not an attribute of the element. + +=back + +=head2 Additional methods not in the DOM Spec + +=over 4 + +=item setTagName (newTagName) + +Sets the tag name of the Element. Note that this method is not portable +between DOM implementations. + +DOMExceptions: + +=over 4 + +=item * INVALID_CHARACTER_ERR + +Raised if the specified name contains an invalid character. + +=back + +=item check ($checker) + +Uses the specified L to validate the document. +NOTE: an XML::Checker must be supplied. The checker can be created in +different ways, e.g. when parsing a document with XML::DOM::ValParser, +or with XML::DOM::Document::createChecker(). +See L for more info. + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/ElementDecl.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/ElementDecl.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,27 @@ +=head1 NAME + +XML::DOM::ElementDecl - An XML ELEMENT declaration in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::ElementDecl extends L but is not part of the +DOM Level 1 specification. + +This node represents an Element declaration, e.g. + + + +=head2 METHODS + +=over 4 + +=item getName + +Returns the Element tagName. + +=item getModel and setModel (model) + +Returns and sets the model as a string, e.g. +"(street+, city, state, zip, country?)" in the above example. + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/Entity.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/Entity.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,56 @@ +=head1 NAME + +XML::DOM::Entity - An XML ENTITY in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::Entity extends L. + +This node represents an Entity declaration, e.g. + + + + + +The first one is called a parameter entity and is referenced like this: %draft; +The 2nd is a (regular) entity and is referenced like this: &hatch-pic; + +=head2 METHODS + +=over 4 + +=item getNotationName + +Returns the name of the notation for the entity. + +I The DOM Spec says: For unparsed entities, the name of the +notation for the entity. For parsed entities, this is null. +(This implementation does not support unparsed entities.) + +=item getSysId + +Returns the system id, or undef. + +=item getPubId + +Returns the public id, or undef. + +=back + +=head2 Additional methods not in the DOM Spec + +=over 4 + +=item isParameterEntity + +Whether it is a parameter entity (%ent;) or not (&ent;) + +=item getValue + +Returns the entity value. + +=item getNdata + +Returns the NDATA declaration (for general unparsed entities), or undef. 
+ +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/EntityReference.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/EntityReference.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,27 @@ +=head1 NAME + +XML::DOM::EntityReference - An XML ENTITY reference in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::EntityReference extends L. + +EntityReference objects may be inserted into the structure model when +an entity reference is in the source document, or when the user wishes +to insert an entity reference. Note that character references and +references to predefined entities are considered to be expanded by the +HTML or XML processor so that characters are represented by their +Unicode equivalent rather than by an entity reference. Moreover, the +XML processor may completely expand references to entities while +building the structure model, instead of providing EntityReference +objects. If it does provide such objects, then for a given +EntityReference node, it may be that there is no Entity node +representing the referenced entity; but if such an Entity exists, then +the child list of the EntityReference node is the same as that of the +Entity node. As with the Entity node, all descendants of the +EntityReference are readonly. + +The resolution of the children of the EntityReference (the replacement +value of the referenced Entity) may be lazily evaluated; actions by the +user (such as calling the childNodes method on the EntityReference +node) are assumed to trigger the evaluation. diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/NamedNodeMap.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/NamedNodeMap.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,271 @@ +###################################################################### +package XML::DOM::NamedNodeMap; +###################################################################### + +use strict; + +use Carp; +use XML::DOM::DOMException; +use XML::DOM::NodeList; + +use vars qw( $Special ); + +# Constant definition: +# Note: a real Name should have at least 1 char, so nobody else should use this +$Special = ""; + +sub new +{ + my ($class, %args) = @_; + + $args{Values} = new XML::DOM::NodeList; + + # Store all NamedNodeMap properties in element $Special + bless { $Special => \%args}, $class; +} + +sub getNamedItem +{ + # Don't return the $Special item! + ($_[1] eq $Special) ? 
undef : $_[0]->{$_[1]}; +} + +sub setNamedItem +{ + my ($self, $node) = @_; + my $prop = $self->{$Special}; + + my $name = $node->getNodeName; + + if ($XML::DOM::SafeMode) + { + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR) + if $self->isReadOnly; + + croak new XML::DOM::DOMException (WRONG_DOCUMENT_ERR) + if $node->[XML::DOM::Node::_Doc] != $prop->{Doc}; + + croak new XML::DOM::DOMException (INUSE_ATTRIBUTE_ERR) + if defined ($node->[XML::DOM::Node::_UsedIn]); + + croak new XML::DOM::DOMException (INVALID_CHARACTER_ERR, + "can't add name with NodeName [$name] to NamedNodeMap") + if $name eq $Special; + } + + my $values = $prop->{Values}; + my $index = -1; + + my $prev = $self->{$name}; + if (defined $prev) + { + # decouple previous node + $prev->decoupleUsedIn; + + # find index of $prev + $index = 0; + for my $val (@{$values}) + { + last if ($val == $prev); + $index++; + } + } + + $self->{$name} = $node; + $node->[XML::DOM::Node::_UsedIn] = $self; + + if ($index == -1) + { + push (@{$values}, $node); + } + else # replace previous node with new node + { + splice (@{$values}, $index, 1, $node); + } + + $prev; +} + +sub removeNamedItem +{ + my ($self, $name) = @_; + + # Be careful that user doesn't delete $Special node! + croak new XML::DOM::DOMException (NOT_FOUND_ERR) + if $name eq $Special; + + my $node = $self->{$name}; + + croak new XML::DOM::DOMException (NOT_FOUND_ERR) + unless defined $node; + + # The DOM Spec doesn't mention this Exception - I think it's an oversight + croak new XML::DOM::DOMException (NO_MODIFICATION_ALLOWED_ERR) + if $self->isReadOnly; + + $node->decoupleUsedIn; + delete $self->{$name}; + + # remove node from Values list + my $values = $self->getValues; + my $index = 0; + for my $val (@{$values}) + { + if ($val == $node) + { + splice (@{$values}, $index, 1, ()); + last; + } + $index++; + } + $node; +} + +# The following 2 are really bogus. DOM should use an iterator instead (Clark) + +sub item +{ + my ($self, $item) = @_; + $self->{$Special}->{Values}->[$item]; +} + +sub getLength +{ + my ($self) = @_; + my $vals = $self->{$Special}->{Values}; + int (@$vals); +} + +#------------------------------------------------------------ +# Extra method implementations + +sub isReadOnly +{ + return 0 if $XML::DOM::IgnoreReadOnly; + + my $used = $_[0]->{$Special}->{UsedIn}; + defined $used ? $used->isReadOnly : 0; +} + +sub cloneNode +{ + my ($self, $deep) = @_; + my $prop = $self->{$Special}; + + my $map = new XML::DOM::NamedNodeMap (Doc => $prop->{Doc}); + # Not copying Parent property on purpose! + + local $XML::DOM::IgnoreReadOnly = 1; # temporarily... + + for my $val (@{$prop->{Values}}) + { + my $key = $val->getNodeName; + + my $newNode = $val->cloneNode ($deep); + $newNode->[XML::DOM::Node::_UsedIn] = $map; + $map->{$key} = $newNode; + push (@{$map->{$Special}->{Values}}, $newNode); + } + + $map; +} + +sub setOwnerDocument +{ + my ($self, $doc) = @_; + my $special = $self->{$Special}; + + $special->{Doc} = $doc; + for my $kid (@{$special->{Values}}) + { + $kid->setOwnerDocument ($doc); + } +} + +sub getChildIndex +{ + my ($self, $attr) = @_; + my $i = 0; + for my $kid (@{$self->{$Special}->{Values}}) + { + return $i if $kid == $attr; + $i++; + } + -1; # not found +} + +sub getValues +{ + wantarray ? @{ $_[0]->{$Special}->{Values} } : $_[0]->{$Special}->{Values}; +} + +# Remove circular dependencies. The NamedNodeMap and its values should +# not be used afterwards. 
+sub dispose +{ + my $self = shift; + + for my $kid (@{$self->getValues}) + { + undef $kid->[XML::DOM::Node::_UsedIn]; # was delete + $kid->dispose; + } + + delete $self->{$Special}->{Doc}; + delete $self->{$Special}->{Parent}; + delete $self->{$Special}->{Values}; + + for my $key (keys %$self) + { + delete $self->{$key}; + } +} + +sub setParentNode +{ + $_[0]->{$Special}->{Parent} = $_[1]; +} + +sub getProperty +{ + $_[0]->{$Special}->{$_[1]}; +} + +#?? remove after debugging +sub toString +{ + my ($self) = @_; + my $str = "NamedNodeMap["; + while (my ($key, $val) = each %$self) + { + if ($key eq $Special) + { + $str .= "##Special ("; + while (my ($k, $v) = each %$val) + { + if ($k eq "Values") + { + $str .= $k . " => ["; + for my $a (@$v) + { +# $str .= $a->getNodeName . "=" . $a . ","; + $str .= $a->toString . ","; + } + $str .= "], "; + } + else + { + $str .= $k . " => " . $v . ", "; + } + } + $str .= "), "; + } + else + { + $str .= $key . " => " . $val . ", "; + } + } + $str . "]"; +} + +1; # package return code diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/NamedNodeMap.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/NamedNodeMap.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,130 @@ +=head1 NAME + +XML::DOM::NamedNodeMap - A hash table interface for XML::DOM + +=head1 DESCRIPTION + +Objects implementing the NamedNodeMap interface are used to represent +collections of nodes that can be accessed by name. Note that +NamedNodeMap does not inherit from NodeList; NamedNodeMaps are not +maintained in any particular order. Objects contained in an object +implementing NamedNodeMap may also be accessed by an ordinal index, but +this is simply to allow convenient enumeration of the contents of a +NamedNodeMap, and does not imply that the DOM specifies an order to +these Nodes. + +Note that in this implementation, the objects added to a NamedNodeMap +are kept in order. + +=head2 METHODS + +=over 4 + +=item getNamedItem (name) + +Retrieves a node specified by name. + +Return Value: A Node (of any type) with the specified name, or undef if +the specified name did not identify any node in the map. + +=item setNamedItem (arg) + +Adds a node using its nodeName attribute. + +As the nodeName attribute is used to derive the name which +the node must be stored under, multiple nodes of certain +types (those that have a "special" string value) cannot be +stored as the names would clash. This is seen as preferable +to allowing nodes to be aliased. + +Parameters: + I A node to store in a named node map. + +The node will later be accessible using the value of the nodeName +attribute of the node. If a node with that name is +already present in the map, it is replaced by the new one. + +Return Value: If the new Node replaces an existing node with the same +name the previously existing Node is returned, otherwise undef is returned. + +DOMExceptions: + +=over 4 + +=item * WRONG_DOCUMENT_ERR + +Raised if arg was created from a different document than the one that +created the NamedNodeMap. + +=item * NO_MODIFICATION_ALLOWED_ERR + +Raised if this NamedNodeMap is readonly. + +=item * INUSE_ATTRIBUTE_ERR + +Raised if arg is an Attr that is already an attribute of another Element object. +The DOM user must explicitly clone Attr nodes to re-use them in other elements. + +=back + +=item removeNamedItem (name) + +Removes a node specified by name. If the removed node is an +Attr with a default value it is immediately replaced. 
+ +Return Value: The node removed from the map or undef if no node with +such a name exists. + +DOMException: + +=over 4 + +=item * NOT_FOUND_ERR + +Raised if there is no node named name in the map. + +=back + +=item item (index) + +Returns the indexth item in the map. If index is greater than +or equal to the number of nodes in the map, this returns undef. + +Return Value: The node at the indexth position in the NamedNodeMap, or +undef if that is not a valid index. + +=item getLength + +Returns the number of nodes in the map. The range of valid child node +indices is 0 to length-1 inclusive. + +=back + +=head2 Additional methods not in the DOM Spec + +=over 4 + +=item getValues + +Returns a NodeList with the nodes contained in the NamedNodeMap. +The NodeList is "live", in that it reflects changes made to the NamedNodeMap. + +When this method is called in a list context, it returns a regular perl list +containing the values. Note that this list is not "live". E.g. + + @list = $map->getValues; # returns a perl list + $nodelist = $map->getValues; # returns a NodeList (object ref.) + for my $val ($map->getValues) # iterate over the values + +=item getChildIndex (node) + +Returns the index of the node in the NodeList as returned by getValues, or -1 +if the node is not in the NamedNodeMap. + +=item dispose + +Removes all circular references in this NamedNodeMap and its descendants so the +objects can be claimed for garbage collection. The objects should not be used +afterwards. + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/Node.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/Node.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,451 @@ +=head1 NAME + +XML::DOM::Node - Super class of all nodes in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::Node is the super class of all nodes in an XML::DOM document. +This means that all nodes that subclass XML::DOM::Node also inherit all +the methods that XML::DOM::Node implements. + +=head2 GLOBAL VARIABLES + +=over 4 + +=item @NodeNames + +The variable @XML::DOM::Node::NodeNames maps the node type constants to strings. +It is used by XML::DOM::Node::getNodeTypeName. + +=back + +=head2 METHODS + +=over 4 + +=item getNodeType + +Return an integer indicating the node type. See XML::DOM constants. + +=item getNodeName + +Return a property or a hardcoded string, depending on the node type. +Here are the corresponding functions or values: + + Attr getName + AttDef getName + AttlistDecl getName + CDATASection "#cdata-section" + Comment "#comment" + Document "#document" + DocumentType getNodeName + DocumentFragment "#document-fragment" + Element getTagName + ElementDecl getName + EntityReference getEntityName + Entity getNotationName + Notation getName + ProcessingInstruction getTarget + Text "#text" + XMLDecl "#xml-declaration" + +B: AttDef, AttlistDecl, ElementDecl and XMLDecl were added for +completeness. + +=item getNodeValue and setNodeValue (value) + +Returns a string or undef, depending on the node type. This method is provided +for completeness. In other languages it saves the programmer an upcast. +The value is either available thru some other method defined in the subclass, or +else undef is returned. Here are the corresponding methods: +Attr::getValue, Text::getData, CDATASection::getData, Comment::getData, +ProcessingInstruction::getData. + +=item getParentNode and setParentNode (parentNode) + +The parent of this node. All nodes, except Document, +DocumentFragment, and Attr may have a parent. 
However, if a +node has just been created and not yet added to the tree, or +if it has been removed from the tree, this is undef. + +=item getChildNodes + +A NodeList that contains all children of this node. If there +are no children, this is a NodeList containing no nodes. The +content of the returned NodeList is "live" in the sense that, +for instance, changes to the children of the node object that +it was created from are immediately reflected in the nodes +returned by the NodeList accessors; it is not a static +snapshot of the content of the node. This is true for every +NodeList, including the ones returned by the +getElementsByTagName method. + +NOTE: this implementation does not return a "live" NodeList for +getElementsByTagName. See L. + +When this method is called in a list context, it returns a regular perl list +containing the child nodes. Note that this list is not "live". E.g. + + @list = $node->getChildNodes; # returns a perl list + $nodelist = $node->getChildNodes; # returns a NodeList (object reference) + for my $kid ($node->getChildNodes) # iterate over the children of $node + +=item getFirstChild + +The first child of this node. If there is no such node, this returns undef. + +=item getLastChild + +The last child of this node. If there is no such node, this returns undef. + +=item getPreviousSibling + +The node immediately preceding this node. If there is no such +node, this returns undef. + +=item getNextSibling + +The node immediately following this node. If there is no such node, this returns +undef. + +=item getAttributes + +A NamedNodeMap containing the attributes (Attr nodes) of this node +(if it is an Element) or undef otherwise. +Note that adding/removing attributes from the returned object, also adds/removes +attributes from the Element node that the NamedNodeMap came from. + +=item getOwnerDocument + +The Document object associated with this node. This is also +the Document object used to create new nodes. When this node +is a Document this is undef. + +=item insertBefore (newChild, refChild) + +Inserts the node newChild before the existing child node +refChild. If refChild is undef, insert newChild at the end of +the list of children. + +If newChild is a DocumentFragment object, all of its children +are inserted, in the same order, before refChild. If the +newChild is already in the tree, it is first removed. + +Return Value: The node being inserted. + +DOMExceptions: + +=over 4 + +=item * HIERARCHY_REQUEST_ERR + +Raised if this node is of a type that does not allow children of the type of +the newChild node, or if the node to insert is one of this node's ancestors. + +=item * WRONG_DOCUMENT_ERR + +Raised if newChild was created from a different document than the one that +created this node. + +=item * NO_MODIFICATION_ALLOWED_ERR + +Raised if this node is readonly. + +=item * NOT_FOUND_ERR + +Raised if refChild is not a child of this node. + +=back + +=item replaceChild (newChild, oldChild) + +Replaces the child node oldChild with newChild in the list of +children, and returns the oldChild node. If the newChild is +already in the tree, it is first removed. + +Return Value: The node replaced. + +DOMExceptions: + +=over 4 + +=item * HIERARCHY_REQUEST_ERR + +Raised if this node is of a type that does not allow children of the type of +the newChild node, or it the node to put in is one of this node's ancestors. + +=item * WRONG_DOCUMENT_ERR + +Raised if newChild was created from a different document than the one that +created this node. 
+ +=item * NO_MODIFICATION_ALLOWED_ERR + +Raised if this node is readonly. + +=item * NOT_FOUND_ERR + +Raised if oldChild is not a child of this node. + +=back + +=item removeChild (oldChild) + +Removes the child node indicated by oldChild from the list of +children, and returns it. + +Return Value: The node removed. + +DOMExceptions: + +=over 4 + +=item * NO_MODIFICATION_ALLOWED_ERR + +Raised if this node is readonly. + +=item * NOT_FOUND_ERR + +Raised if oldChild is not a child of this node. + +=back + +=item appendChild (newChild) + +Adds the node newChild to the end of the list of children of +this node. If the newChild is already in the tree, it is +first removed. If it is a DocumentFragment object, the entire contents of +the document fragment are moved into the child list of this node + +Return Value: The node added. + +DOMExceptions: + +=over 4 + +=item * HIERARCHY_REQUEST_ERR + +Raised if this node is of a type that does not allow children of the type of +the newChild node, or if the node to append is one of this node's ancestors. + +=item * WRONG_DOCUMENT_ERR + +Raised if newChild was created from a different document than the one that +created this node. + +=item * NO_MODIFICATION_ALLOWED_ERR + +Raised if this node is readonly. + +=back + +=item hasChildNodes + +This is a convenience method to allow easy determination of +whether a node has any children. + +Return Value: 1 if the node has any children, 0 otherwise. + +=item cloneNode (deep) + +Returns a duplicate of this node, i.e., serves as a generic +copy constructor for nodes. The duplicate node has no parent +(parentNode returns undef.). + +Cloning an Element copies all attributes and their values, +including those generated by the XML processor to represent +defaulted attributes, but this method does not copy any text +it contains unless it is a deep clone, since the text is +contained in a child Text node. Cloning any other type of +node simply returns a copy of this node. + +Parameters: + I If true, recursively clone the subtree under the specified node. +If false, clone only the node itself (and its attributes, if it is an Element). + +Return Value: The duplicate node. + +=item normalize + +Puts all Text nodes in the full depth of the sub-tree +underneath this Element into a "normal" form where only +markup (e.g., tags, comments, processing instructions, CDATA +sections, and entity references) separates Text nodes, i.e., +there are no adjacent Text nodes. This can be used to ensure +that the DOM view of a document is the same as if it were +saved and re-loaded, and is useful when operations (such as +XPointer lookups) that depend on a particular document tree +structure are to be used. + +B: In the DOM Spec this method is defined in the Element and +Document class interfaces only, but it doesn't hurt to have it here... + +=item getElementsByTagName (name [, recurse]) + +Returns a NodeList of all descendant elements with a given +tag name, in the order in which they would be encountered in +a preorder traversal of the Element tree. + +Parameters: + I The name of the tag to match on. The special value "*" matches all tags. + I Whether it should return only direct child nodes (0) or any descendant that matches the tag name (1). This argument is optional and defaults to 1. It is not part of the DOM spec. + +Return Value: A list of matching Element nodes. + +NOTE: this implementation does not return a "live" NodeList for +getElementsByTagName. See L. 
+ +When this method is called in a list context, it returns a regular perl list +containing the result nodes. E.g. + + @list = $node->getElementsByTagName("tag"); # returns a perl list + $nodelist = $node->getElementsByTagName("tag"); # returns a NodeList (object ref.) + for my $elem ($node->getElementsByTagName("tag")) # iterate over the result nodes + +=back + +=head2 Additional methods not in the DOM Spec + +=over 4 + +=item getNodeTypeName + +Return the string describing the node type. +E.g. returns "ELEMENT_NODE" if getNodeType returns ELEMENT_NODE. +It uses @XML::DOM::Node::NodeNames. + +=item toString + +Returns the entire subtree as a string. + +=item printToFile (filename) + +Prints the entire subtree to the file with the specified filename. + +Croaks: if the file could not be opened for writing. + +=item printToFileHandle (handle) + +Prints the entire subtree to the file handle. +E.g. to print to STDOUT: + + $node->printToFileHandle (\*STDOUT); + +=item print (obj) + +Prints the entire subtree using the object's print method. E.g to print to a +FileHandle object: + + $f = new FileHandle ("file.out", "w"); + $node->print ($f); + +=item getChildIndex (child) + +Returns the index of the child node in the list returned by getChildNodes. + +Return Value: the index or -1 if the node is not found. + +=item getChildAtIndex (index) + +Returns the child node at the specifed index or undef. + +=item addText (text) + +Appends the specified string to the last child if it is a Text node, or else +appends a new Text node (with the specified text.) + +Return Value: the last child if it was a Text node or else the new Text node. + +=item dispose + +Removes all circular references in this node and its descendants so the +objects can be claimed for garbage collection. The objects should not be used +afterwards. + +=item setOwnerDocument (doc) + +Sets the ownerDocument property of this node and all its children (and +attributes etc.) to the specified document. +This allows the user to cut and paste document subtrees between different +XML::DOM::Documents. The node should be removed from the original document +first, before calling setOwnerDocument. + +This method does nothing when called on a Document node. + +=item isAncestor (parent) + +Returns 1 if parent is an ancestor of this node or if it is this node itself. + +=item expandEntityRefs (str) + +Expands all the entity references in the string and returns the result. +The entity references can be character references (e.g. "{" or "ῂ"), +default entity references (""", ">", "<", "'" and "&") or +entity references defined in Entity objects as part of the DocumentType of +the owning Document. Character references are expanded into UTF-8. +Parameter entity references (e.g. %ent;) are not expanded. + +=item to_sax ( %HANDLERS ) + +E.g. + + $node->to_sax (DocumentHandler => $my_handler, + Handler => $handler2 ); + +%HANDLERS may contain the following handlers: + +=over 4 + +=item * DocumentHandler + +=item * DTDHandler + +=item * EntityResolver + +=item * Handler + +Default handler when one of the above is not specified + +=back + +Each XML::DOM::Node generates the appropriate SAX callbacks (for the +appropriate SAX handler.) Different SAX handlers can be plugged in to +accomplish different things, e.g. L would check the node +(currently only Document and Element nodes are supported), L +would create a new DOM subtree (thereby, in essence, copying the Node) +and in the near future, XML::Writer could print the node. 
+All Perl SAX related work is still in flux, so this interface may change a +little. + +See PerlSAX for the description of the SAX interface. + +=item check ( [$checker] ) + +See descriptions for check() in L and L. + +=item xql ( @XQL_OPTIONS ) + +To use the xql method, you must first I L and L. +This method is basically a shortcut for: + + $query = new XML::XQL::Query ( @XQL_OPTIONS ); + return $query->solve ($node); + +If the first parameter in @XQL_OPTIONS is the XQL expression, you can leave off +the 'Expr' keyword, so: + + $node->xql ("doc//elem1[@attr]", @other_options); + +is identical to: + + $node->xql (Expr => "doc//elem1[@attr]", @other_options); + +See L for other available XQL_OPTIONS. +See L and L for more info. + +=item isHidden () + +Whether the node is hidden. +See L for details. + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/NodeList.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/NodeList.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,46 @@ +###################################################################### +package XML::DOM::NodeList; +###################################################################### + +use vars qw ( $EMPTY ); + +# Empty NodeList +$EMPTY = new XML::DOM::NodeList; + +sub new +{ + bless [], $_[0]; +} + +sub item +{ + $_[0]->[$_[1]]; +} + +sub getLength +{ + int (@{$_[0]}); +} + +#------------------------------------------------------------ +# Extra method implementations + +sub dispose +{ + my $self = shift; + for my $kid (@{$self}) + { + $kid->dispose; + } +} + +sub setOwnerDocument +{ + my ($self, $doc) = @_; + for my $kid (@{$self}) + { + $kid->setOwnerDocument ($doc); + } +} + +1; # package return code diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/NodeList.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/NodeList.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,46 @@ +=head1 NAME + +XML::DOM::NodeList - A node list as used by XML::DOM + +=head1 DESCRIPTION + +The NodeList interface provides the abstraction of an ordered +collection of nodes, without defining or constraining how this +collection is implemented. + +The items in the NodeList are accessible via an integral index, +starting from 0. + +Although the DOM spec states that all NodeLists are "live" in that they +allways reflect changes to the DOM tree, the NodeList returned by +getElementsByTagName is not live in this implementation. See L +for details. + +=head2 METHODS + +=over 4 + +=item item (index) + +Returns the indexth item in the collection. If index is +greater than or equal to the number of nodes in the list, +this returns undef. + +=item getLength + +The number of nodes in the list. The range of valid child +node indices is 0 to length-1 inclusive. + +=back + +=head2 Additional methods not in the DOM Spec + +=over 4 + +=item dispose + +Removes all circular references in this NodeList and its descendants so the +objects can be claimed for garbage collection. The objects should not be used +afterwards. + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/Notation.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/Notation.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,47 @@ +=head1 NAME + +XML::DOM::Notation - An XML NOTATION in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::Notation extends L. + +This node represents a Notation, e.g. 
+ + + + + + + + + +=head2 METHODS + +=over 4 + +=item getName and setName (name) + +Returns (or sets) the Notation name, which is the first token after the +NOTATION keyword. + +=item getSysId and setSysId (sysId) + +Returns (or sets) the system ID, which is the token after the optional +SYSTEM keyword. + +=item getPubId and setPubId (pubId) + +Returns (or sets) the public ID, which is the token after the optional +PUBLIC keyword. + +=item getBase + +This is passed by XML::Parser in the Notation handler. +I don't know what it is yet. + +=item getNodeName + +Returns the same as getName. + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/Parser.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/Parser.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,66 @@ +=head1 NAME + +XML::DOM::Parser - An XML::Parser that builds XML::DOM document structures + +=head1 SYNOPSIS + + use XML::DOM; + + my $parser = new XML::DOM::Parser; + my $doc = $parser->parsefile ("file.xml"); + +=head1 DESCRIPTION + +XML::DOM::Parser extends L + +The XML::Parser module was written by Clark Cooper and +is built on top of XML::Parser::Expat, +which is a lower level interface to James Clark's expat library. + +XML::DOM::Parser parses XML strings or files +and builds a data structure that conforms to the API of the Document Object +Model as described at L. +See the L manpage for other additional properties of the +XML::DOM::Parser class. +Note that the 'Style' property should not be used (it is set internally.) + +The XML::Parser B option is more or less supported, in that it will +generate EntityReference objects whenever an entity reference is encountered +in character data. I'm not sure how useful this is. Any comments are welcome. + +As described in the synopsis, when you create an XML::DOM::Parser object, +the parse and parsefile methods create an L object +from the specified input. This Document object can then be examined, modified and +written back out to a file or converted to a string. + +When using XML::DOM with XML::Parser version 2.19 and up, setting the +XML::DOM::Parser option B to 1 will store CDATASections in +CDATASection nodes, instead of converting them to Text nodes. +Subsequent CDATASection nodes will be merged into one. Let me know if this +is a problem. + +=head1 Using LWP to parse URLs + +The parsefile() method now also supports URLs, e.g. I. +It uses LWP to download the file and then calls parse() on the resulting string. +By default it will use a L that is created as follows: + + use LWP::UserAgent; + $LWP_USER_AGENT = LWP::UserAgent->new; + $LWP_USER_AGENT->env_proxy; + +Note that env_proxy reads proxy settings from environment variables, which is what I need to +do to get thru our firewall. If you want to use a different LWP::UserAgent, you can either set +it globally with: + + XML::DOM::Parser::set_LWP_UserAgent ($my_agent); + +or, you can specify it for a specific XML::DOM::Parser by passing it to the constructor: + + my $parser = new XML::DOM::Parser (LWP_UserAgent => $my_agent); + +Currently, LWP is used when the filename (passed to parsefile) starts with one of +the following URL schemes: http, https, ftp, wais, gopher, or file (followed by a colon.) +If I missed one, please let me know. + +The LWP modules are part of libwww-perl which is available at CPAN. 
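+
+As a rough sketch (the URL below is a placeholder, not a real endpoint),
+a custom user agent can be supplied either globally or per parser:
+
+  use XML::DOM;
+  use LWP::UserAgent;
+
+  # A hypothetical agent with an explicit timeout
+  my $ua = LWP::UserAgent->new;
+  $ua->timeout (30);
+
+  # Set it globally for all XML::DOM parsers ...
+  XML::DOM::Parser::set_LWP_UserAgent ($ua);
+
+  # ... or only for this parser instance
+  my $parser = new XML::DOM::Parser (LWP_UserAgent => $ua);
+  my $doc = $parser->parsefile ("http://www.example.com/file.xml");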
diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/PerlSAX.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/PerlSAX.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,46 @@ +package XML::DOM::PerlSAX; +use strict; + +BEGIN +{ + if ($^W) + { + warn "XML::DOM::PerlSAX has been renamed to XML::Handler::DOM, " + "please modify your code accordingly."; + } +} + +use XML::Handler::DOM; +use vars qw{ @ISA }; +@ISA = qw{ XML::Handler::DOM }; + +1; # package return code + +__END__ + +=head1 NAME + +XML::DOM::PerlSAX - Old name of L + +=head1 SYNOPSIS + + See L + +=head1 DESCRIPTION + +XML::DOM::PerlSAX was renamed to L to comply +with naming conventions for PerlSAX filters/handlers. + +For backward compatibility, this package will remain in existence +(it simply includes XML::Handler::BuildDOM), but it will print a warning when +running with I<'perl -w'>. + +=head1 AUTHOR + +Send bug reports, hints, tips, suggestions to Enno Derksen at +>. + +=head1 SEE ALSO + +L, L + diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/ProcessingInstruction.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/ProcessingInstruction.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,32 @@ +=head1 NAME + +XML::DOM::ProcessingInstruction - An XML processing instruction in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::ProcessingInstruction extends L. + +It represents a "processing instruction", used in XML as a way to keep +processor-specific information in the text of the document. An example: + + + +Here, "PI" is the target and "processing instruction" is the data. + +=head2 METHODS + +=over 4 + +=item getTarget + +The target of this processing instruction. XML defines this +as being the first token following the markup that begins the +processing instruction. + +=item getData and setData (data) + +The content of this processing instruction. This is from the +first non white space character after the target to the +character immediately preceding the ?>. + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/Text.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/Text.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,60 @@ +=head1 NAME + +XML::DOM::Text - A piece of XML text in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::Text extends L, which extends +L. + +The Text interface represents the textual content (termed character +data in XML) of an Element or Attr. If there is no markup inside an +element's content, the text is contained in a single object +implementing the Text interface that is the only child of the element. +If there is markup, it is parsed into a list of elements and Text nodes +that form the list of children of the element. + +When a document is first made available via the DOM, there is only one +Text node for each block of text. Users may create adjacent Text nodes +that represent the contents of a given element without any intervening +markup, but should be aware that there is no way to represent the +separations between these nodes in XML or HTML, so they will not (in +general) persist between DOM editing sessions. The normalize() method +on Element merges any such adjacent Text objects into a single node for +each block of text; this is recommended before employing operations +that depend on a particular document structure, such as navigation with +XPointers. 
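
A minimal sketch of that merging behaviour, using the XML::DOM API directly
(the element and text values are arbitrary):

  use XML::DOM;

  my $doc  = new XML::DOM::Document;
  my $para = $doc->createElement ("para");
  $para->appendChild ($doc->createTextNode ("Hello, "));
  $para->appendChild ($doc->createTextNode ("world"));

  print $para->getChildNodes->getLength, "\n";   # 2 adjacent Text nodes
  $para->normalize;
  print $para->getChildNodes->getLength, "\n";   # 1 merged Text node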
+ +=head2 METHODS + +=over 4 + +=item splitText (offset) + +Breaks this Text node into two Text nodes at the specified +offset, keeping both in the tree as siblings. This node then +only contains all the content up to the offset point. And a +new Text node, which is inserted as the next sibling of this +node, contains all the content at and after the offset point. + +Parameters: + I The offset at which to split, starting from 0. + +Return Value: The new Text node. + +DOMExceptions: + +=over 4 + +=item * INDEX_SIZE_ERR + +Raised if the specified offset is negative or greater than the number of +characters in data. + +=item * NO_MODIFICATION_ALLOWED_ERR + +Raised if this node is readonly. + +=back + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/ValParser.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/ValParser.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,123 @@ +# +# Use XML::DOM::ValParser instead of XML::DOM::Parser and it will +# use XML::Checker to validate XML at parse time. +# + +package XML::DOM::ValParser; + +use strict; +use XML::DOM; +use XML::Checker::Parser; + +use vars qw( @ISA @SupportedHandlers ); + +@ISA = qw( XML::Checker::Parser ); + +# These XML::Parser handlers are currently supported by XML::DOM +@SupportedHandlers = qw( Init Final Char Start End Default Doctype + CdataStart CdataEnd XMLDecl Entity Notation Proc + Default Comment Attlist Element Unparsed ); + +sub new +{ + my ($class, %args) = @_; + + my %handlers = (); + for (@SupportedHandlers) + { + my $domHandler = "XML::Parser::Dom::$_"; + $handlers{$_} = \&$domHandler; + } + $args{Handlers} = \%handlers; + $class->SUPER::new (%args); +} + +sub parse +{ + # Do what XML::DOM::Parser normally does. + # Temporarily override his @ISA, so that he thinks he's a + # XML::DOM::ValParser and calls the right SUPER::parse(), + # (otherwise he thinks he's an XML::DOM::Parser and you see runtime + # error: Can't call method "Init" on unblessed reference ...) + local @XML::DOM::Parser::ISA = @ISA; + local $XML::Checker::Parser::_skipInsignifWS = $_[0]->{SkipInsignifWS}; + XML::DOM::Parser::parse (@_); +} + +1; # package return code + +__END__ + +=head1 NAME + +XML::DOM::ValParser - an XML::DOM::Parser that validates at parse time + +=head1 SYNOPSIS + + use XML::DOM::ValParser; + + my %expat_options = (KeepCDATA => 1, + Handlers => [ Unparsed => \&my_Unparsed_handler ]); + my $parser = new XML::DOM::ValParser (%expat_options); + + eval { + local $XML::Checker::FAIL = \&my_fail; + my $doc = $parser->parsefile ("fail.xml"); + ... XML::DOM::Document was created sucessfully ... + }; + if ($@) { + # Either XML::Parser (expat) threw an exception or my_fail() died. + ... your error handling code here ... + # Note that the XML::DOM::Document is automatically disposed off and + # will be garbage collected + } + + # Throws an exception (with die) when an error is encountered, this + # will stop the parsing process. + # Don't die if a warning or info message is encountered, just print a message. + sub my_fail { + my $code = shift; + die XML::Checker::error_string ($code, @_) if $code < 200; + XML::Checker::print_error ($code, @_); + } + +=head1 DESCRIPTION + +Use XML::DOM::ValParser wherever you would use L and +your XML will be checked using L at parse time. + +See L for details on XML::DOM::Parser options. +See L for details on setting the fail handler (my_fail.) 
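
If validation problems should never abort parsing, the fail handler can record
the messages instead of dying; a sketch (the @messages array and the handler
name are only illustrations):

  my @messages;

  sub my_lenient_fail
  {
      my $code = shift;
      push @messages, XML::Checker::error_string ($code, @_);
  }

  # Activate it around the parse, just like my_fail in the synopsis:
  #   local $XML::Checker::FAIL = \&my_lenient_fail;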
+ +The following handlers are currently supported, just like XML::DOM::Parser: +Init, Final, Char, Start, End, Default, Doctype, CdataStart, CdataEnd, +XMLDecl, Entity, Notation, Proc, Default, Comment, Attlist, Element, Unparsed. + +=head1 XML::DOM::ValParser + +XML::DOM::ValParser extends from L. It creates an +L object and routes all event handlers through the checker, +before processing the events to create the XML::DOM::Document. + +Just like L, the checker object can be retrieved with +the getChecker() method and can be reused later on (provided that the DOCTYPE +section of the XML::DOM::Document did not change in the mean time.) + +You can control which errors are fatal (and therefore should stop creation +of the XML::DOM::Document) by filtering the appropriate error codes in +the global $XML::Checker::FAIL handler +(see L) and +calling I or I appropriately. + +Just like XML::Checker::Parser, XML::DOM::ValParser supports the +SkipExternalDTD and SkipInsignifWS options. See L +for details. + +=head1 AUTHOR + +Send bug reports, hints, tips, suggestions to Enno Derksen at +>. + +=head1 SEE ALSO + +L, L (L) diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/DOM/XMLDecl.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/DOM/XMLDecl.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,33 @@ +=head1 NAME + +XML::DOM::XMLDecl - XML declaration in XML::DOM + +=head1 DESCRIPTION + +XML::DOM::XMLDecl extends L, but is not part of the DOM Level 1 +specification. + +It contains the XML declaration, e.g. + + + +See also XML::DOM::Document::getXMLDecl. + +=head2 METHODS + +=over 4 + +=item getVersion and setVersion (version) + +Returns and sets the XML version. At the time of this writing the version should +always be "1.0" + +=item getEncoding and setEncoding (encoding) + +undef may be specified for the encoding value. + +=item getStandalone and setStandalone (standalone) + +undef may be specified for the standalone value. + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Filter/DetectWS.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Filter/DetectWS.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,622 @@ +package XML::Filter::DetectWS; +use strict; +use XML::Filter::SAXT; + +#---------------------------------------------------------------------- +# CONSTANT DEFINITIONS +#---------------------------------------------------------------------- + +# Locations of whitespace +sub WS_START (%) { 1 } # just after +sub WS_END (%) { 2 } # just before +sub WS_INTER (%) { 0 } # not at the start or end (i.e. intermediate) +sub WS_ONLY (%) { 3 } # both START and END, i.e. between and + +# The states of the WhiteSpace detection code +# for regular elements, i.e. elements that: +# 1) don't have xml:space="preserve" +# 2) have an ELEMENT model that allows text children (i.e. 
ANY or Mixed content) + +sub START (%) { 0 } # just saw +sub ONLY_WS (%) { 1 } # saw followed by whitespace (only) +sub ENDS_IN_WS (%) { 2 } # ends in whitespace (sofar) +sub ENDS_IN_NON_WS (%) { 3 } # ends in non-ws text or non-text node (sofar) + +# NO_TEXT States: when model does not allow text +# (we assume that all text children are whitespace) +sub NO_TEXT_START (%) { 4 } # just saw +sub NO_TEXT_ONLY_WS (%) { 5 } # saw followed by whitespace (only) +sub NO_TEXT_ENDS_IN_WS (%) { 6 } # ends in whitespace (sofar) +sub NO_TEXT_ENDS_IN_NON_WS (%) { 7 } # ends in non-text node (sofar) + +# State for elements with xml:space="preserve" (all text is non-WS) +sub PRESERVE_WS (%) { 8 } + +#---------------------------------------------------------------------- +# METHOD DEFINITIONS +#---------------------------------------------------------------------- + +# Constructor options: +# +# SkipIgnorableWS 1 means: don't forward ignorable_whitespace events +# Handler SAX Handler that will receive the resulting events +# + +sub new +{ + my ($class, %options) = @_; + + my $self = bless \%options, $class; + + $self->init_handlers; + + $self; +} + +# Does nothing +sub noop {} + +sub init_handlers +{ + my ($self) = @_; + my %handlers; + + my $handler = $self->{Handler}; + + for my $cb (map { @{$_} } values %XML::Filter::SAXT::SAX_HANDLERS) + { + if (UNIVERSAL::can ($handler, $cb)) + { + $handlers{$cb} = eval "sub { \$handler->$cb (\@_) }"; + } + else + { + $handlers{$cb} = \&noop; + } + } + + if ($self->{SkipIgnorableWS}) + { + delete $handlers{ignorable_whitespace}; # if it exists + } + elsif (UNIVERSAL::can ($handler, 'ignorable_whitespace')) + { + # Support ignorable_whitespace callback if it exists + # (if not, just use characters callback) + $handlers{ignorable_whitespace} = + sub { $handler->ignorable_whitespace (@_) }; + } + else + { + $handlers{ignorable_whitespace} = $handlers{characters}; + } + + $handlers{ws} = $handlers{characters}; +#?? were should whitespace go? 
+ + # NOTE: 'cdata' is not a valid PerlSAX callback + if (UNIVERSAL::can ($handler, 'start_cdata') && + UNIVERSAL::can ($handler, 'end_cdata')) + { + $handlers{cdata} = sub { + $handler->start_cdata; + $handler->characters (@_); + $handler->end_cdata; + } + } + else # pass CDATA as regular characters + { + $handlers{cdata} = $handlers{characters}; + } + + $self->{Callback} = \%handlers; +} + +sub start_cdata +{ + my ($self, $event) = @_; + + $self->{InCDATA} = 1; +} + +sub end_cdata +{ + my ($self, $event) = @_; + + $self->{InCDATA} = 0; +} + +sub entity_reference +{ + my ($self, $event) = @_; + + $self->push_event ('entity_reference', $event); + + my $parent = $self->{ParentStack}->[-1]; + $parent->{State} |= ENDS_IN_NON_WS unless $parent->{State} == PRESERVE_WS; +} + +sub comment +{ + my ($self, $event) = @_; + + $self->push_event ('comment', $event); + + my $parent = $self->{ParentStack}->[-1]; + $parent->{State} |= ENDS_IN_NON_WS unless $parent->{State} == PRESERVE_WS; +} + +sub processing_instruction +{ + my ($self, $event) = @_; + + $self->push_event ('processing_instruction', $event); + + my $parent = $self->{ParentStack}->[-1]; + $parent->{State} |= ENDS_IN_NON_WS unless $parent->{State} == PRESERVE_WS; +} + +sub start_document +{ + my ($self, $event) = @_; + + # Initialize initial state + $self->{ParentStack} = []; + $self->{EventQ} = []; + $self->{InCDATA} = 0; + + $self->init_handlers; + + $event = {} unless defined $event; + # Don't preserve WS by default (unless specified by the user) + $event->{PreserveWS} = defined ($self->{PreserveWS}) ? + $self->{PreserveWS} : 0; + + # We don't need whitespace detection at the document level + $event->{State} = PRESERVE_WS; + + $self->push_event ('start_document', $event); + push @{ $self->{ParentStack} }, $event; +} + +sub end_document +{ + my ($self, $event) = @_; + $event = {} unless defined $event; + + $self->push_event ('end_document', $event); + + $self->flush; +} + +sub start_element +{ + my ($self, $event) = @_; + + my $pres = $event->{Attributes}->{'xml:space'}; + if (defined $pres) + { + $event->{PreserveWS} = $pres eq "preserve"; + } + else + { + $event->{PreserveWS} = $self->{ParentStack}->[-1]->{PreserveWS}; + } + + if ($self->{NoText}->{ $event->{Name} }) + { + $event->{NoText} = 1; + } + + $event->{State} = $self->get_init_state ($event); + + $self->push_event ('start_element', $event); + push @{ $self->{ParentStack} }, $event; +} + +sub end_element +{ + my ($self, $event) = @_; + + # Mark previous whitespace event as the last event (WS_END) + # (if it's there) + my $prev = $self->{EventQ}->[-1]; + $prev->{Loc} |= WS_END if exists $prev->{Loc}; + + $self->push_event ('end_element', $event); + + my $elem = pop @{ $self->{ParentStack} }; + delete $elem->{State}; +} + +sub characters +{ + my ($self, $event) = @_; + + if ($self->{InCDATA}) + { + # NOTE: 'cdata' is not a valid PerlSAX callback + $self->push_event ('cdata', $event); + + my $parent = $self->{ParentStack}->[-1]; + $parent->{State} |= ENDS_IN_NON_WS unless $parent->{State} == PRESERVE_WS; + return; + } + + my $text = $event->{Data}; + return unless length ($text); + + my $state = $self->{ParentStack}->[-1]->{State}; + if ($state == PRESERVE_WS) + { + $self->push_event ('characters', $event); + } + elsif ($state == NO_TEXT_START) + { + # ELEMENT model does not allow regular text. + # All characters are whitespace. 
+ $self->push_event ('ignorable_whitespace', { Data => $text, Loc => WS_START }); + $state = NO_TEXT_ONLY_WS; + } + elsif ($state == NO_TEXT_ONLY_WS) + { + $self->merge_text ($text, 'ignorable_whitespace', WS_START ); + } + elsif ($state == NO_TEXT_ENDS_IN_NON_WS) + { + $self->push_event ('ignorable_whitespace', { Data => $text, Loc => WS_INTER }); + $state = NO_TEXT_ENDS_IN_WS; + } + elsif ($state == NO_TEXT_ENDS_IN_WS) + { + $self->merge_text ($text, 'ignorable_whitespace', WS_INTER ); + } + elsif ($state == START) + { +#?? add support for full Unicode + $text =~ /^(\s*)(\S(?:.*\S)?)?(\s*)$/; + if (length $1) + { + $self->push_event ('ws', { Data => $1, Loc => WS_START }); + $state = ONLY_WS; + } + if (length $2) + { + $self->push_event ('characters', { Data => $2 }); + $state = ENDS_IN_NON_WS; + } + if (length $3) + { + $self->push_event ('ws', { Data => $3, Loc => WS_INTER }); + $state = ENDS_IN_WS; + } + } + elsif ($state == ONLY_WS) + { + $text =~ /^(\s*)(\S(?:.*\S)?)?(\s*)$/; + if (length $1) + { + $self->merge_text ($1, 'ws', WS_START); + } + if (length $2) + { + $self->push_event ('characters', { Data => $2 }); + $state = ENDS_IN_NON_WS; + } + if (length $3) + { + $self->push_event ('ws', { Data => $3, Loc => WS_INTER }); + $state = ENDS_IN_WS; + } + } + else # state == ENDS_IN_WS or ENDS_IN_NON_WS + { + $text =~ /^(.*\S)?(\s*)$/; + if (length $1) + { + if ($state == ENDS_IN_NON_WS) + { + $self->merge_text ($1, 'characters'); + } + else + { + $self->push_event ('characters', { Data => $1 }); + $state = ENDS_IN_NON_WS; + } + } + if (length $2) + { + if ($state == ENDS_IN_WS) + { + $self->merge_text ($2, 'ws', WS_INTER); + } + else + { + $self->push_event ('ws', { Data => $2, Loc => WS_INTER }); + $state = ENDS_IN_WS; + } + } + } + + $self->{ParentStack}->[-1]->{State} = $state; +} + +sub element_decl +{ + my ($self, $event) = @_; + my $tag = $event->{Name}; + my $model = $event->{Model}; + + # Check the model to see if the elements may contain regular text + $self->{NoText}->{$tag} = ($model eq 'EMPTY' || $model !~ /\#PCDATA/); + + $self->push_event ('element_decl', $event); +} + +sub attlist_decl +{ + my ($self, $event) = @_; + + my $prev = $self->{EventQ}->[-1]; + if ($prev->{EventType} eq 'attlist_decl' && + $prev->{ElementName} eq $event->{ElementName}) + { + $prev->{MoreFollow} = 1; + $event->{First} = 0; + } + else + { + $event->{First} = 1; + } + + $self->push_event ('attlist_decl', $event); +} + +sub notation_decl +{ + my ($self, $event) = @_; + $self->push_event ('notation_decl', $event); +} + +sub unparsed_entity_decl +{ + my ($self, $event) = @_; + $self->push_event ('unparsed_entity_decl', $event); +} + +sub entity_decl +{ + my ($self, $event) = @_; + $self->push_event ('entity_decl', $event); +} + +sub doctype_decl +{ + my ($self, $event) = @_; + $self->push_event ('doctype_decl', $event); +} + +sub xml_decl +{ + my ($self, $event) = @_; + $self->push_event ('xml_decl', $event); +} + +#?? what about set_document_locator, resolve_entity + +# +# Determine the initial State for the current Element. +# By default, we look at the PreserveWS property (i.e. value of xml:space.) +# The user can override this to force xml:space="preserve" for a particular +# element with e.g. +# +# sub get_init_state +# { +# my ($self, $event) = @_; +# ($event->{Name} eq 'foo' || $event->{PreserveWS}) ? 
PRESERVE_WS : START; +# } +# +sub get_init_state +{ + my ($self, $event) = @_; + my $tag = $event->{Name}; + + if ($self->{NoText}->{$tag}) # ELEMENT model does not allow text + { + return NO_TEXT_START; + } + $event->{PreserveWS} ? PRESERVE_WS : START; +} + +sub push_event +{ + my ($self, $type, $event) = @_; + + $event->{EventType} = $type; + + $self->flush; + push @{ $self->{EventQ} }, $event; +} + +# Merge text with previous event (if it has the same EventType) +# or push a new text event +sub merge_text +{ + my ($self, $str, $eventType, $wsLocation) = @_; + my $q = $self->{EventQ}; + + my $prev = $q->[-1]; + if (defined $prev && $prev->{EventType} eq $eventType) + { + $prev->{Data} .= $str; + } + else + { + my $event = { Data => $str }; + $event->{Loc} = $wsLocation if defined $wsLocation; + $self->push_event ($eventType, $event); + } +} + +# Forward all events on the EventQ +sub flush +{ + my ($self) = @_; + + my $q = $self->{EventQ}; + while (@$q) + { + my $event = shift @$q; + my $type = $event->{EventType}; + delete $event->{EventType}; + + $self->{Callback}->{$type}->($event); + } +} + +1; # package return code + +__END__ + +=head1 NAME + +XML::Filter::DetectWS - A PerlSAX filter that detects ignorable whitespace + +=head1 SYNOPSIS + + use XML::Filter::DetectWS; + + my $detect = new XML::Filter::DetectWS (Handler => $handler, + SkipIgnorableWS => 1); + +=head1 DESCRIPTION + +This a PerlSAX filter that detects which character data contains +ignorable whitespace and optionally filters it. + +Note that this is just a first stab at the implementation and it may +change completely in the near future. Please provide feedback whether +you like it or not, so I know whether I should change it. + +The XML spec defines ignorable whitespace as the character data found in elements +that were defined in an declaration with a model of 'EMPTY' or +'Children' (Children is the rule that does not contain '#PCDATA'.) + +In addition, XML::Filter::DetectWS allows the user to define other whitespace to +be I. The ignorable whitespace is passed to the PerlSAX Handler with +the B handler, provided that the Handler implements this +method. (Otherwise it is passed to the characters handler.) +If the B is set, the ignorable whitespace is simply +discarded. + +XML::Filter::DetectWS also takes xml:space attributes into account. See below +for details. + +CDATA sections are passed in the standard PerlSAX way (i.e. with surrounding +start_cdata and end_cdata events), unless the Handler does not implement these +methods. In that case, the CDATA section is simply passed to the characters +method. + +=head1 Constructor Options + +=over 4 + +=item * SkipIgnorableWS (Default: 0) + +When set, detected ignorable whitespace is discarded. + +=item * Handler + +The PerlSAX handler (or filter) that will receive the PerlSAX events from this +filter. + +=back + +=head1 Current Implementation + +When determining which whitespace is ignorable, it first looks at the +xml:space attribute of the parent element node (and its ancestors.) +If the attribute value is "preserve", then it is *NOT* ignorable. +(If someone took the trouble of adding xml:space="preserve", then that is +the final answer...) + +If xml:space="default", then we look at the definition of the parent +element. If the model is 'EMPTY' or follows the 'Children' rule (i.e. does not +contain '#PCDATA') then we know that the whitespace is ignorable. +Otherwise we need input from the user somehow. 
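
One way to supply that input today is to subclass the filter and override
get_init_state(), as the comments in the source suggest. A sketch, where the
package name and the element name 'pre' are only illustrations:

  package My::DetectWS;
  use strict;
  use XML::Filter::DetectWS;
  use vars qw( @ISA );
  @ISA = qw( XML::Filter::DetectWS );

  # Treat <pre> elements as if xml:space="preserve" were set on them
  sub get_init_state
  {
      my ($self, $event) = @_;
      return $self->PRESERVE_WS if $event->{Name} eq 'pre';
      $self->SUPER::get_init_state ($event);
  }

  1;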
+ +The idea is that the API of DetectWS will be extended, so that you can +specify/override e.g. which elements should behave as if xml:space="preserve" +were set, and/or which elements should behave as if the model was +defined a certain way, etc. + +Please send feedback! + +The current implementation also detects whitespace after an element-start tag, +whitespace before an element-end tag. +It also detects whitespace before an element-start and after an element-end tag +and before or after comments, processing instruction, cdata sections etc., +but this needs to be reimplemented. +In either case, the detected whitespace is split off into its own PerlSAX +characters event and an extra property 'Loc' is added. It can have 4 possible +values: + +=over 4 + +=item * 1 (WS_START) - whitespace immediately after element-start tag + +=item * 2 (WS_END) - whitespace just before element-end tag + +=item * 3 (WS_ONLY) - both WS_START and WS_END, i.e. it's the only text found between the start and end tag and it's all whitespace + +=item * 0 (WS_INTER) - none of the above, probably before an element-start tag, +after an element-end tag, or before or after a comment, PI, cdata section etc. + +=back + +Note that WS_INTER may not be that useful, so this may change. + +=head1 xml:space attribute + +The XML spec states that: A special attribute +named xml:space may be attached to an element +to signal an intention that in that element, +white space should be preserved by applications. +In valid documents, this attribute, like any other, must be +declared if it is used. +When declared, it must be given as an +enumerated type whose only +possible values are "default" and "preserve". +For example: + + + +The value "default" signals that applications' +default white-space processing modes are acceptable for this element; the +value "preserve" indicates the intent that applications preserve +all the white space. +This declared intent is considered to apply to all elements within the content +of the element where it is specified, unless overriden with another instance +of the xml:space attribute. + +The root element of any document +is considered to have signaled no intentions as regards application space +handling, unless it provides a value for +this attribute or the attribute is declared with a default value. + +[... end of excerpt ...] + +=head1 CAVEATS + +This code is highly experimental! +It has not been tested well and the API may change. + +The code that detects of blocks of whitespace at potential indent positions +may need some work. See + +=head1 AUTHOR + +Send bug reports, hints, tips, suggestions to Enno Derksen at +>. + +=cut diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Filter/Reindent.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Filter/Reindent.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,301 @@ +package XML::Filter::Reindent; +use strict; +use XML::Filter::DetectWS; + +use vars qw{ @ISA }; +@ISA = qw{ XML::Filter::DetectWS }; + +sub MAYBE (%) { 2 } + +sub new +{ + my $class = shift; + my $self = $class->SUPER::new (@_); + + # Use one space per indent level (by default) + $self->{Tab} = " " unless defined $self->{Tab}; + + # Note that this is a PerlSAX filter so we use the XML newline ("\x0A"), + # not the Perl output newline ("\n"), by default. 
+ $self->{Newline} = "\x0A" unless defined $self->{Newline}; + + $self; +} + +# Indent the element if its parent element says so +sub indent_element +{ + my ($self, $event, $parent_says_indent) = @_; + return $parent_says_indent; +} + +# Always indent children unless element (or its ancestor) has +# xml:space="preserve" attribute +sub indent_children +{ + my ($self, $event) = @_; + return $event->{PreserveWS} ? 0 : MAYBE; +} + +sub start_element +{ + my ($self, $event) = @_; + + my $parent = $self->{ParentStack}->[-1]; + my $level = $self->{Level}++; + $self->SUPER::start_element ($event); + + my $parent_says_indent = $parent->{IndentChildren} ? 1 : 0; + # init with 1 if parent says MAYBE + $event->{Indent} = $self->indent_element ($event, $parent_says_indent) ? + $level : undef; + + $event->{IndentChildren} = $self->indent_children ($event); +} + +sub end_element +{ + my ($self, $event) = @_; + my $start_element = $self->{ParentStack}->[-1]; + + if ($start_element->{IndentChildren} == MAYBE) + { + my $q = $self->{EventQ}; + my $prev = $q->[-1]; + + if ($prev == $start_element) + { + # End tag follows start tag: compress tag + $start_element->{Compress} = 1; + $event->{Compress} = 1; +#?? could detect if it contains only ignorable_ws + } + elsif ($prev->{EventType} eq 'characters') + { + if ($q->[-2] == $start_element) + { + # Element has only one child, a text node. + # Print element as: text here + delete $prev->{Indent}; + $start_element->{IndentChildren} = 0; + } + } + } + + my $level = --$self->{Level}; + $event->{Indent} = $start_element->{IndentChildren} ? $level : undef; + + my $compress = $start_element->{Compress}; + if ($compress) + { + $event->{Compress} = $compress; + delete $event->{Indent}; + } + + $self->SUPER::end_element ($event); +} + +sub end_document +{ + my ($self, $event) = @_; + + $self->push_event ('end_document', $event || {}); + $self->flush (0); # send remaining events +} + +sub push_event +{ + my ($self, $type, $event) = @_; + + $event->{EventType} = $type; + if ($type =~ /^(characters|comment|processing_instruction|entity_reference|cdata)$/) + { + my $indent_kids = $self->{ParentStack}->[-1]->{IndentChildren} ? 1 : 0; + $event->{Indent} = $indent_kids ? $self->{Level} : undef; + } + + my $q = $self->{EventQ}; + push @$q, $event; + + $self->flush (4); # keep 4 events on the stack (maybe 3 is enough) +} + +sub flush +{ + my ($self, $keep) = @_; + my $q = $self->{EventQ}; + + while (@$q > $keep) + { + my $head = $q->[0]; +# print "head=" . $head->{EventType} . " indent=" . $head->{Indent} . "\n"; + + if ($head->{EventType} =~ /ws|ignorable/) + { + my $next = $q->[1]; + my $indent = $next->{Indent}; + + if (defined $indent) # fix existing indent + { + $head->{Data} = $self->{Newline} . ($self->{Tab} x $indent); + $self->send (2); + } + else # remove existing indent + { + shift @$q; + $self->send (1); + } +#?? remove keys: Indent, ... + } + else + { + my $indent = $head->{Indent}; + + if (defined $indent) # insert indent + { + unshift @$q, { EventType => 'ws', + Data => $self->{Newline} . ($self->{Tab} x $indent) }; + $self->send (2); + } + else # no indent - leave as is + { + $self->send (1); + } + } + } +} + +sub send +{ + my ($self, $i) = @_; + + my $q = $self->{EventQ}; + + while ($i--) + { + my $event = shift @$q; + my $type = $event->{EventType}; + delete $event->{EventType}; + +#print "TYPE=$type " . join(",", map { "$_=" . $event->{$_} } keys %$event) . 
"\n"; + $self->{Callback}->{$type}->($event); + } +} + +1; # package return code + +=head1 NAME + +XML::Filter::Reindent - Reformats whitespace for pretty printing XML + +=head1 SYNOPSIS + + use XML::Handler::Composer; + use XML::Filter::Reindent; + + my $composer = new XML::Handler::Composer (%OPTIONS); + my $indent = new XML::Filter::Reindent (Handler => $composer, %OPTIONS); + +=head1 DESCRIPTION + +XML::Filter::Reindent is a sub class of L. + +XML::Filter::Reindent can be used as a PerlSAX filter to reformat an +XML document before sending it to a PerlSAX handler that prints it +(like L.) + +Like L, it detects ignorable whitespace and blocks of +whitespace characters in certain places. It uses this information and +information supplied by the user to determine where whitespace may be +modified, deleted or inserted. +Based on the indent settings, it then modifies, inserts and deletes characters +and ignorable_whitespace events accordingly. + +This is just a first stab at the implementation. +It may be buggy and may change completely! + +=head1 Constructor Options + +=over 4 + +=item * Handler + +The PerlSAX handler (or filter) that will receive the PerlSAX events from this +filter. + +=item * Tab (Default: one space) + +The number of spaces per indent level for elements etc. in document content. + +=item * Newline (Default: "\x0A") + +The newline to use when re-indenting. +The default is the internal newline used by L, L etc., +and should be fine when used in combination with L. + +=back + +=head1 $self->indent_children ($start_element_event) + +This method determines whether children of a certain element +may be reformatted. +The default implementation checks the PreserveWS parameter of the specified +start_element event and returns 0 if it is set or MAYBE otherwise. +The value MAYBE (2) indicates that further investigation is needed, e.g. +by examining the element contents. A value of 1 means yes, indent the +child nodes, no further investigation is needed. + +NOTE: the PreserveWS parameter is set by the parent class, +L, when the element or one of its ancestors has +the attribute xml:space="preserve". + +Override this method to tweak the behavior of this class. + +=head1 $self->indent_element ($start_element_event, $parent_says_indent) + +This method determines whether a certain element may be re-indented. +The default implementation returns the value of the $parent_says_indent +parameter, which was set to the value returned by indent_children for the +parent element. In other words, the element will be re-indented if the +parent element allows it. + +Override this method to tweak the behavior of this class. +I'm not sure how useful this hook is. Please provide feedback! + +=head1 Current Implementation + +The current implementation puts all incoming Perl SAX events in a queue for +further processing. When determining which nodes should be re-indented, +it sometimes needs information from previous events, hence the use of the +queue. + +The parameter (Compress => 1) is added to +matching start_element and end_element events with no events in between +This indicates to an XML printer that a compressed notation can be used, +e.g . + +If an element allows reformatting of its contents (xml:space="preserve" was +not active and indent_children returned MAYBE), the element +contents will be reformatted unless it only has one child node and that +child is a regular text node (characters event.) +In that case, the element will be printed as text contents. 
+ +If you want element nodes with just one text child to be reindented as well, +simply override indent_children to return 1 instead of MAYBE (2.) + +This behavior may be changed or extended in the future. + +=head1 CAVEATS + +This code is highly experimental! +It has not been tested well and the API may change. + +The code that detects blocks of whitespace at potential indent positions +may need some work. + +=head1 AUTHOR + +Send bug reports, hints, tips, suggestions to Enno Derksen at +>. + +=cut diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Filter/SAXT.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Filter/SAXT.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,188 @@ +# +# To do: +# - later: ErrorHandler, Locale? + +package XML::Filter::SAXT; +use strict; + +use vars qw( %SAX_HANDLERS ); + +%SAX_HANDLERS = ( DocumentHandler => + [ "start_document", + "end_document", + "start_element", + "end_element", + "characters", + "processing_instruction", + "comment", + "start_cdata", + "end_cdata", + "entity_reference", + "set_document_locator" # !! passes {Locator=>$perlsax} + ], + + DTDHandler => + [ "notation_decl", + "unparsed_entity_decl", + "entity_decl", + "element_decl", + "attlist_decl", + "doctype_decl", + "xml_decl" + ], + + EntityResolver => + [ "resolve_entity" ]); + +# +# Usage: +# +# $saxt = new XML::Filter::SAXT ( { Handler => $out0 }, +# { DocumentHandler => $out1 }, +# { DTDHandler => $out3, +# Handler => $out4 +# } +# ); +# +# $perlsax = new XML::Parser::PerlSAX ( Handler => $saxt ); +# $perlsax->parse ( [OPTIONS] ); +# +sub new +{ + my ($class, @out) = @_; + + my $self = bless { Out => \@out }, $class; + + for (my $i = 0; $i < @out; $i++) + { + for my $handler (keys %SAX_HANDLERS) + { + my $callbacks = $SAX_HANDLERS{$handler}; + my $h = ($self->{Out}->[$i]->{$handler} ||= $self->{Out}->[$i]->{Handler}); + next unless defined $h; + + for my $cb (@$callbacks) + { + if (UNIVERSAL::can ($h, $cb)) + { + $self->{$cb} .= "\$out->[$i]->{$handler}->$cb (\@_);\n"; + } + } + } + } + + for my $handler (keys %SAX_HANDLERS) + { + my $callbacks = $SAX_HANDLERS{$handler}; + for my $cb (@$callbacks) + { + my $code = $self->{$cb}; + if (defined $code) + { + $self->{$cb} = + eval "sub { my \$out = shift->{Out}; $code }"; + } + else + { + $self->{$cb} = \&noop; + } + } + } + return $self; +} + +sub noop +{ + # does nothing +} + +for my $cb (map { @{ $_ } } values %SAX_HANDLERS) +{ + eval "sub $cb { shift->{$cb}->(\@_); }"; +} + +1; # package return code + +__END__ + +=head1 NAME + +XML::Filter::SAXT - Replicates SAX events to several SAX event handlers + +=head1 SYNOPSIS + + $saxt = new XML::Filter::SAXT ( { Handler => $out1 }, + { DocumentHandler => $out2 }, + { DTDHandler => $out3, + Handler => $out4 + } + ); + + $perlsax = new XML::Parser::PerlSAX ( Handler => $saxt ); + $perlsax->parse ( [OPTIONS] ); + +=head1 DESCRIPTION + +SAXT is like the Unix 'tee' command in that it multiplexes the input stream +to several output streams. In this case, the input stream is a PerlSAX event +producer (like XML::Parser::PerlSAX) and the output streams are PerlSAX +handlers or filters. + +The SAXT constructor takes a list of hash references. Each hash specifies +an output handler. The hash keys can be: DocumentHandler, DTDHandler, +EntityResolver or Handler, where Handler is a combination of the previous three +and acts as the default handler. +E.g. if DocumentHandler is not specified, it will try to use Handler. 
+ +=head2 EXAMPLE + +In this example we use L to parse an XML file and +to invoke the PerlSAX callbacks of our SAXT object. The SAXT object then +forwards the callbacks to L, which will 'die' if it encounters +an error, and to L, which will store the XML in an +L. + + use XML::Parser::PerlSAX; + use XML::Filter::SAXT; + use XML::Handler::BuildDOM; + use XML::Checker; + + my $checker = new XML::Checker; + my $builder = new XML::Handler::BuildDOM (KeepCDATA => 1); + my $tee = new XML::Filter::SAXT ( { Handler => $checker }, + { Handler => $builder } ); + + my $parser = new XML::Parser::PerlSAX (Handler => $tee); + eval + { + # This is how you set the error handler for XML::Checker + local $XML::Checker::FAIL = \&my_fail; + + my $dom_document = $parser->parsefile ("file.xml"); + ... your code here ... + }; + if ($@) + { + # Either XML::Parser::PerlSAX threw an exception (bad XML) + # or XML::Checker found an error and my_fail died. + ... your error handling code here ... + } + + # XML::Checker error handler + sub my_fail + { + my $code = shift; + die XML::Checker::error_string ($code, @_) + if $code < 200; # warnings and info messages are >= 200 + } + +=head1 CAVEATS + +This is still alpha software. +Package names and interfaces are subject to change. + +=head1 AUTHOR + +Send bug reports, hints, tips, suggestions to Enno Derksen at +>. + diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Handler/BuildDOM.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Handler/BuildDOM.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,338 @@ +package XML::Handler::BuildDOM; +use strict; +use XML::DOM; + +# +# TODO: +# - add support for parameter entity references +# - expand API: insert Elements in the tree or stuff into DocType etc. + +sub new +{ + my ($class, %args) = @_; + bless \%args, $class; +} + +#-------- PerlSAX Handler methods ------------------------------ + +sub start_document # was Init +{ + my $self = shift; + + # Define Document if it's not set & not obtainable from Element or DocType + $self->{Document} ||= + (defined $self->{Element} ? $self->{Element}->getOwnerDocument : undef) + || (defined $self->{DocType} ? $self->{DocType}->getOwnerDocument : undef) + || new XML::DOM::Document(); + + $self->{Element} ||= $self->{Document}; + + unless (defined $self->{DocType}) + { + $self->{DocType} = $self->{Document}->getDoctype + if defined $self->{Document}; + + unless (defined $self->{Doctype}) + { +#?? should be $doc->createDocType for extensibility! + $self->{DocType} = new XML::DOM::DocumentType ($self->{Document}); + $self->{Document}->setDoctype ($self->{DocType}); + } + } + + # Prepare for document prolog + $self->{InProlog} = 1; + + # We haven't passed the root element yet + $self->{EndDoc} = 0; + + undef $self->{LastText}; +} + +sub end_document # was Final +{ + my $self = shift; + unless ($self->{SawDocType}) + { + my $doctype = $self->{Document}->removeDoctype; + $doctype->dispose; +#?? do we always want to destroy the Doctype? 
+ } + $self->{Document}; +} + +sub characters # was Char +{ + my $self = $_[0]; + my $str = $_[1]->{Data}; + + if ($self->{InCDATA} && $self->{KeepCDATA}) + { + undef $self->{LastText}; + # Merge text with previous node if possible + $self->{Element}->addCDATA ($str); + } + else + { + # Merge text with previous node if possible + # Used to be: $expat->{DOM_Element}->addText ($str); + if ($self->{LastText}) + { + $self->{LastText}->appendData ($str); + } + else + { + $self->{LastText} = $self->{Document}->createTextNode ($str); + $self->{Element}->appendChild ($self->{LastText}); + } + } +} + +sub start_element # was Start +{ + my ($self, $hash) = @_; + my $elem = $hash->{Name}; + my $attr = $hash->{Attributes}; + + my $parent = $self->{Element}; + my $doc = $self->{Document}; + + if ($parent == $doc) + { + # End of document prolog, i.e. start of first Element + $self->{InProlog} = 0; + } + + undef $self->{LastText}; + my $node = $doc->createElement ($elem); + $self->{Element} = $node; + $parent->appendChild ($node); + + my $i = 0; + my $n = scalar keys %$attr; + return unless $n; + + if (exists $hash->{AttributeOrder}) + { + my $defaulted = $hash->{Defaulted}; + my @order = @{ $hash->{AttributeOrder} }; + + # Specified attributes + for (my $i = 0; $i < $defaulted; $i++) + { + my $a = $order[$i]; + my $att = $doc->createAttribute ($a, $attr->{$a}, 1); + $node->setAttributeNode ($att); + } + + # Defaulted attributes + for (my $i = $defaulted; $i < @order; $i++) + { + my $a = $order[$i]; + my $att = $doc->createAttribute ($elem, $attr->{$a}, 0); + $node->setAttributeNode ($att); + } + } + else + { + # We're assuming that all attributes were specified (1) + for my $a (keys %$attr) + { + my $att = $doc->createAttribute ($a, $attr->{$a}, 1); + $node->setAttributeNode ($att); + } + } +} + +sub end_element +{ + my $self = shift; + $self->{Element} = $self->{Element}->getParentNode; + undef $self->{LastText}; + + # Check for end of root element + $self->{EndDoc} = 1 if ($self->{Element} == $self->{Document}); +} + +sub entity_reference # was Default +{ + my $self = $_[0]; + my $name = $_[1]->{Name}; + + $self->{Element}->appendChild ( + $self->{Document}->createEntityReference ($name)); + undef $self->{LastText}; +} + +sub start_cdata +{ + my $self = shift; + $self->{InCDATA} = 1; +} + +sub end_cdata +{ + my $self = shift; + $self->{InCDATA} = 0; +} + +sub comment +{ + my $self = $_[0]; + + local $XML::DOM::IgnoreReadOnly = 1; + + undef $self->{LastText}; + my $comment = $self->{Document}->createComment ($_[1]->{Data}); + $self->{Element}->appendChild ($comment); +} + +sub doctype_decl +{ + my ($self, $hash) = @_; + + $self->{DocType}->setParams ($hash->{Name}, $hash->{SystemId}, + $hash->{PublicId}, $hash->{Internal}); + $self->{SawDocType} = 1; +} + +sub attlist_decl +{ + my ($self, $hash) = @_; + + local $XML::DOM::IgnoreReadOnly = 1; + + $self->{DocType}->addAttDef ($hash->{ElementName}, + $hash->{AttributeName}, + $hash->{Type}, + $hash->{Default}, + $hash->{Fixed}); +} + +sub xml_decl +{ + my ($self, $hash) = @_; + + local $XML::DOM::IgnoreReadOnly = 1; + + undef $self->{LastText}; + $self->{Document}->setXMLDecl (new XML::DOM::XMLDecl ($self->{Document}, + $hash->{Version}, + $hash->{Encoding}, + $hash->{Standalone})); +} + +sub entity_decl +{ + my ($self, $hash) = @_; + + local $XML::DOM::IgnoreReadOnly = 1; + + # Parameter Entities names are passed starting with '%' + my $parameter = 0; + +#?? parameter entities currently not supported by PerlSAX! 
+ + undef $self->{LastText}; + $self->{DocType}->addEntity ($parameter, $hash->{Name}, $hash->{Value}, + $hash->{SystemId}, $hash->{PublicId}, + $hash->{Notation}); +} + +# Unparsed is called when it encounters e.g: +# +# +# +sub unparsed_decl +{ + my ($self, $hash) = @_; + + local $XML::DOM::IgnoreReadOnly = 1; + + # same as regular ENTITY, as far as DOM is concerned + $self->entity_decl ($hash); +} + +sub element_decl +{ + my ($self, $hash) = @_; + + local $XML::DOM::IgnoreReadOnly = 1; + + undef $self->{LastText}; + $self->{DocType}->addElementDecl ($hash->{Name}, $hash->{Model}); +} + +sub notation_decl +{ + my ($self, $hash) = @_; + + local $XML::DOM::IgnoreReadOnly = 1; + + undef $self->{LastText}; + $self->{DocType}->addNotation ($hash->{Name}, $hash->{Base}, + $hash->{SystemId}, $hash->{PublicId}); +} + +sub processing_instruction +{ + my ($self, $hash) = @_; + + local $XML::DOM::IgnoreReadOnly = 1; + + undef $self->{LastText}; + $self->{Element}->appendChild (new XML::DOM::ProcessingInstruction + ($self->{Document}, $hash->{Target}, $hash->{Data})); +} + +return 1; + +__END__ + +=head1 NAME + +XML::Handler::BuildDOM - PerlSAX handler that creates XML::DOM document structures + +=head1 SYNOPSIS + + use XML::Handler::BuildDOM; + use XML::Parser::PerlSAX; + + my $handler = new XML::Handler::BuildDOM (KeepCDATA => 1); + my $parser = new XML::Parser::PerlSAX (Handler => $handler); + + my $doc = $parser->parsefile ("file.xml"); + +=head1 DESCRIPTION + +XML::Handler::BuildDOM creates L document structures +(i.e. L) from PerlSAX events. + +This class used to be called L prior to libxml-enno 1.0.1. + +=head2 CONSTRUCTOR OPTIONS + +The XML::Handler::BuildDOM constructor supports the following options: + +=over 4 + +=item * KeepCDATA => 1 + +If set to 0 (default), CDATASections will be converted to regular text. + +=item * Document => $doc + +If undefined, start_document will extract it from Element or DocType (if set), +otherwise it will create a new XML::DOM::Document. + +=item * Element => $elem + +If undefined, it is set to Document. This will be the insertion point (or parent) +for the nodes defined by the following callbacks. + +=item * DocType => $doctype + +If undefined, start_document will extract it from Document (if possible). +Otherwise it adds a new XML::DOM::DocumentType to the Document. + +=back diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Handler/CanonXMLWriter.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Handler/CanonXMLWriter.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,180 @@ +# +# Copyright (C) 1998, 1999 Ken MacLeod +# XML::Handler::CanonXMLWriter is free software; you can redistribute +# it and/or modify it under the same terms as Perl itself. 
+# +# $Id: CanonXMLWriter.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $ +# + +use strict; + +package XML::Handler::CanonXMLWriter; +use vars qw{ $VERSION %char_entities }; + +# will be substituted by make-rel script +$VERSION = "0.07"; + +%char_entities = ( + "\x09" => ' ', + "\x0a" => ' ', + "\x0d" => ' ', + '&' => '&', + '<' => '<', + '>' => '>', + '"' => '"', +); + +sub new { + my ($class, %args) = @_; + + my $self = \%args; + return bless $self, $class; +} + +sub start_document { + my $self = shift; my $document = shift; + + $self->{'_text_array'} = []; +} + +sub end_document { + my $self = shift; my $document = shift; + + if (defined $self->{IOHandle}) { + return (); + } else { + my $text = join ('', @{$self->{'_text_array'}}); + undef $self->{'_text_array'}; + return $text; + } +} + +sub start_element { + my $self = shift; my $element = shift; + + $self->_print('<' . $element->{Name}); + my $key; + my $attrs = $element->{Attributes}; + foreach $key (sort keys %$attrs) { + $self->_print(" $key=\"" . $self->_escape($attrs->{$key}) . '"'); + } + $self->_print('>'); +} + +sub end_element { + my $self = shift; my $element = shift; + + $self->_print('{Name} . '>'); +} + +sub characters { + my $self = shift; my $characters = shift; + + $self->_print($self->_escape($characters->{Data})); +} + +sub ignorable_whitespace { + my $self = shift; my $characters = shift; + + $self->_print($self->_escape($characters->{Data})); +} + +sub processing_instruction { + my $self = shift; my $pi = shift; + + $self->_print('{Target} . ' ' . $pi->{Data} . '?>'); +} + +sub entity { + # entities don't occur in text + return (); +} + +sub comment { + my $self = shift; my $comment = shift; + + if ($self->{PrintComments}) { + $self->_print(''); + } else { + return (); + } +} + +sub _print { + my $self = shift; my $string = shift; + + if (defined $self->{IOHandle}) { + $self->{IOHandle}->print($string); + return (); + } else { + push @{$self->{'_text_array'}}, $string; + } +} + +sub _escape { + my $self = shift; my $string = shift; + + $string =~ s/([\x09\x0a\x0d&<>"])/$char_entities{$1}/ge; + return $string; +} + +1; + +__END__ + +=head1 NAME + +XML::Handler::CanonXMLWriter - output XML in canonical XML format + +=head1 SYNOPSIS + + use XML::Handler::CanonXMLWriter; + + $writer = XML::Handler::CanonXMLWriter OPTIONS; + $parser->parse(Handler => $writer); + +=head1 DESCRIPTION + +C is a PerlSAX handler that will return +a string or write a stream of canonical XML for an XML instance and it's +content. + +C objects hold the options used for +writing the XML objects. Options can be supplied when the the object +is created, + + $writer = new XML::Handler::CanonXMLWriter PrintComments => 1; + +or modified at any time before calling the parser's `C' method: + + $writer->{PrintComments} = 0; + +=head1 OPTIONS + +=over 4 + +=item IOHandle + +IOHandle contains a handle for writing the canonical XML to. If an +IOHandle is not provided, the canonical XML string will be returned +from `C'. + +=item PrintComments + +By default comments are not written to the output. Setting comment to +a true value will include comments in the output. 
+ +=back + +=head1 AUTHOR + +Ken MacLeod, ken@bitsko.slc.ut.us + +=head1 SEE ALSO + +perl(1), PerlSAX + +James Clark's Canonical XML definition + + +=cut diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Handler/Composer.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Handler/Composer.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,821 @@ +package XML::Handler::Composer; +use strict; +use XML::UM; +use Carp; + +use vars qw{ %DEFAULT_QUOTES %XML_MAPPING_CRITERIA }; + +%DEFAULT_QUOTES = ( + XMLDecl => '"', + Attr => '"', + Entity => '"', + SystemLiteral => '"', + ); + +%XML_MAPPING_CRITERIA = +( + Text => + { + '<' => '<', + '&' => '&', + + ']]>' => ']]>', + }, + + CDataSection => + { + ']]>' => ']]>', # NOTE: this won't be translated back correctly + }, + + Attr => # attribute value (assuming double quotes "" are used) + { +# '"' => '"', # Use ("'" => ''') when using single quotes + '<' => '<', + '&' => '&', + }, + + Entity => # entity value (assuming double quotes "" are used) + { +# '"' => '"', # Use ("'" => ''') when using single quotes + '%' => '%', + '&' => '&', + }, + + Comment => + { + '--' => '--', # NOTE: this won't be translated back correctly + }, + + ProcessingInstruction => + { + '?>' => '?>', # not sure if this will be translated back correctly + }, + + # The SYSTEM and PUBLIC identifiers in DOCTYPE declaration (quoted strings) + SystemLiteral => + { +# '"' => '"', # Use ("'" => ''') when using single quotes + }, + +); + +sub new +{ + my ($class, %options) = @_; + my $self = bless \%options, $class; + + $self->{EndWithNewline} = 1 unless defined $self->{EndWithNewline}; + + if (defined $self->{Newline}) + { + $self->{ConvertNewlines} = 1; + } + else + { + # Use this when printing newlines in case the user didn't specify one + $self->{Newline} = "\x0A"; + } + + $self->{DocTypeIndent} = $self->{Newline} . " " + unless defined $self->{DocTypeIndent}; + + $self->{IndentAttlist} = " " unless defined $self->{IndentAttlist}; + + $self->{Print} = sub { print @_ } unless defined $self->{Print}; + + $self->{Quote} ||= {}; + for my $q (keys %DEFAULT_QUOTES) + { + $self->{Quote}->{$q} ||= $DEFAULT_QUOTES{$q}; + } + + # Convert to UTF-8 by default, i.e. when is missing + # and no {Encoding} is specified. + # Note that the internal representation *is* UTF-8, so we + # simply return the (string) parameter. + $self->{Encode} = sub { shift } unless defined $self->{Encode}; + + # Convert unmapped characters to hexadecimal constants a la '号' + $self->{EncodeUnmapped} = \&XML::UM::encode_unmapped_hex + unless defined $self->{EncodeUnmapped}; + + my $encoding = $self->{Encoding}; + $self->setEncoding ($encoding) if defined $encoding; + + $self->initMappers; + + $self; +} + +# +# Setup the mapping routines that convert '<' to '<' etc. +# for the specific XML constructs. +# +sub initMappers +{ + my $self = shift; + my %escape; + my $convert_newlines = $self->{ConvertNewlines}; + + for my $n (qw{ Text Comment CDataSection Attr SystemLiteral + ProcessingInstruction Entity }) + { + $escape{$n} = $self->create_utf8_mapper ($n, $convert_newlines); + } + + # Text with xml:space="preserve", should not have newlines converted. + $escape{TextPreserveNL} = $self->create_utf8_mapper ('Text', 0); + # (If newline conversion is inactive, $escape{TextPreserveNL} does the + # same as $escape{Text} defined above ...) 
+ + $self->{Escape} = \%escape; +} + +sub setEncoding +{ + my ($self, $encoding) = @_; + + $self->{Encode} = XML::UM::get_encode ( + Encoding => $encoding, EncodeUnmapped => $self->{EncodeUnmapped}); +} + +sub create_utf8_mapper +{ + my ($self, $construct, $convert_newlines) = @_; + + my $c = $XML_MAPPING_CRITERIA{$construct}; + croak "no XML mapping criteria defined for $construct" + unless defined $c; + + my %hash = %$c; + + # If this construct appears between quotes in the XML document + # (and it has a quoting character defined), + # ensure that the quoting character is appropriately converted + # to " or ' + my $quote = $self->{Quote}->{$construct}; + if (defined $quote) + { + $hash{$quote} = $quote eq '"' ? '"' : '''; + } + + if ($convert_newlines) + { + $hash{"\x0A"} = $self->{Newline}; + } + + gen_utf8_subst (%hash); +} + +# +# Converts a string literal e.g. "ABC" into '\x41\x42\x43' +# so it can be stuffed into a regular expression +# +sub str_to_hex # static +{ + my $s = shift; + + $s =~ s/(.)/ sprintf ("\\x%02x", ord ($1)) /egos; + + $s; +} + +# +# In later perl versions (5.005_55 and up) we can simply say: +# +# use utf8; +# $literals = join ("|", map { str_to_hex ($_) } keys %hash); +# $s =~ s/($literals)/$hash{$1}/ego; +# + +sub gen_utf8_subst # static +{ + my (%hash) = @_; + + my $code = 'sub { my $s = shift; $s =~ s/('; + $code .= join ("|", map { str_to_hex ($_) } keys %hash); + $code .= ')|('; + $code .= '[\\x00-\\xBF]|[\\xC0-\\xDF].|[\\xE0-\\xEF]..|[\\xF0-\\xFF]...'; + $code .= ')/ defined ($1) ? $hash{$1} : $2 /ego; $s }'; + + my $f = eval $code; + croak "XML::Handler::Composer - can't eval code: $code\nReason: $@" if $@; + + $f; +} + +# This should be optimized! +sub print +{ + my ($self, $str) = @_; + $self->{Print}->($self->{Encode}->($str)); +} + +# Used by start_element. It determines the style in which empty elements +# are printed. The default implementation returns "/>" so they are printed +# like this: +# Override this method to support e.g. XHTML style tags. +sub get_compressed_element_suffix +{ + my ($self, $event) = @_; + + "/>"; + + # return " />" for XHTML style, or + # "><$tagName/>" for uncompressed tags (where $tagName is $event->{Name}) +} + +#----- PerlSAX handlers ------------------------------------------------------- + +sub start_document +{ + my ($self) = @_; + + $self->{InCDATA} = 0; + $self->{DTD} = undef; + $self->{PreserveWS} = 0; # root element has xml:space="default" + $self->{PreserveStack} = []; + $self->{PrintedXmlDecl} = 0; # whether was printed +} + +sub end_document +{ + my ($self) = @_; + + # Print final Newline at the end of the XML document (if desired) + $self->print ($self->{Newline}) if $self->{EndWithNewline}; +} + +# This event is received *AFTER* the Notation, Element, Attlist etc. events +# that are contained within the DTD. 
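# (By the time this event arrives, the notation_decl, element_decl, entity_decl
# and attlist_decl handlers further below have already appended their output to
# $self->{DTD}; doctype_decl splices that text in between the [ ... ] brackets.)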
+sub doctype_decl +{ + my ($self, $event) = @_; + $self->flush_xml_decl; + + my $q = $self->{Quote}->{SystemLiteral}; + my $escape_literal = $self->{Escape}->{SystemLiteral}; + + my $name = $event->{Name}; + my $sysId = $event->{SystemId}; + $sysId = &$escape_literal ($sysId) if defined $sysId; + my $pubId = $event->{PublicId}; + $pubId = &$escape_literal ($pubId) if defined $pubId; + + my $str = "{DTD}; + my $nl = $self->{Newline}; + + if (defined $dtd_contents) + { + delete $self->{DTD}; + + $str .= " [$dtd_contents$nl]>$nl"; + } + else + { + $str .= ">$nl"; + } + $self->print ($str); +} + +sub start_element +{ + my ($self, $event) = @_; + + my $preserve_stack = $self->{PreserveStack}; + if (@$preserve_stack == 0) + { + # This is the root element. Print the declaration now if + # it wasn't printed and it should be. + $self->flush_xml_decl; + } + + my $str = "<" . $event->{Name}; + + my $suffix = ">"; + if ($event->{Compress}) + { + $suffix = $self->get_compressed_element_suffix ($event); + } + + # Push PreserveWS state of parent element on the stack + push @{ $preserve_stack }, $self->{PreserveWS}; + $self->{PreserveWS} = $event->{PreserveWS}; + + my $ha = $event->{Attributes}; + my @attr; + if (exists $event->{AttributeOrder}) + { + my $defaulted = $event->{Defaulted}; + if (defined $defaulted && !$self->{PrintDefaultAttr}) + { + if ($defaulted > 0) + { + @attr = @{ $event->{AttributeOrder} }[0 .. $defaulted - 1]; + } + # else: all attributes are defaulted i.e. @attr = (); + } + else # no attr are defaulted + { + @attr = @{ $event->{AttributeOrder} }; + } + } + else # no attr order defined + { + @attr = keys %$ha; + } + + my $escape = $self->{Escape}->{Attr}; + my $q = $self->{Quote}->{Attr}; + + for (my $i = 0; $i < @attr; $i++) + { +#?? could print a newline every so often... + my $name = $attr[$i]; + my $val = &$escape ($ha->{$name}); + $str .= " $name=$q$val$q"; + } + $str .= $suffix; + + $self->print ($str); +} + +sub end_element +{ + my ($self, $event) = @_; + + $self->{PreserveWS} = pop @{ $self->{PreserveStack} }; + + return if $event->{Compress}; + + $self->print ("{Name} . ">"); +} + +sub characters +{ + my ($self, $event) = @_; + + if ($self->{InCDATA}) + { +#?? should this use $self->{PreserveWS} ? + + my $esc = $self->{Escape}->{CDataSection}; + $self->print (&$esc ($event->{Data})); + } + else # regular text + { + my $esc = $self->{PreserveWS} ? + $self->{Escape}->{TextPreserveNL} : + $self->{Escape}->{Text}; + + $self->print (&$esc ($event->{Data})); + } +} + +sub start_cdata +{ + my $self = shift; + $self->{InCDATA} = 1; + + $self->print ("{InCDATA} = 0; + + $self->print ("]]>"); +} + +sub comment +{ + my ($self, $event) = @_; + $self->flush_xml_decl; + + my $esc = $self->{Escape}->{Comment}; +#?? still need to support comments in the DTD + + $self->print (""); +} + +sub entity_reference +{ + my ($self, $event) = @_; + $self->flush_xml_decl; + + my $par = $event->{Parameter} ? '%' : '&'; +#?? parameter entities (like %par;) are NOT supported! +# PerlSAX::handle_default should be fixed! + + $self->print ($par . $event->{Name} . 
";"); +} + +sub unparsed_entity_decl +{ + my ($self, $event) = @_; + $self->flush_xml_decl; + + $self->entity_decl ($event); +} + +sub notation_decl +{ + my ($self, $event) = @_; + $self->flush_xml_decl; + + my $name = $event->{Name}; + my $sysId = $event->{SystemId}; + my $pubId = $event->{PublicId}; + + my $q = $self->{Quote}->{SystemLiteral}; + my $escape = $self->{Escape}->{SystemLiteral}; + + $sysId = &$escape ($sysId) if defined $sysId; + $pubId = &$escape ($pubId) if defined $pubId; + + my $str = $self->{DocTypeIndent} . ""; + + $self->{DTD} .= $str; +} + +sub element_decl +{ + my ($self, $event) = @_; + $self->flush_xml_decl; + + my $name = $event->{Name}; + my $model = $event->{Model}; + + $self->{DTD} .= $self->{DocTypeIndent} . ""; +} + +sub entity_decl +{ + my ($self, $event) = @_; + $self->flush_xml_decl; + + my $name = $event->{Name}; + + my $par = ""; + if ($name =~ /^%/) + { + # It's a parameter entity (i.e. %ent; instead of &ent;) + $name = substr ($name, 1); + $par = "% "; + } + + my $str = $self->{DocTypeIndent} . "{Value}; + my $sysId = $event->{SysId}; + my $pubId = $event->{PubId}; + my $ndata = $event->{Ndata}; + + my $q = $self->{Quote}->{SystemLiteral}; + my $escape = $self->{Escape}->{SystemLiteral}; + + if (defined $value) + { +#?? use {Entity} quote etc... + my $esc = $self->{Escape}->{Entity}; + my $p = $self->{Quote}->{Entity}; + $str .= " $p" . &$esc ($value) . $p; + } + if (defined $pubId) + { + $str .= " PUBLIC $q" . &$escape ($pubId) . $q; + } + elsif (defined $sysId) + { + $str .= " SYSTEM"; + } + + if (defined $sysId) + { + $str .= " $q" . &$escape ($sysId) . $q; + } + $str .= " NDATA $ndata" if defined $ndata; + $str .= ">"; + + $self->{DTD} .= $str; +} + +sub attlist_decl +{ + my ($self, $event) = @_; + $self->flush_xml_decl; + + my $elem = $event->{ElementName}; + + my $str = $event->{AttributeName} . " " . $event->{Type}; + $str .= " #FIXED" if defined $event->{Fixed}; + + $str = $str; + + my $def = $event->{Default}; + if (defined $def) + { + $str .= " $def"; + + # Note sometimes Default is a value with quotes. + # We'll use the existing quotes in that case... + } + + my $indent; + if (!exists($event->{First}) || $event->{First}) + { + $self->{DTD} .= $self->{DocTypeIndent} . "{MoreFollow}) + { + $indent = $self->{Newline} . $self->{IndentAttlist}; + } + else + { + $indent = " "; + } + } + else + { + $indent = $self->{Newline} . $self->{IndentAttlist}; + } + + $self->{DTD} .= $indent . $str; + + unless ($event->{MoreFollow}) + { + $self->{DTD} .= '>'; + } +} + +sub xml_decl +{ + my ($self, $event) = @_; + return if $self->{PrintedXmlDecl}; # already printed it + + my $version = $event->{Version}; + my $encoding = $event->{Encoding}; + if (defined $self->{Encoding}) + { + $encoding = $self->{Encoding}; + } + else + { + $self->setEncoding ($encoding) if defined $encoding; + } + + my $standalone = $event->{Standalone}; + $standalone = ($standalone ? 
"yes" : "no") if defined $standalone; + + my $q = $self->{Quote}->{XMLDecl}; + my $nl = $self->{Newline}; + + my $str = "$nl$nl"; + + $self->print ($str); + $self->{PrintedXmlDecl} = 1; +} + +# +# Prints the declaration if it wasn't already printed +# *and* the user wanted it to be printed (because s/he set $self->{Encoding}) +# +sub flush_xml_decl +{ + my ($self) = @_; + return if $self->{PrintedXmlDecl}; + + if (defined $self->{Encoding}) + { + $self->xml_decl ({ Version => '1.0', Encoding => $self->{Encoding} }); + } + + # If it wasn't printed just now, it doesn't need to be printed at all, + # so pretend we did print it. + $self->{PrintedXmlDecl} = 1; +} + +sub processing_instruction +{ + my ($self, $event) = @_; + $self->flush_xml_decl; + + my $escape = $self->{Escape}->{ProcessingInstruction}; + + my $str = "{Target} . " " . + &$escape ($event->{Data}). "?>"; + + $self->print ($str); +} + +1; # package return code + +__END__ + +=head1 NAME + +XML::Handler::Composer - Another XML printer/writer/generator + +=head1 SYNOPSIS + +use XML::Handler::Composer; + +my $composer = new XML::Handler::Composer ( [OPTIONS] ); + +=head1 DESCRIPTION + +XML::Handler::Composer is similar to XML::Writer, XML::Handler::XMLWriter, +XML::Handler::YAWriter etc. in that it generates XML output. + +This implementation may not be fast and it may not be the best solution for +your particular problem, but it has some features that may be missing in the +other implementations: + +=over 4 + +=item * Supports every output encoding that L supports + +L supports every encoding for which there is a mapping file +in the L distribution. + +=item * Pretty printing + +When used with L. + +=item * Fine control over which kind of quotes are used + +See options below. + +=item * Supports PerlSAX interface + +=back + +=head1 Constructor Options + +=over 4 + +=item * EndWithNewline (Default: 1) + +Whether to print a newline at the end of the file (i.e. after the root element) + +=item * Newline (Default: undef) + +If defined, which newline to use for printing. +(Note that XML::Parser etc. convert newlines into "\x0A".) + +If undef, newlines will not be converted and XML::Handler::Composer will +use "\x0A" when printing. + +A value of "\n" will convert the internal newlines into the platform +specific line separator. + +See the PreserveWS option in the characters event (below) for finer control +over when newline conversion is active. + +=item * DocTypeIndent (Default: a Newline and 2 spaces) + +Newline plus indent that is used to separate lines inside the DTD. + +=item * IndentAttList (Default: 8 spaces) + +Indent used when printing an declaration that has more than one +attribute definition, e.g. + + + +=item * Quote (Default: { XMLDecl => '"', Attr => '"', Entity => '"', SystemLiteral => '"' }) + +Quote contains a reference to a hash that defines which quoting characters +to use when printing XML declarations (XMLDecl), attribute values (Attr), + values (Entity) and system/public literals (SystemLiteral) +as found in , declarations etc. + +=item * PrintDefaultAttr (Default: 0) + +If 1, prints attribute values regardless of whether they are default +attribute values (as defined in declarations.) +Normally, default attributes are not printed. + +=item * Encoding (Default: undef) + +Defines the output encoding (if specified.) +Note that future calls to the xml_decl() handler may override this setting +(if they contain an Encoding definition.) 
+ +=item * EncodeUnmapped (Default: \&XML::UM::encode_unmapped_dec) + +Defines how Unicode characters not found in the mapping file (of the +specified encoding) are printed. +By default, they are converted to decimal entity references, like '{' + +Use \&XML::UM::encode_unmapped_hex for hexadecimal constants, like '«' + +=item * Print (Default: sub { print @_ }, which prints to stdout) + +The subroutine that is used to print the encoded XML output. +The default prints the string to stdout. + +=back + +=head1 Method: get_compressed_element_suffix ($event) + +Override this method to support the different styles for printing +empty elements in compressed notation, e.g.

+<elem/>, <elem />, <elem></elem>, <elem>.
+
+The default returns "/>", which results in <elem/>
. +Use " />" for XHTML style elements or ">" for certain HTML style elements. + +The $event parameter is the hash reference that was received from the +start_element() handler. + +=head1 Extra PerlSAX event information + +XML::Handler::Composer relies on hints from previous SAX filters to +format certain parts of the XML. +These SAX filters (e.g. XML::Filter::Reindent) pass extra information by adding +name/value pairs to the appropriate PerlSAX events (the events themselves are +hash references.) + +=over 4 + +=item * entity_reference: Parameter => 1 + +If Parameter is 1, it means that it is a parameter entity reference. +A parameter entity is referenced with %ent; instead of &ent; and the +entity declaration starts with instead of + +NOTE: This should be added to the PerlSAX interface! + +=item * start_element/end_element: Compress => 1 + +If Compress is 1 in both the start_element and end_element event, the element +will be printed in compressed form, e.g. instead of . + +=item * start_element: PreserveWS => 1 + +If newline conversion is active (i.e. Newline was defined in the constructor), +then newlines will *NOT* be converted in text (character events) within this +element. + +=item * attlist_decl: First, MoreFollow + +The First and MoreFollow options can be used to force successive +declarations for the same element to be merged, e.g. + + + +In this example, the attlist_decl event for foo should contain +(First => 1, MoreFollow => 1) and the event for bar should contain +(MoreFollow => 1). The quux event should have no extra info. + +'First' indicates that the event is the first of a sequence. +'MoreFollow' indicates that more events will follow in this sequence. + +If neither option is set by the preceding PerlSAX filter, each attribute +definition will be printed as a separate line. + +=back + +=head1 CAVEATS + +This code is highly experimental! +It has not been tested well and the API may change. + +=head1 AUTHOR + +Send bug reports, hints, tips, suggestions to Enno Derksen at +>. + +=cut diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Handler/PrintEvents.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Handler/PrintEvents.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,67 @@ +# +# This PerlSAX handler prints out all the PerlSAX events/callbacks +# it receives. Very useful when debugging. +# + +package XML::Handler::PrintEvents; +use strict; +use XML::Filter::SAXT; + +my @EXTRA_HANDLERS = ( 'ignorable_whitespace' ); + +sub new +{ + my ($class, %options) = @_; + bless \%options, $class; +} + +sub print_event +{ + my ($self, $event_name, $event) = @_; + + printf "%-22s ", $event_name; + if (defined $event) + { + print join (", ", map { "$_ => [" . + (defined $event->{$_} ? $event->{$_} : "(undef)") + . "]" } keys %$event); + } + print "\n"; +} + +# +# This generates the PerlSAX handler methods for PrintEvents. +# They basically forward the event to print_event() while adding the callback +# (event) name. +# +for my $cb (@EXTRA_HANDLERS, map { @{$_} } values %XML::Filter::SAXT::SAX_HANDLERS) +{ + eval "sub $cb { shift->print_event ('$cb', \@_) }"; +} + +1; # package return code + +__END__ + +=head1 NAME + +XML::Handler::PrintEvents - Prints PerlSAX events (for debugging) + +=head1 SYNOPSIS + +use XML::Handler::PrintEvents; + +my $pr = new XML::Handler::PrintEvents; + +=head1 DESCRIPTION + +This PerlSAX handler prints the PerlSAX events it receives to STDOUT. +It can be useful when debugging PerlSAX filters. 
+It supports all PerlSAX handler including ignorable_whitespace. + +=head1 AUTHOR + +Send bug reports, hints, tips, suggestions to Enno Derksen at +>. + +=cut diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Handler/Sample.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Handler/Sample.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,101 @@ +# This template file is in the Public Domain. +# You may do anything you want with this file. +# +# $Id: Sample.pm,v 1.4 1999/08/16 16:04:03 kmacleod Exp $ +# + +package XML::Handler::Sample; + +use vars qw{ $AUTOLOAD }; + +sub new { + my $type = shift; + my $self = ( $#_ == 0 ) ? shift : { @_ }; + + return bless $self, $type; +} + +# Basic PerlSAX +sub start_document { print "start_document\n"; } +sub end_document { print "end_document\n"; } +sub start_element { print "start_element\n"; } +sub end_element { print "end_element\n"; } +sub characters { print "characters\n"; } +sub processing_instruction { print "processing_instruction\n"; } +sub ignorable_whitespace { print "ignorable_whitespace\n"; } + +# Additional expat callbacks in XML::Parser::PerlSAX +sub comment { print "comment\n"; } +sub notation_decl { print "notation_decl\n"; } +sub unparsed_entity_decl { print "unparsed_entity_decl\n"; } +sub entity_decl { print "entity_decl\n"; } +sub element_decl { print "element_decl\n"; } +sub doctype_decl { print "doctype_decl\n"; } +sub xml_decl { print "xml_decl\n"; } + +# Additional SP/nsgmls callbacks in XML::ESISParser +sub start_subdoc { print "start_subdoc\n"; } +sub end_subdoc { print "start_subdoc\n"; } +sub appinfo { print "appinfo\n"; } +sub internal_entity_ref { print "sdata\n"; } +sub external_entity_ref { print "sdata\n"; } +sub record_end { print "record_end\n"; } +sub internal_entity_decl { print "internal_entity_decl\n"; } +sub external_entity_decl { print "external_entity_decl\n"; } +sub external_sgml_entity_decl { print "external_sgml_entity_decl\n"; } +sub subdoc_entity_decl { print "subdoc_entity_decl\n"; } +sub notation { print "notation\n"; } +sub error { print "error\n"; } +sub conforming { print "conforming\n"; } + +# Others +sub AUTOLOAD { + my $self = shift; + + my $method = $AUTOLOAD; + $method =~ s/.*:://; + return if $method eq 'DESTROY'; + + print "UNRECOGNIZED $method\n"; +} + +1; + +__END__ + +=head1 NAME + +XML::Handler::Sample - a trivial PerlSAX handler + +=head1 SYNOPSIS + + use XML::Parser::PerlSAX; + use XML::Handler::Sample; + + $my_handler = XML::Handler::Sample->new; + + XML::Parser::PerlSAX->new->parse(Source => { SystemId => 'REC-xml-19980210.xml' }, + Handler => $my_handler); + +=head1 DESCRIPTION + +C is a trivial PerlSAX handler that prints out +the name of each event it receives. The source for +C lists all the currently known PerlSAX +handler methods. + +C is intended for Perl module authors who wish +to look at example PerlSAX handler modules. C +can be used as a template for writing your own PerlSAX handler +modules. C is in the Public Domain and can be +used for any purpose without restriction. 
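+
+As a further illustration (this example is not part of the original
+distribution), a handler written along the same lines might count the
+events it receives instead of printing them. The sketch below assumes
+C<XML::Parser::PerlSAX> is available; the package name and counting logic
+are invented for this example, and the input file is the one used in the
+SYNOPSIS above.
+
+    package MyEventCounter;
+    use strict;
+
+    sub new { my $class = shift; return bless { counts => {} }, $class }
+
+    # One explicit sub per event we want to count; PerlSAX calls these
+    # methods by name on the handler object.
+    sub start_document { my $self = shift; $self->{counts} = {} }
+    sub start_element  { my $self = shift; $self->{counts}{start_element}++ }
+    sub end_element    { my $self = shift; $self->{counts}{end_element}++ }
+    sub characters     { my $self = shift; $self->{counts}{characters}++ }
+
+    # PerlSAX drivers normally return end_document's result from parse().
+    sub end_document   { my $self = shift; return $self->{counts} }
+
+    package main;
+    use XML::Parser::PerlSAX;
+
+    my $counts = XML::Parser::PerlSAX->new->parse(
+        Source  => { SystemId => 'REC-xml-19980210.xml' },
+        Handler => MyEventCounter->new,
+    );
+    printf "%-16s %d\n", $_, $counts->{$_} for sort keys %$counts;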
+ +=head1 AUTHOR + +Ken MacLeod, ken@bitsko.slc.ut.us + +=head1 SEE ALSO + +perl(1), PerlSAX.pod(3) + +=cut diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Handler/Subs.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Handler/Subs.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,177 @@ +# +# Copyright (C) 1999 Ken MacLeod +# XML::Handler::XMLWriter is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. +# +# $Id: Subs.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $ +# + +use strict; + +package XML::Handler::Subs; + +use UNIVERSAL; + +use vars qw{ $VERSION }; + +# will be substituted by make-rel script +$VERSION = "0.07"; + +sub new { + my $type = shift; + my $self = ($#_ == 0) ? { %{ (shift) } } : { @_ }; + + return bless $self, $type; +} + +sub start_document { + my ($self, $document) = @_; + + $self->{Names} = []; + $self->{Nodes} = []; +} + +sub end_document { + my ($self, $document) = @_; + + delete $self->{Names}; + delete $self->{Nodes}; + + return(); +} + +sub start_element { + my ($self, $element) = @_; + + push @{$self->{Names}}, $element->{Name}; + push @{$self->{Nodes}}, $element; + + my $el_name = "s_" . $element->{Name}; + $el_name =~ s/[^a-zA-Z0-9_]/_/g; + if ($self->can($el_name)) { + $self->$el_name($element); + return 1; + } + + return 0; +} + +sub end_element { + my ($self, $element) = @_; + + my $called_sub = 0; + my $el_name = "e_" . $element->{Name}; + $el_name =~ s/[^a-zA-Z0-9_]/_/g; + if ($self->can(${el_name})) { + $self->$el_name($element); + $called_sub = 1; + } + + pop @{$self->{Names}}; + pop @{$self->{Nodes}}; + + return $called_sub; +} + +sub in_element { + my ($self, $name) = @_; + + return ($self->{Names}[-1] eq $name); +} + +sub within_element { + my ($self, $name) = @_; + + my $count = 0; + foreach my $el_name (@{$self->{Names}}) { + $count ++ if ($el_name eq $name); + } + + return $count; +} + +1; + +__END__ + +=head1 NAME + +XML::Handler::Subs - a PerlSAX handler base class for calling user-defined subs + +=head1 SYNOPSIS + + use XML::Handler::Subs; + + package MyHandlers; + use vars qw{ @ISA }; + + sub s_NAME { my ($self, $element) = @_ }; + sub e_NAME { my ($self, $element) = @_ }; + + $self->{Names}; # an array of names + $self->{Nodes}; # an array of $element nodes + + $handler = MyHandlers->new(); + $self->in_element($name); + $self->within_element($name); + +=head1 DESCRIPTION + +C is a base class for PerlSAX handlers. +C is subclassed to implement complete behavior and +to add element-specific handling. + +Each time an element starts, a method by that name prefixed with `s_' +is called with the element to be processed. Each time an element +ends, a method with that name prefixed with `e_' is called. Any +special characters in the element name are replaced by underscores. + +Subclassing XML::Handler::Subs in this way is similar to +XML::Parser's Subs style. + +XML::Handler::Subs maintains a stack of element names, +`C<$self->{Names}', and a stack of element nodes, `C<$self->{Nodes}>' +that can be used by subclasses. The current element is pushed on the +stacks before calling an element-name start method and popped off the +stacks after calling the element-name end method. The +`C' and `C' calls use these stacks. + +If the subclass implements `C', `C', +`C', and `C', be sure to use +`C' to call the the superclass methods also. See perlobj(1) +for details on SUPER::. `C' and +`C' return 1 if an element-name method is +called, they return 0 if no method was called. 
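+
+For example, a hypothetical subclass (not shipped with this module) that
+reports the text of every C<title> element can rely on the automatic
+`s_'/`e_' dispatch and on the Names stack kept by the base class; the
+element name C<title> is only an assumption made for this sketch:
+
+    package TitleReporter;
+    use strict;
+    use vars qw( @ISA );
+    use XML::Handler::Subs;
+    @ISA = qw( XML::Handler::Subs );
+
+    # Called automatically when a title element starts or ends,
+    # thanks to the s_/e_ prefix convention of the base class.
+    sub s_title { print "[title starts]\n" }
+    sub e_title { print "[title ends]\n" }
+
+    # An ordinary PerlSAX handler; the Names stack maintained by the
+    # base class lets us test where we are in the document.
+    sub characters {
+        my ($self, $chars) = @_;
+        print $chars->{Data}, "\n" if $self->in_element('title');
+    }
+
+    1;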
+ +XML::Handler::Subs does not implement any other PerlSAX handlers. + +XML::Handler::Subs supports the following methods: + +=over 4 + +=item new( I ) + +A basic `C' method. `C' takes a list of key, value +pairs or a hash and creates and returns a hash with those options; the +hash is blessed into the subclass. + +=item in_element($name) + +Returns true if `C<$name>' is equal to the name of the innermost +currently opened element. + +=item within_element($name) + +Returns the number of times the `C<$name>' appears in Names. + +=back + +=head1 AUTHOR + +Ken MacLeod, ken@bitsko.slc.ut.us + +=head1 SEE ALSO + +perl(1), PerlSAX.pod(3) + +=cut diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Handler/XMLWriter.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Handler/XMLWriter.pm Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,313 @@ +# +# Copyright (C) 1999 Ken MacLeod +# Portions derived from code in XML::Writer by David Megginson +# XML::Handler::XMLWriter is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. +# +# $Id: XMLWriter.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $ +# + +use strict; + +package XML::Handler::XMLWriter; +use XML::Handler::Subs; + +use vars qw{ $VERSION @ISA $escapes }; + +# will be substituted by make-rel script +$VERSION = "0.07"; + +@ISA = qw{ XML::Handler::Subs }; + +$escapes = { '&' => '&', + '<' => '<', + '>' => '>', + '"' => '"' + }; + +sub start_document { + my ($self, $document) = @_; + + $self->SUPER::start_document($document); + + # create a temporary Output_ in case we're creating a standard + # output file that we'll delete later. + if (!$self->{AsString} && !defined($self->{Output})) { + require IO::File; + import IO::File; + $self->{Output_} = new IO::File(">-"); + } elsif (defined($self->{Output})) { + $self->{Output_} = $self->{Output}; + } + + if ($self->{AsString}) { + $self->{Strings} = []; + } + + $self->print("\n"); + + # FIXME support Doctype declarations +} + +sub end_document { + my ($self, $document) = @_; + + if (defined($self->{Output_})) { + $self->{Output_}->print("\n"); + delete $self->{Output_}; + } + + my $string = undef; + if (defined($self->{AsString})) { + push @{$self->{Strings}}, "\n"; + $string = join('', @{$self->{Strings}}); + delete $self->{Strings}; + } + + $self->SUPER::end_document($document); + + return($string); +} + +sub start_element { + my ($self, $element) = @_; + + if ($self->SUPER::start_element($element) == 0) { + $self->print_start_element($element); + } +} + +sub print_start_element { + my ($self, $element) = @_; + + my $output = "<$element->{Name}"; + if (defined($element->{Attributes})) { + foreach my $name (sort keys %{$element->{Attributes}}) { + my $esc_value = $element->{Attributes}{$name}; + $esc_value =~ s/([\&\<\>\"])/$escapes->{$1}/ge; + $output .= " $name=\"$esc_value\""; + } + } + + if ($self->{Newlines}) { + $output .= "\n"; + } + + $output .= ">"; + + $self->print($output); +} + +sub end_element { + my ($self, $element) = @_; + + if ($self->SUPER::end_element($element) == 0) { + $self->print_end_element($element); + } +} + +sub print_end_element { + my ($self, $element) = @_; + + my $output = "{Name}" + . ($self->{Newlines} ? "\n" : "") . 
">"; + + $self->print($output); +} +sub characters { + my ($self, $characters) = @_; + + my $output = $characters->{Data}; + + $output =~ s/([\&\<\>])/$escapes->{$1}/ge; + + $self->print($output); +} + +sub processing_instruction { + my ($self, $pi) = @_; + + my $nl = ($#{$self->{Names}} == -1) ? "\n" : ""; + + my $output; + if ($self->{IsSGML}) { + $output = "{Data}>\n"; + } else { + if ($pi->{Data}) { + $output = "{Target} $pi->{Data}?>$nl"; + } else { + $output = "{Target}?>$nl"; + } + } + + $self->print($output); +} + +sub ignorable_whitespace { + my ($self, $whitespace) = @_; + + $self->print($whitespace->{Data}); +} + +sub comment { + my ($self, $comment) = @_; + + my $nl = ($#{$self->{Names}} == -1) ? "\n" : ""; + + my $output = "$nl"; + + $self->print($output); +} + +sub print { + my ($self, $output) = @_; + + $self->{Output_}->print($output) + if (defined($self->{Output_})); + + push(@{$self->{Strings}}, $output) + if (defined($self->{AsString})); +} + +1; + +__END__ + +=head1 NAME + +XML::Handler::XMLWriter - a PerlSAX handler for writing readable XML + +=head1 SYNOPSIS + + use XML::Parser::PerlSAX; + use XML::Handler::XMLWriter; + + $my_handler = XML::Handler::XMLWriter->new( I ); + + XML::Parser::PerlSAX->new->parse(Source => { SystemId => 'REC-xml-19980210.xml' }, + Handler => $my_handler); + +=head1 DESCRIPTION + +C is a PerlSAX handler for writing readable +XML (in contrast to Canonical XML, for example). +XML::Handler::XMLWriter can be used with a parser to reformat XML, +with XML::DOM or XML::Grove to write out XML, or with other PerlSAX +modules that generate events. + +C is intended to be used with PerlSAX event +generators and does not perform any checking itself (for example, +matching start and end element events). If you want to generate XML +directly from your Perl code, use the XML::Writer module. XML::Writer +has an easy to use interface and performs many checks to make sure +that the XML you generate is well-formed. + +C is a subclass of C. +C can be further subclassed to alter it's +behavior or to add element-specific handling. In the subclass, each +time an element starts, a method by that name prefixed with `s_' is +called with the element to be processed. Each time an element ends, a +method with that name prefixed with `e_' is called. Any special +characters in the element name are replaced by underscores. If there +isn't a start or end method for an element, the default action is to +write the start or end tag. Start and end methods can use the +`C' and `C' methods to +print start or end tags. Subclasses can call the `C' method +to write additional output. + +Subclassing XML::Handler::XMLWriter in this way is similar to +XML::Parser's Stream style. + +XML::Handler::Subs maintains a stack of element names, +`C<$self->{Names}', and a stack of element nodes, `C<$self->{Nodes}>' +that can be used by subclasses. The current element is pushed on the +stacks before calling an element-name start method and popped off the +stacks after calling the element-name end method. + +See XML::Handler::Subs for additional methods. + +In addition to the standard PerlSAX handler methods (see PerlSAX for +descriptions), XML::Handler::XMLWriter supports the following methods: + +=over 4 + +=item new( I ) + +Creates and returns a new instance of XML::Handler::XMLWriter with the +given I. Options may be changed at any time by modifying +them directly in the hash returned. I can be a list of key, +value pairs or a hash. 
The following I are supported: + +=over 4 + +=item Output + +An IO::Handle or one of it's subclasses (such as IO::File), if this +parameter is not present and the AsString option is not used, the +module will write to standard output. + +=item AsString + +Return the generated XML as a string from the `C' method of +the PerlSAX event generator. + +=item Newlines + +A true or false value; if this parameter is present and its value is +true, then the module will insert an extra newline before the closing +delimiter of start, end, and empty tags to guarantee that the document +does not end up as a single, long line. If the paramter is not +present, the module will not insert the newlines. + +=item IsSGML + +A true or false value; if this parameter is present and its value is +true, then the module will generate SGML rather than XML. + +=back + +=item print_start_element($element) + +Print a start tag for `C<$element>'. This is the default action for +the PerlSAX `C' handler, but subclasses may use this +if they define a start method for an element. + +=item print_end_element($element) + +Prints an end tag for `C<$element>'. This is the default action for +the PerlSAX `C' handler, but subclasses may use this +if they define a start method for an element. + +=item print($output) + +Write `C<$output>' to Output and/or append it to the string to be +returned. Subclasses may use this to write additional output. + +=back + +=head1 TODO + +=over 4 + +=item * + +An Elements option that provides finer control over newlines than the +Newlines option, where you can choose before and after newline for +element start and end tags. Inspired by the Python XMLWriter. + +=item * + +Support Doctype and XML declarations. + +=back + +=head1 AUTHOR + +Ken MacLeod, ken@bitsko.slc.ut.us +This module is partially derived from XML::Writer by David Megginson. + +=head1 SEE ALSO + +perl(1), PerlSAX.pod(3) + +=cut diff -r 000000000000 -r 02cd6b52f378 dummy_foundation/lib/XML/Parser.pod --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dummy_foundation/lib/XML/Parser.pod Thu May 28 10:10:03 2009 +0100 @@ -0,0 +1,466 @@ +=head1 WARNING + +This manual page was copied from the XML::Parser distribution (version 2.27) +written by Clark Cooper. You can find newer versions at CPAN. + +=head1 NAME + +XML::Parser - A perl module for parsing XML documents + +=head1 SYNOPSIS + + use XML::Parser; + + $p1 = new XML::Parser(Style => 'Debug'); + $p1->parsefile('REC-xml-19980210.xml'); + $p1->parse('Hello World'); + + # Alternative + $p2 = new XML::Parser(Handlers => {Start => \&handle_start, + End => \&handle_end, + Char => \&handle_char}); + $p2->parse($socket); + + # Another alternative + $p3 = new XML::Parser(ErrorContext => 2); + + $p3->setHandlers(Char => \&text, + Default => \&other); + + open(FOO, 'xmlgenerator |'); + $p3->parse(*FOO, ProtocolEncoding => 'ISO-8859-1'); + close(FOO); + + $p3->parsefile('junk.xml', ErrorContext => 3); + +=head1 DESCRIPTION + +This module provides ways to parse XML documents. It is built on top of +L, which is a lower level interface to James Clark's +expat library. Each call to one of the parsing methods creates a new +instance of XML::Parser::Expat which is then used to parse the document. +Expat options may be provided when the XML::Parser object is created. +These options are then passed on to the Expat object on each parse call. +They can also be given as extra arguments to the parse methods, in which +case they override options given at XML::Parser creation time. 
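+
+For example (the file names below are invented), options given to the
+constructor hold for every parse, while options passed directly to a
+parse method win for that single call:
+
+    use XML::Parser;
+
+    # Expat options supplied at creation time apply to every parse...
+    my $p = new XML::Parser(Style => 'Debug', ErrorContext => 2);
+
+    $p->parsefile('first.xml');                    # uses ErrorContext => 2
+
+    # ...but extra arguments to a parse method override them
+    # for that call only.
+    $p->parsefile('second.xml', ErrorContext => 3);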
+ +The behavior of the parser is controlled either by C> and/or +C> options, or by L method. These all provide +mechanisms for XML::Parser to set the handlers needed by XML::Parser::Expat. +If neither C