deprecated/buildtools/buildsystemtools/lib/XML/Checker.pm
changeset 662 60be34e1b006
parent 655 3f65fd25dfd4
equal deleted inserted replaced
654:7c11c3d8d025 662:60be34e1b006
       
     1 #
       
     2 #
       
     3 # TO DO
       
     4 # - update docs regarding PerlSAX interface
       
     5 # - add current node to error context when checking DOM subtrees
       
     6 # - add parsed Entity to test XML files
       
     7 # - free circular references
       
     8 # - Implied handler?
       
     9 # - Notation, Entity, Unparsed checks, Default handler?
       
    10 # - check no root element (it's checked by expat) ?
       
    11 
       
    12 package XML::Checker::Term;
       
    13 use strict;
       
    14 
       
    15 sub new     
       
    16 { 
       
    17     my ($class, %h) = @_;
       
    18     bless \%h, $class;
       
    19 }
       
    20 
       
    21 sub str     
       
    22 { 
       
    23     '<' . $_[0]->{C} . $_[0]->{N} . '>'
       
    24 }
       
    25 
       
    26 sub re
       
    27 { 
       
    28     $_[0]->{S} 
       
    29 }
       
    30 
       
    31 sub rel 
       
    32 { 
       
    33     my $self = shift;
       
    34     defined $self->{SL} ? @{ $self->{SL} } : ( $self->{S} );
       
    35 }
       
    36 
       
    37 sub debug
       
    38 {
       
    39     my $t = shift;
       
    40     my ($c, $n, $s) = ($t->{C}, $t->{N}, $t->{S});
       
    41     my @sl = $t->rel;
       
    42     "{C=$c N=$n S=$s SL=@sl}";
       
    43 }
       
    44 
       
    45 #-------------------------------------------------------------------------
       
    46 
       
    47 package XML::Checker::Context;
       
    48 
       
    49 sub new
       
    50 {
       
    51     my ($class) = @_;
       
    52     my $scalar;
       
    53     bless \$scalar, $class;
       
    54 }
       
    55 
       
    56 sub Start {}
       
    57 sub End   {}
       
    58 sub Char  {}
       
    59 
       
    60 #
       
    61 # The initial Context when checking an entire XML Document
       
    62 #
       
    63 package XML::Checker::DocContext;
       
    64 use vars qw( @ISA );
       
    65 @ISA = qw( XML::Checker::Context );
       
    66 
       
    67 sub new
       
    68 {
       
    69 #??checker not used
       
    70     my ($class, $checker) = @_;
       
    71     bless { }, $class;
       
    72 }
       
    73 
       
    74 sub setRootElement
       
    75 {
       
    76     $_[0]->{RootElement} = $_[1];
       
    77 }
       
    78 
       
    79 sub Start 
       
    80 {
       
    81     my ($self, $checker, $tag) = @_;
       
    82     if (exists $self->{Elem})
       
    83     {
       
    84 	my $tags = join (", ", @{$self->{Elem}});
       
    85 	$checker->fail (155, "more than one root Element [$tags]");
       
    86 	push @{$self->{Elem}}, $tag;
       
    87     }
       
    88     else
       
    89     {
       
    90 	$self->{Elem} = [ $tag ];
       
    91     }
       
    92 
       
    93     my $exp_root = $self->{RootElement};
       
    94     $checker->fail (156, "unexpected root Element [$tag], expected [$exp_root]")
       
    95 	if defined ($exp_root) and $tag ne $exp_root;
       
    96 }
       
    97 
       
    98 sub debug
       
    99 {
       
   100     my $self = shift;
       
   101     "DocContext[Count=" . $self->{Count} . ",Root=" . 
       
   102 	$self->{RootElement} . "]";
       
   103 }
       
   104 
       
   105 package XML::Checker::Context::ANY;
       
   106 use vars qw( @ISA );
       
   107 @ISA = qw( XML::Checker::Context );
       
   108 
       
   109 # No overrides, because everything is accepted
       
   110 
       
   111 sub debug { "XML::Checker::Context::ANY" }
       
   112 
       
   113 package XML::Checker::Context::EMPTY;
       
   114 use vars qw( @ISA $ALLOW_WHITE_SPACE );
       
   115 @ISA = qw( XML::Checker::Context );
       
   116 
       
   117 $ALLOW_WHITE_SPACE = 0;
       
   118 
       
   119 sub debug { "XML::Checker::Context::EMPTY" }
       
   120 
       
   121 sub Start
       
   122 {
       
   123     my ($self, $checker, $tag) = @_;
       
   124     $checker->fail (152, "Element should be EMPTY, found Element [$tag]");
       
   125 }
       
   126 
       
   127 sub Char
       
   128 {
       
   129     my ($self, $checker, $str) = @_;
       
   130     $checker->fail (153, "Element should be EMPTY, found text [$str]")
       
   131 	unless ($ALLOW_WHITE_SPACE and $checker->isWS ($str));
       
   132 
       
   133     # NOTE: if $ALLOW_WHITE_SPACE = 1, the isWS call does not only check
       
   134     # whether it is whitespace, but it also informs the checker that this 
       
   135     # might be insignificant whitespace
       
   136 }
       
   137 
       
   138 #?? what about Comments
       
   139 
       
   140 package XML::Checker::Context::Children;
       
   141 use vars qw( @ISA );
       
   142 @ISA = qw( XML::Checker::Context );
       
   143 
       
   144 sub new
       
   145 {
       
   146     my ($class, $rule) = @_;
       
   147     bless { Name => $rule->{Name}, RE => $rule->{RE}, Buf => "", N => 0 }, $class;
       
   148 }
       
   149 
       
   150 sub phash
       
   151 {
       
   152     my $href = shift;
       
   153     my $str = "";
       
   154     for (keys %$href)
       
   155     {
       
   156 	$str .= ' ' if $str;
       
   157 	$str .= $_ . '=' . $href->{$_};
       
   158     }
       
   159     $str;
       
   160 }
       
   161 
       
   162 sub debug
       
   163 {
       
   164     my $self = shift;
       
   165     "Context::Children[Name=(" . phash ($self->{Name}) . ",N=" . $self->{N} .
       
   166 	",RE=" . $self->{RE} . ",Buf=[" . $self->{Buf} . "]";
       
   167 }
       
   168 
       
   169 sub Start
       
   170 {
       
   171     my ($self, $checker, $tag) = @_;
       
   172 
       
   173 #print "Children.Start tag=$tag rule=$checker drule=" . $checker->debug . "\n";
       
   174 
       
   175     if (exists $self->{Name}->{$tag})
       
   176     {
       
   177 #print "Buf=[".$self->{Buf}. "] tag=[" . $self->{Name}->{$tag}->{S} . "]\n";
       
   178 	$self->{Buf} .= $self->{Name}->{$tag}->{S};
       
   179     }
       
   180     else
       
   181     {
       
   182 	$checker->fail (157, "unexpected Element [$tag]", 
       
   183 			ChildElementIndex => $self->{N})
       
   184     }
       
   185     $self->{N}++;
       
   186 }
       
   187 
       
   188 sub decode
       
   189 {
       
   190     my ($self) = @_;
       
   191     my $re = $self->{RE};
       
   192     my $name = $self->{Name};
       
   193     my $buf = $self->{Buf};
       
   194 
       
   195     my %s = ();
       
   196     while (my ($key, $val) = each %$name)
       
   197     {
       
   198 	$s{$val->{S}} = $key;
       
   199     }
       
   200 
       
   201     my ($len) = scalar (keys %$name);
       
   202     $len = length $len;
       
   203     my $dots = "[^()*+?]" x $len;
       
   204 
       
   205     $buf =~ s/($dots)/$s{$1} . ","/ge;
       
   206     chop $buf;
       
   207 
       
   208     $re =~ s/($dots)/"(" . $s{$1} . ")"/ge;
       
   209 
       
   210     "Found=[$buf] RE=[$re]"
       
   211 }
       
   212 
       
   213 sub End
       
   214 {
       
   215     my ($self, $checker) = @_;
       
   216     my $re = $self->{RE};
       
   217 
       
   218 #print "End " . $self->debug . "\n";
       
   219     $checker->fail (154, "bad order of Elements " . $self->decode)
       
   220 	unless $self->{Buf} =~ /^$re$/;
       
   221 }
       
   222 
       
   223 sub Char
       
   224 {
       
   225     my ($self, $checker, $str) = @_;
       
   226 
       
   227     # Inform the checker that this might be insignificant whitespace
       
   228     $checker->isWS ($str);
       
   229 }
       
   230 
       
   231 package XML::Checker::Context::Mixed;
       
   232 use vars qw( @ISA );
       
   233 @ISA = qw( XML::Checker::Context );
       
   234 
       
   235 sub new
       
   236 {
       
   237     my ($class, $rule) = @_;
       
   238     bless { Name => $rule->{Name}, N => 0 }, $class;
       
   239 }
       
   240 
       
   241 sub debug
       
   242 {
       
   243     my $self = shift;
       
   244     "Context::Mixed[Name=" . $self->{Name} . ",N=" , $self->{N} . "]";
       
   245 }
       
   246 
       
   247 sub Start
       
   248 {
       
   249     my ($self, $checker, $tag) = @_;
       
   250 
       
   251     $checker->fail (157, "unexpected Element [$tag]",
       
   252 		    ChildElementIndex => $self->{N})
       
   253 	unless exists $self->{Name}->{$tag};
       
   254     $self->{N}++;
       
   255 }
       
   256 
       
   257 package XML::Checker::ERule;
       
   258 
       
   259 package XML::Checker::ERule::EMPTY;
       
   260 use vars qw( @ISA );
       
   261 @ISA = qw( XML::Checker::ERule );
       
   262 
       
   263 sub new
       
   264 {
       
   265     my ($class) = @_;
       
   266     bless {}, $class;
       
   267 }
       
   268 
       
   269 my $context = new XML::Checker::Context::EMPTY;
       
   270 sub context { $context }	# share the context
       
   271 
       
   272 sub debug { "EMPTY" }
       
   273 
       
   274 package XML::Checker::ERule::ANY;
       
   275 use vars qw( @ISA );
       
   276 @ISA = qw( XML::Checker::ERule );
       
   277 
       
   278 sub new
       
   279 {
       
   280     my ($class) = @_;
       
   281     bless {}, $class;
       
   282 }
       
   283 
       
   284 my $any_context = new XML::Checker::Context::ANY;
       
   285 sub context { $any_context }	# share the context
       
   286 
       
   287 sub debug { "ANY" }
       
   288 
       
   289 package XML::Checker::ERule::Mixed;
       
   290 use vars qw( @ISA );
       
   291 @ISA = qw( XML::Checker::ERule );
       
   292 
       
   293 sub new
       
   294 {
       
   295     my ($class) = @_;
       
   296     bless { Name => {} }, $class;
       
   297 }
       
   298 
       
   299 sub context 
       
   300 {
       
   301     my ($self) = @_;
       
   302     new XML::Checker::Context::Mixed ($self);
       
   303 }
       
   304 
       
   305 sub setModel
       
   306 {
       
   307     my ($self, $model) = @_;
       
   308     my $rule = $model;
       
   309 
       
   310     # Mixed := '(' '#PCDATA' ')' '*'?
       
   311     if ($rule =~ /^\(\s*#PCDATA\s*\)(\*)?$/)
       
   312     {
       
   313 #? how do we interpret the '*' ??
       
   314          return 1;
       
   315     }
       
   316     else	# Mixed := '(' '#PCDATA' ('|' Name)* ')*'
       
   317     {
       
   318 	return 0 unless $rule =~ s/^\(\s*#PCDATA\s*//;
       
   319 	return 0 unless $rule =~ s/\s*\)\*$//;
       
   320 
       
   321 	my %names = ();
       
   322 	while ($rule =~ s/^\s*\|\s*($XML::RegExp::Name)//)
       
   323 	{
       
   324 	    $names{$1} = 1;
       
   325 	}
       
   326 	if ($rule eq "")
       
   327 	{
       
   328 	    $self->{Name} = \%names;
       
   329 	    return 1;
       
   330 	}
       
   331     }
       
   332     return 0;
       
   333 }
       
   334 
       
   335 sub debug
       
   336 {
       
   337     my ($self) = @_;
       
   338     "Mixed[Names=" . join("|", keys %{$self->{Name}}) . "]";
       
   339 }
       
   340 
       
   341 package XML::Checker::ERule::Children;
       
   342 use vars qw( @ISA %_name %_map $_n );
       
   343 @ISA = qw( XML::Checker::ERule );
       
   344 
       
   345 sub new
       
   346 {
       
   347     my ($class) = @_;
       
   348     bless {}, $class;
       
   349 }
       
   350 
       
   351 sub context 
       
   352 {
       
   353     my ($self) = @_;
       
   354     new XML::Checker::Context::Children ($self);
       
   355 }
       
   356 
       
   357 sub _add	# static
       
   358 {
       
   359     my $exp = new XML::Checker::Term (@_);
       
   360     $_map{$exp->{N}} = $exp;
       
   361     $exp->str;
       
   362 }
       
   363 
       
   364 my $IDS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
       
   365 
       
   366 sub _tokenize
       
   367 {
       
   368     my ($self, $rule) = @_;
       
   369 
       
   370     # Replace names with Terms of the form "<n#>", e.g. "<n2>".
       
   371     # Lookup already used names and store new names in %_name.
       
   372     # 
       
   373     $$rule =~ s/($XML::RegExp::Name)(?!>)/
       
   374 	if (exists $_name{$1})		# name already used?
       
   375 	{
       
   376 	    $_name{$1}->str;
       
   377 	}
       
   378         else
       
   379 	{
       
   380 	    my $exp = new XML::Checker::Term (C => 'n', N => $_n++, 
       
   381 					      Name => $1);
       
   382 	    $_name{$1} = $_map{$exp->{N}} = $exp;
       
   383 	    $exp->str;
       
   384 	}
       
   385     /eg;
       
   386 
       
   387     if ($_n < length $IDS)
       
   388     {
       
   389 	# Generate regular expression for the name Term, i.e.
       
   390 	# a single character from $IDS
       
   391 	my $i = 0;
       
   392 	for (values %_name)
       
   393 	{
       
   394 	    $_->{S} = substr ($IDS, $i++, 1);
       
   395 #print "tokenized " . $_->{Name} . " num=" . $_->{N} . " to " . $_->{S} . "\n";
       
   396 	}
       
   397     }
       
   398     else
       
   399     {
       
   400 	# Generate RE, convert Term->{N} to hex string a la "(#)", 
       
   401 	# e.g. "(03d)". Calculate needed length of hex string first.
       
   402 	my $len = 1;
       
   403 	for (my $n = $_n - 1; ($n >> 4) > 0; $len++) {}
       
   404 
       
   405 	my $i = 0;
       
   406 	for (values %_name)
       
   407 	{
       
   408 	    $_->{S} = sprintf ("(0${len}lx)", $i++);
       
   409 #print "tokenized " . $_->{Name} . " num=" . $_->{N} . " to " . $_->{S} . "\n";
       
   410 	}
       
   411     }
       
   412 }
       
   413 
       
   414 sub setModel
       
   415 {
       
   416     my ($self, $rule) = @_;
       
   417 
       
   418     local $_n = 0;
       
   419     local %_map = ();
       
   420     local %_name = ();
       
   421     
       
   422     $self->_tokenize (\$rule);
       
   423 
       
   424 #?? check for single name - die "!ELEMENT contents can't be just a NAME" if $rule =~ /^$XML::RegExp::Name$/;
       
   425 
       
   426     for ($rule)
       
   427     {
       
   428 	my $n = 1;
       
   429 	while ($n)
       
   430 	{
       
   431 	    $n = 0;
       
   432 
       
   433 	    # cp := ( name | choice | seq ) ('?' | '*' | '+')?
       
   434 	    $n++ while s/<[ncs](\d+)>([?*+]?)/_add
       
   435 	    (C => 'a', N => $_n++, 
       
   436 	     S => ($_map{$1}->re . $2))/eg;
       
   437 
       
   438 	    # choice := '(' ch_l ')'
       
   439 	    $n++ while s/\(\s*<[ad](\d+)>\s*\)/_add
       
   440 	    (C => 'c', N => $_n++, 
       
   441 	     S => "(" . join ("|", $_map{$1}->rel) . ")")/eg;
       
   442 	
       
   443 	    # ch_l := ( cp | ch_l ) '|' ( cp | ch_l )
       
   444 	    $n++ while s/<[ad](\d+)>\s*\|\s*<[ad](\d+)>/_add
       
   445 	    (C => 'd', N => $_n++, 
       
   446 	     SL => [ $_map{$1}->rel, $_map{$2}->rel ])/eg;
       
   447 
       
   448 	    # seq := '(' (seq_l ')'
       
   449 	    $n++ while s/\(\s*<[at](\d+)>\s*\)/_add
       
   450 	    (C => 's', N => $_n++, 
       
   451 	     S => "(".join("", $_map{$1}->rel).")")/eg;
       
   452 
       
   453 	    # seq_l := ( cp | seq_l ) ',' ( cp | seq_l )
       
   454 	    $n++ while s/<[at](\d+)>\s*,\s*<[at](\d+)>/_add
       
   455 	    (C => 't', N => $_n++, 
       
   456 	     SL => [ $_map{$1}->rel, $_map{$2}->rel ])/eg;
       
   457 	}
       
   458     }
       
   459 
       
   460     return 0 if ($rule !~ /^<a(\d+)>$/);
       
   461 
       
   462     $self->{Name} = \%_name;
       
   463     $self->{RE} = $_map{$1}->re;
       
   464 
       
   465     return 1;
       
   466 }
       
   467 
       
   468 sub debug
       
   469 {
       
   470     my ($self) = @_;
       
   471     "Children[RE=" . $self->{RE} . "]";
       
   472 }
       
   473 
       
   474 
       
   475 package XML::Checker::ARule;
       
   476 use XML::RegExp;
       
   477 
       
   478 sub new
       
   479 {
       
   480     my ($class, $elem, $checker) = @_;
       
   481     bless { Elem => $elem, Checker => $checker, Required => {} }, $class;
       
   482 }
       
   483 
       
   484 sub Attlist
       
   485 {
       
   486     my ($self, $attr, $type, $default, $fixed, $checker) = @_;
       
   487     my ($c1, $c2);
       
   488 
       
   489     if ($self->{Defined}->{$attr})
       
   490     {
       
   491 	my $tag = $self->{Elem};
       
   492 	$self->fail ($attr, 110, "attribute [$attr] of element [$tag] already defined");
       
   493     }
       
   494     else
       
   495     {
       
   496 	$self->{Defined}->{$attr} = 1;
       
   497     }
       
   498 
       
   499     if ($default =~ /^\#(REQUIRED|IMPLIED)$/)
       
   500     {
       
   501 	$c1 = $1;
       
   502 
       
   503 	# Keep list of all required attributes
       
   504 	if ($default eq '#REQUIRED')
       
   505 	{
       
   506 	    $self->{Required}->{$attr} = 1;
       
   507 	}
       
   508     }
       
   509     else
       
   510     {
       
   511 	$self->fail ($attr, 122, "invalid default attribute value [$default]")
       
   512 	    unless $default =~ /^$XML::RegExp::AttValue$/;
       
   513 				   
       
   514 	$default = substr ($default, 1, length($default)-2);
       
   515 	$self->{Default}->{$attr} = $default;
       
   516 	$c1 = 'FIXED' if $fixed;
       
   517     }
       
   518 
       
   519     if ($type eq 'ID')
       
   520     {
       
   521 	$self->fail ($attr, 123, "invalid default ID [$default], must be #REQUIRED or #IMPLIED")
       
   522 	    unless $default =~ /^#(REQUIRED|IMPLIED)$/;
       
   523 
       
   524 	if (exists ($self->{ID}) && $self->{ID} ne $attr)
       
   525 	{
       
   526 	    $self->fail ($attr, 151, "only one ID allowed per ELEMENT " .
       
   527 			 "first=[" . $self->{ID} . "]");
       
   528 	}
       
   529 	else
       
   530 	{
       
   531 	    $self->{ID} = $attr;
       
   532 	}
       
   533 	$c2 = 'ID';
       
   534     }
       
   535     elsif ($type =~ /^(IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS)$/)
       
   536     {
       
   537 	my $def = $self->{Default}->{$attr};
       
   538 	if (defined $def)
       
   539 	{
       
   540 	    my $re = ($type =~ /^[IE]/) ? $XML::RegExp::Name : $XML::RegExp::NmToken;
       
   541 	    if ($type =~ /S$/)
       
   542 	    {
       
   543 		for (split (/\s+/, $def))
       
   544 		{
       
   545 		    $self->fail ($attr, 121,
       
   546 				 "invalid default [$_] in $type [$def]")
       
   547 			unless $_ =~ /^$re$/;
       
   548 		}
       
   549 	    }
       
   550 	    else	# singular
       
   551 	    {
       
   552 		$self->fail ($attr, 120, "invalid default $type [$def]")
       
   553 			unless $def =~ /^$re$/;
       
   554 	    }
       
   555 	}
       
   556 	$c2 = $type;
       
   557     }
       
   558     elsif ($type ne 'CDATA')	# Enumerated := NotationType | Enumeration
       
   559     {
       
   560 	if ($type =~ /^\s*NOTATION\s*\(\s*($XML::RegExp::Name(\s*\|\s*$XML::RegExp::Name)*)\s*\)\s*$/)
       
   561 	{
       
   562 	    $self->fail ($attr, 135, "empty NOTATION list in ATTLIST")
       
   563 		unless defined $1;
       
   564 
       
   565 	    my @tok = split (/\s*\|\s*/, $1);
       
   566 	    for (@tok)
       
   567 	    {
       
   568 		$self->fail ($attr, 100, "undefined NOTATION [$_] in ATTLIST")
       
   569 			unless exists $checker->{NOTATION}->{$_};
       
   570 	    }
       
   571 
       
   572 	    my $re = join ("|", @tok);
       
   573 	    $self->{NotationRE} = "^($re)\$";
       
   574 	    $c2 = 'NotationType';
       
   575 	}
       
   576 	elsif ($type =~ /^\s*\(\s*($XML::RegExp::NmToken(\s*\|\s*$XML::RegExp::NmToken)*)\s*\)\s*$/)
       
   577 	{
       
   578 	    # Enumeration
       
   579 
       
   580 	    $self->fail ($attr, 136, "empty Enumeration list in ATTLIST")
       
   581 		    unless defined $1;
       
   582 
       
   583 	    my @tok = split (/\s*\|\s*/, $1);
       
   584 	    for (@tok)
       
   585 	    {
       
   586 		$self->fail ($attr, 134,
       
   587 			     "invalid Enumeration value [$_] in ATTLIST")
       
   588 		unless $_ =~ /^$XML::RegExp::NmToken$/;
       
   589 	    }
       
   590 	    $self->{EnumRE}->{$attr} = '^(' . join ("|", @tok) . ')$'; #';
       
   591 	    $c2 = 'Enumeration';
       
   592 	}
       
   593 	else
       
   594 	{
       
   595 	    $self->fail ($attr, 137, "invalid ATTLIST type [$type]");
       
   596 	}
       
   597     }
       
   598 
       
   599     $self->{Check1}->{$attr} = $c1 if $c1;
       
   600     $self->{Check2}->{$attr} = $c2 if $c2;
       
   601 }
       
   602 
       
   603 sub fail
       
   604 {
       
   605     my $self = shift;
       
   606     my $attr = shift;
       
   607     $self->{Checker}->fail (@_, Element => $self->{Elem}, Attr => $attr);
       
   608 }
       
   609 
       
   610 sub check
       
   611 {
       
   612     my ($self, $attr) = @_;
       
   613     my $func1 = $self->{Check1}->{$attr};
       
   614     my $func2 = $self->{Check2}->{$attr};
       
   615 #    print "check func1=$func1 func2=$func2 @_\n";
       
   616 
       
   617     if (exists $self->{ReqNotSeen}->{$attr})
       
   618     {
       
   619 	delete $self->{ReqNotSeen}->{$attr};
       
   620     }
       
   621     no strict;
       
   622 
       
   623     &$func1 (@_) if defined $func1;
       
   624     &$func2 (@_) if defined $func2;
       
   625 }
       
   626 
       
   627 # Copies the list of all required attributes from $self->{Required} to
       
   628 # $self->{ReqNotSeen}. 
       
   629 # When check() encounters a required attribute, it is removed from ReqNotSeen. 
       
   630 # In EndAttr we look at which attribute names are still in ReqNotSeen - those
       
   631 # are the ones that were not specified and are, therefore, in error.
       
   632 sub StartAttr
       
   633 {
       
   634     my $self = shift;
       
   635     my %not_seen = %{ $self->{Required} };
       
   636     $self->{ReqNotSeen} = \%not_seen;
       
   637 }
       
   638 
       
   639 # Checks which of the #REQUIRED attributes were not specified
       
   640 sub EndAttr
       
   641 {
       
   642     my $self = shift;
       
   643 
       
   644     for my $attr (keys %{ $self->{ReqNotSeen} })
       
   645     {
       
   646 	$self->fail ($attr, 159, 
       
   647 		     "unspecified value for \#REQUIRED attribute [$attr]");
       
   648     }
       
   649 }
       
   650 
       
   651 sub FIXED
       
   652 {
       
   653     my ($self, $attr, $val, $specified) = @_;
       
   654 
       
   655     my $default = $self->{Default}->{$attr};
       
   656     $self->fail ($attr, 150, 
       
   657 		 "bad \#FIXED attribute value [$val], it should be [$default]")
       
   658 	unless ($val eq $default);
       
   659 }
       
   660 
       
   661 sub IMPLIED
       
   662 {
       
   663     my ($self, $attr, $val, $specified) = @_;
       
   664 
       
   665 #?? should #IMPLIED be specified?
       
   666     $self->fail ($attr, 158, 
       
   667 		 "unspecified value for \#IMPLIED attribute [$attr]")
       
   668 	unless $specified;
       
   669 
       
   670 #?? Implied handler ?
       
   671 }
       
   672 
       
   673 # This is called when an attribute is passed to the check() method by
       
   674 # XML::Checker::Attr(), i.e. when the attribute was specified explicitly
       
   675 # or defaulted by the parser (which should never happen), *NOT* when the 
       
   676 # attribute was omitted. (The latter is checked by StartAttr/EndAttr)
       
   677 sub REQUIRED
       
   678 {
       
   679     my ($self, $attr, $val, $specified) = @_;
       
   680 #    print "REQUIRED attr=$attr val=$val spec=$specified\n";
       
   681 
       
   682     $self->fail ($attr, 159, 
       
   683 		 "unspecified value for \#REQUIRED attribute [$attr]")
       
   684 	unless $specified;
       
   685 }
       
   686 
       
   687 sub ID		# must be #IMPLIED or #REQUIRED
       
   688 {
       
   689     my ($self, $attr, $val, $specified) = @_;
       
   690 
       
   691     $self->fail ($attr, 131, "invalid ID [$val]")
       
   692 	unless $val =~ /^$XML::RegExp::Name$/;
       
   693 
       
   694     $self->fail ($attr, 111, "ID [$val] already defined")
       
   695 	if $self->{Checker}->{ID}->{$val}++;
       
   696 }
       
   697 
       
   698 sub IDREF
       
   699 {
       
   700     my ($self, $attr, $val, $specified) = @_;
       
   701     
       
   702     $self->fail ($attr, 132, "invalid IDREF [$val]")
       
   703 	unless $val =~ /^$XML::RegExp::Name$/;
       
   704 
       
   705     $self->{Checker}->{IDREF}->{$val}++;
       
   706 }
       
   707 
       
   708 sub IDREFS
       
   709 {
       
   710     my ($self, $attr, $val, $specified) = @_;
       
   711     for (split /\s+/, $val)
       
   712     {
       
   713 	$self->IDREF ($attr, $_);
       
   714     }
       
   715 }
       
   716 
       
   717 sub ENTITY
       
   718 {
       
   719     my ($self, $attr, $val, $specified) = @_;
       
   720 #?? should it be specified?
       
   721 
       
   722     $self->fail ($attr, 133, "invalid ENTITY name [$val]")
       
   723 	unless $val =~ /^$XML::RegExp::Name$/;
       
   724 
       
   725     $self->fail ($attr, 102, "undefined unparsed ENTITY [$val]")
       
   726 	unless exists $self->{Checker}->{Unparsed}->{$val};
       
   727 }
       
   728 
       
   729 sub ENTITIES
       
   730 {
       
   731     my ($self, $attr, $val, $specified) = @_;
       
   732     for (split /\s+/, $val)
       
   733     {
       
   734 	$self->ENTITY ($attr, $_);
       
   735     }
       
   736 }
       
   737 
       
   738 sub NMTOKEN
       
   739 {
       
   740     my ($self, $attr, $val, $specified) = @_;
       
   741     $self->fail ($attr, 130, "invalid NMTOKEN [$val]")
       
   742 	unless $val =~ /^$XML::RegExp::NmToken$/;
       
   743 }
       
   744 
       
   745 sub NMTOKENS
       
   746 {
       
   747     my ($self, $attr, $val, $specified) = @_;
       
   748     for (split /\s+/, $val)
       
   749     {
       
   750 	$self->NMTOKEN ($attr, $_, $specified);
       
   751     }
       
   752 }
       
   753 
       
   754 sub Enumeration
       
   755 {
       
   756     my ($self, $attr, $val, $specified) = @_;
       
   757     my $re = $self->{EnumRE}->{$attr};
       
   758     
       
   759     $self->fail ($attr, 160, "invalid Enumeration value [$val]")
       
   760 	unless $val =~ /$re/;
       
   761 }
       
   762 
       
   763 sub NotationType
       
   764 {
       
   765     my ($self, $attr, $val, $specified) = @_;
       
   766     my $re = $self->{NotationRE};
       
   767 
       
   768     $self->fail ($attr, 161, "invalid NOTATION value [$val]")
       
   769 	unless $val =~ /$re/;
       
   770 
       
   771     $self->fail ($attr, 162, "undefined NOTATION [$val]")
       
   772 	unless exists $self->{Checker}->{NOTATION}->{$val};
       
   773 }
       
   774 
       
   775 package XML::Checker;
       
   776 use vars qw ( $VERSION $FAIL $INSIGNIF_WS );
       
   777 
       
   778 BEGIN 
       
   779 { 
       
   780     $VERSION = '0.09'; 
       
   781 }
       
   782 
       
   783 $FAIL = \&print_error;
       
   784 
       
   785 # Whether the last seen Char data was insignicant whitespace
       
   786 $INSIGNIF_WS = 0;
       
   787 
       
   788 sub new
       
   789 {
       
   790     my ($class, %args) = @_;
       
   791 
       
   792     $args{ERule} = {};
       
   793     $args{ARule} = {};
       
   794     $args{InCDATA} = 0;
       
   795 
       
   796 #    $args{Debug} = 1;
       
   797     bless \%args, $class;
       
   798 }
       
   799 
       
   800 # PerlSAX API
       
   801 sub element_decl
       
   802 {
       
   803     my ($self, $hash) = @_;
       
   804     $self->Element ($hash->{Name}, $hash->{Model});
       
   805 }
       
   806 
       
   807 # Same parameter order as the Element handler in XML::Parser module
       
   808 sub Element
       
   809 {
       
   810     my ($self, $name, $model) = @_;
       
   811     
       
   812     if (defined $self->{ERule}->{$name})
       
   813     {
       
   814 	$self->fail (115, "ELEMENT [$name] already defined",
       
   815 		     Element => $name);
       
   816     }
       
   817 
       
   818     if ($model eq "EMPTY")
       
   819     {
       
   820 	$self->{ERule}->{$name} = new XML::Checker::ERule::EMPTY;
       
   821     }
       
   822     elsif ($model eq "ANY")
       
   823     {
       
   824 	$self->{ERule}->{$name} = new XML::Checker::ERule::ANY;
       
   825     }
       
   826     elsif ($model =~ /#PCDATA/)
       
   827     {
       
   828         my $rule = new XML::Checker::ERule::Mixed;
       
   829 	if ($rule->setModel ($model))
       
   830         {
       
   831 	    $self->{ERule}->{$name} = $rule;
       
   832 	}
       
   833         else
       
   834         {
       
   835 	    $self->fail (124, "bad model [$model] for ELEMENT [$name]",
       
   836 			 Element => $name);
       
   837 	}
       
   838     }
       
   839     else
       
   840     {
       
   841         my $rule = new XML::Checker::ERule::Children;
       
   842 	if ($rule->setModel ($model))
       
   843         {
       
   844 	    $self->{ERule}->{$name} = $rule;
       
   845 	}
       
   846         else
       
   847         {
       
   848 	    $self->fail (124, "bad model [$model] for ELEMENT [$name]",
       
   849 			 Element => $name);
       
   850 	}
       
   851     }
       
   852     my $rule = $self->{ERule}->{$name};
       
   853     print "added ELEMENT model for $name: " . $rule->debug . "\n"
       
   854 	   if $rule and $self->{Debug};
       
   855 }
       
   856 
       
   857 # PerlSAX API
       
   858 sub attlist_decl
       
   859 {
       
   860     my ($self, $hash) = @_;
       
   861     $self->Attlist ($hash->{ElementName}, $hash->{AttributeName},
       
   862 		    $hash->{Type}, $hash->{Default}, $hash->{Fixed});
       
   863 }
       
   864 
       
   865 sub Attlist
       
   866 { 
       
   867     my ($self, $tag, $attrName, $type, $default, $fixed) = @_;
       
   868     my $arule = $self->{ARule}->{$tag} ||= 
       
   869 	new XML::Checker::ARule ($tag, $self);
       
   870 
       
   871     $arule->Attlist ($attrName, $type, $default, $fixed, $self);
       
   872 }
       
   873 
       
   874 # Initializes the context stack to check an XML::DOM::Element
       
   875 sub InitDomElem
       
   876 {
       
   877     my $self = shift;
       
   878 
       
   879     # initialize Context stack
       
   880     $self->{Context} = [ new XML::Checker::Context::ANY ($self) ];
       
   881     $self->{InCDATA} = 0;
       
   882 }
       
   883 
       
   884 # Clears the context stack after checking an XML::DOM::Element
       
   885 sub FinalDomElem
       
   886 {
       
   887     my $self = shift;
       
   888     delete $self->{Context};
       
   889 }
       
   890 
       
   891 # PerlSAX API
       
   892 sub start_document
       
   893 {
       
   894     shift->Init;
       
   895 }
       
   896 
       
   897 sub Init
       
   898 {
       
   899     my $self = shift;
       
   900 
       
   901     # initialize Context stack
       
   902     $self->{Context} = [ new XML::Checker::DocContext ($self) ];
       
   903     $self->{InCDATA} = 0;
       
   904 }
       
   905 
       
   906 # PerlSAX API
       
   907 sub end_document
       
   908 {
       
   909     shift->Final;
       
   910 }
       
   911 
       
   912 sub Final
       
   913 {
       
   914     my $self = shift;
       
   915 #?? could add more statistics: unreferenced Unparsed, ID
       
   916 
       
   917     for (keys %{ $self->{IDREF} })
       
   918     {
       
   919 	my $n = $self->{IDREF}->{$_};
       
   920 	$self->fail (200, "undefined ID [$_] was referenced [$n] times")
       
   921 	    unless defined $self->{ID}->{$_};
       
   922     }
       
   923 
       
   924     for (keys %{ $self->{ID} })
       
   925     {
       
   926 	my $n = $self->{IDREF}->{$_} || 0;
       
   927 	$self->fail (300, "[$n] references to ID [$_]");
       
   928     }
       
   929 
       
   930     delete $self->{Context};
       
   931 }
       
   932 
       
   933 sub getRootElement
       
   934 {
       
   935     my $self = shift;
       
   936 #    print "getRoot $self " . $self->{RootElement} . "\n";
       
   937     $_[0]->{RootElement};
       
   938 }
       
   939 
       
   940 # PerlSAX API
       
   941 sub doctype_decl
       
   942 {
       
   943     my ($self, $hash) = @_;
       
   944     $self->Doctype ($hash->{Name}, $hash->{SystemId},
       
   945 		    $hash->{PublicId}, $hash->{Internal});
       
   946 }
       
   947 
       
   948 sub Doctype
       
   949 {
       
   950     my ($self, $name, $sysid, $pubid, $internal) = @_;
       
   951     $self->{RootElement} = $name;
       
   952 
       
   953     my $context = $self->{Context}->[0];
       
   954     $context->setRootElement ($name);
       
   955 
       
   956 #?? what else
       
   957 }
       
   958 
       
   959 sub Attr
       
   960 {
       
   961     my ($self, $tag, $attr, $val, $specified) = @_;
       
   962 
       
   963 #print "Attr for tag=$tag attr=$attr val=$val spec=$specified\n";
       
   964 
       
   965     my $arule = $self->{ARule}->{$tag};
       
   966     if (defined $arule && $arule->{Defined}->{$attr})
       
   967     {
       
   968 	$arule->check ($attr, $val, $specified);
       
   969     }
       
   970     else
       
   971     {
       
   972 	$self->fail (103, "undefined attribute [$attr]", Element => $tag);
       
   973     }
       
   974 }
       
   975 
       
   976 sub EndAttr
       
   977 {
       
   978     my $self = shift;
       
   979 
       
   980     my $arule = $self->{CurrARule};
       
   981     if (defined $arule)
       
   982     {
       
   983 	$arule->EndAttr;
       
   984     }
       
   985 }
       
   986 
       
   987 # PerlSAX API
       
   988 sub start_element
       
   989 {
       
   990     my ($self, $hash) = @_;
       
   991     my $tag = $hash->{Name};
       
   992     my $attr = $hash->{Attributes};
       
   993 
       
   994     $self->Start ($tag);
       
   995 
       
   996     if (exists $hash->{AttributeOrder})
       
   997     {
       
   998 	my $defaulted = $hash->{Defaulted};
       
   999 	my @order = @{ $hash->{AttributeOrder} };
       
  1000 
       
  1001 	# Specified attributes
       
  1002 	for (my $i = 0; $i < $defaulted; $i++)
       
  1003 	{
       
  1004 	    my $a = $order[$i];
       
  1005 	    $self->Attr ($tag, $a, $attr->{$a}, 1);
       
  1006 	}
       
  1007 
       
  1008 	# Defaulted attributes
       
  1009 	for (my $i = $defaulted; $i < @order; $i++)
       
  1010 	{
       
  1011 	    my $attr = $order[$i];
       
  1012 	    $self->Attr ($tag, $a, $attr->{$a}, 0);
       
  1013 	}
       
  1014     }
       
  1015     else
       
  1016     {
       
  1017 	# Assume all attributes were specified
       
  1018 	my @attr = %$attr;
       
  1019 	my ($key, $val);
       
  1020 	while ($key = shift @attr)
       
  1021 	{
       
  1022 	    $val = shift @attr;
       
  1023 	    
       
  1024 	    $self->Attr ($tag, $key, $val, 1);
       
  1025 	}
       
  1026     }
       
  1027     $self->EndAttr;
       
  1028 }
       
  1029 
       
  1030 sub Start
       
  1031 {
       
  1032     my ($self, $tag) = @_;
       
  1033 #?? if first tag, check with root element - or does expat check this already?
       
  1034 
       
  1035     my $context = $self->{Context};
       
  1036     $context->[0]->Start ($self, $tag);
       
  1037 
       
  1038     my $erule = $self->{ERule}->{$tag};
       
  1039     if (defined $erule)
       
  1040     {
       
  1041 	unshift @$context, $erule->context;
       
  1042     }
       
  1043     else
       
  1044     {
       
  1045 	# It's not a real error according to the XML Spec.
       
  1046 	$self->fail (101, "undefined ELEMENT [$tag]");
       
  1047 	unshift @$context, new XML::Checker::Context::ANY;
       
  1048     }
       
  1049 
       
  1050 #?? what about ARule ??
       
  1051     my $arule = $self->{ARule}->{$tag};
       
  1052     if (defined $arule)
       
  1053     {
       
  1054 	$self->{CurrARule} = $arule;
       
  1055 	$arule->StartAttr;
       
  1056     }
       
  1057 }
       
  1058 
       
  1059 # PerlSAX API
       
  1060 sub end_element
       
  1061 {
       
  1062     shift->End;
       
  1063 }
       
  1064 
       
  1065 sub End
       
  1066 {
       
  1067     my ($self) = @_;
       
  1068     my $context = $self->{Context};
       
  1069 
       
  1070     $context->[0]->End ($self);
       
  1071     shift @$context;
       
  1072 }
       
  1073 
       
  1074 # PerlSAX API
       
  1075 sub characters
       
  1076 {
       
  1077     my ($self, $hash) = @_;
       
  1078     my $data = $hash->{Data};
       
  1079 
       
  1080     if ($self->{InCDATA})
       
  1081     {
       
  1082 	$self->CData ($data);
       
  1083     }
       
  1084     else
       
  1085     {
       
  1086 	$self->Char ($data);
       
  1087     }
       
  1088 }
       
  1089 
       
  1090 # PerlSAX API
       
  1091 sub start_cdata
       
  1092 {
       
  1093     $_[0]->{InCDATA} = 1;
       
  1094 }
       
  1095 
       
  1096 # PerlSAX API
       
  1097 sub end_cdata
       
  1098 {
       
  1099     $_[0]->{InCDATA} = 0;
       
  1100 }
       
  1101 
       
  1102 sub Char
       
  1103 {
       
  1104     my ($self, $text) = @_;
       
  1105     my $context = $self->{Context};
       
  1106 
       
  1107     # NOTE: calls to isWS may set this to 1.
       
  1108     $INSIGNIF_WS = 0;
       
  1109 
       
  1110     $context->[0]->Char ($self, $text);
       
  1111 }
       
  1112 
       
  1113 # Treat CDATASection same as Char (Text)
       
  1114 sub CData
       
  1115 {
       
  1116     my ($self, $cdata) = @_;
       
  1117     my $context = $self->{Context};
       
  1118 
       
  1119     $context->[0]->Char ($self, $cdata);
       
  1120 
       
  1121     # CDATASection can never be insignificant whitespace
       
  1122     $INSIGNIF_WS = 0;
       
  1123 #?? I'm not sure if this assumption is correct
       
  1124 }
       
  1125 
       
  1126 # PerlSAX API
       
  1127 sub comment
       
  1128 {
       
  1129     my ($self, $hash) = @_;
       
  1130     $self->Comment ($hash->{Data});
       
  1131 }
       
  1132 
       
  1133 sub Comment
       
  1134 {
       
  1135 # ?? what can be checked here?
       
  1136 }
       
  1137 
       
  1138 # PerlSAX API
       
  1139 sub entity_reference
       
  1140 {
       
  1141     my ($self, $hash) = @_;
       
  1142     $self->EntityRef ($hash->{Name}, 0);
       
  1143 #?? parameter entities (like %par;) are NOT supported!
       
  1144 # PerlSAX::handle_default should be fixed!
       
  1145 }
       
  1146 
       
  1147 sub EntityRef
       
  1148 {
       
  1149     my ($self, $ref, $isParam) = @_;
       
  1150 
       
  1151     if ($isParam)
       
  1152     {
       
  1153 	# expand to "%name;"
       
  1154 	print STDERR "XML::Checker::Entity -  parameter Entity (%ent;) not implemented\n";
       
  1155     }
       
  1156     else
       
  1157     {
       
  1158 	# Treat same as Char - for now
       
  1159 	my $context = $self->{Context};
       
  1160 	$context->[0]->Char ($self, "&$ref;");
       
  1161 	$INSIGNIF_WS = 0;
       
  1162 #?? I could count the number of times each Entity is referenced
       
  1163     }
       
  1164 }
       
  1165 
       
  1166 # PerlSAX API
       
  1167 sub unparsed_entity_decl
       
  1168 {
       
  1169     my ($self, $hash) = @_;
       
  1170     $self->Unparsed ($hash->{Name});
       
  1171 #?? what about Base, SytemId, PublicId ?
       
  1172 }
       
  1173 
       
  1174 sub Unparsed
       
  1175 {
       
  1176     my ($self, $entity) = @_;
       
  1177 #    print "ARule::Unparsed $entity\n";
       
  1178     if ($self->{Unparsed}->{$entity})
       
  1179     {
       
  1180 	$self->fail (112, "unparsed ENTITY [$entity] already defined");
       
  1181     }
       
  1182     else
       
  1183     {
       
  1184 	$self->{Unparsed}->{$entity} = 1;
       
  1185     }
       
  1186 }
       
  1187 
       
  1188 # PerlSAX API
       
  1189 sub notation_decl
       
  1190 {
       
  1191     my ($self, $hash) = @_;
       
  1192     $self->Notation ($hash->{Name});
       
  1193 #?? what about Base, SytemId, PublicId ?
       
  1194 }
       
  1195 
       
  1196 sub Notation
       
  1197 {
       
  1198     my ($self, $notation) = @_;
       
  1199     if ($self->{NOTATION}->{$notation})
       
  1200     {
       
  1201 	$self->fail (113, "NOTATION [$notation] already defined");
       
  1202     }
       
  1203     else
       
  1204     {
       
  1205 	$self->{NOTATION}->{$notation} = 1;
       
  1206     }
       
  1207 }
       
  1208 
       
  1209 # PerlSAX API
       
  1210 sub entity_decl
       
  1211 {
       
  1212     my ($self, $hash) = @_;
       
  1213 
       
  1214     $self->Entity ($hash->{Name}, $hash->{Value}, $hash->{SystemId},
       
  1215 		   $hash->{PublicId}, $hash->{'Notation'});
       
  1216 }
       
  1217 
       
  1218 sub Entity
       
  1219 {
       
  1220     my ($self, $name, $val, $sysId, $pubId, $ndata) = @_;
       
  1221 
       
  1222     if (exists $self->{ENTITY}->{$name})
       
  1223     {
       
  1224 	$self->fail (114, "ENTITY [$name] already defined");
       
  1225     }
       
  1226     else
       
  1227     {
       
  1228 	$self->{ENTITY}->{$name} = $val;
       
  1229     }    
       
  1230 }
       
  1231 
       
  1232 # PerlSAX API
       
  1233 #sub xml_decl {} $hash=> Version, Encoding, Standalone
       
  1234 # Don't implement resolve_entity() which is called by ExternEnt!
       
  1235 #sub processing_instruction {} $hash=> Target, Data
       
  1236 
       
  1237 # Returns whether the Char data is whitespace and also updates the
       
  1238 # $INSIGNIF_WS variable to indicate whether it is insignificant whitespace.
       
  1239 # Note that this method is only called in places where potential whitespace
       
  1240 # can be insignificant (i.e. when the ERule is Children or EMPTY)
       
  1241 sub isWS
       
  1242 {
       
  1243     $INSIGNIF_WS = ($_[1] =~ /^\s*$/);
       
  1244 }
       
  1245 
       
  1246 sub isInsignifWS
       
  1247 {
       
  1248     $INSIGNIF_WS;
       
  1249 }
       
  1250 
       
  1251 sub fail
       
  1252 {
       
  1253     my $self = shift;
       
  1254     &$FAIL (@_);
       
  1255 }
       
  1256 
       
  1257 sub print_error		# static
       
  1258 {
       
  1259     my $str = error_string (@_);
       
  1260     print STDERR $str;
       
  1261 }
       
  1262 
       
  1263 sub error_string	# static
       
  1264 {
       
  1265     my $code = shift;
       
  1266     my $msg = shift;
       
  1267 
       
  1268     my @a = ();
       
  1269     my ($key, $val);
       
  1270     while ($key = shift)
       
  1271     {
       
  1272 	$val = shift;
       
  1273 	push @a, ("$key " . (defined $val ? $val : "(undef)"));
       
  1274     }
       
  1275 
       
  1276     my $cat = $code >= 200 ? ($code >= 300 ? "INFO" : "WARNING") : "ERROR";
       
  1277     my $str = join (", ", @a);
       
  1278     $str = length($str) ? "\tContext: $str\n" : "";
       
  1279 
       
  1280     "XML::Checker $cat-$code: $msg\n$str";
       
  1281 }
       
  1282 
       
  1283 sub debug
       
  1284 {
       
  1285     my ($self) = @_;
       
  1286     my $context = $self->{Context}->[0];
       
  1287     my $c = $context ? $context->debug : "no context";
       
  1288     my $root = $self->{RootElement};
       
  1289 
       
  1290     "Checker[$c,RootElement=$root]";
       
  1291 }
       
  1292 
       
  1293 1; # package return code
       
  1294 
       
  1295 __END__
       
  1296 
       
  1297 =head1 NAME
       
  1298 
       
  1299 XML::Checker - A perl module for validating XML
       
  1300 
       
  1301 =head1 SYNOPSIS
       
  1302 
       
  1303 L<XML::Checker::Parser> - an L<XML::Parser> that validates at parse time
       
  1304 
       
  1305 L<XML::DOM::ValParser> - an L<XML::DOM::Parser> that validates at parse time
       
  1306 
       
  1307 (Some of the package names may change! This is only an alpha release...)
       
  1308 
       
  1309 =head1 DESCRIPTION
       
  1310 
       
  1311 XML::Checker can be used in different ways to validate XML. See the manual
       
  1312 pages of L<XML::Checker::Parser> and L<XML::DOM::ValParser>
       
  1313 for more information. 
       
  1314 
       
  1315 This document only describes common topics like error handling
       
  1316 and the XML::Checker class itself.
       
  1317 
       
  1318 WARNING: Not all errors are currently checked. Almost everything is subject to
       
  1319 change. Some reported errors may not be real errors.
       
  1320 
       
  1321 =head1 ERROR HANDLING
       
  1322 
       
  1323 Whenever XML::Checker (or one of the packages that uses XML::Checker) detects a
       
  1324 potential error, the 'fail handler' is called. It is currently also called 
       
  1325 to report information, like how many times an Entity was referenced. 
       
  1326 (The whole error handling mechanism is subject to change, I'm afraid...)
       
  1327 
       
  1328 The default fail handler is XML::Checker::print_error(), which prints an error 
       
  1329 message to STDERR. It does not stop the XML::Checker, so it will continue 
       
  1330 looking for other errors. 
       
  1331 The error message is created with XML::Checker::error_string().
       
  1332 
       
  1333 You can define your
       
  1334 own fail handler in two ways, locally and globally. Use a local variable to
       
  1335 temporarily override the fail handler. This way the default fail handler is restored
       
  1336 when the local variable goes out of scope, esp. when exceptions are thrown e.g.
       
  1337 
       
  1338  # Using a local variable to temporarily override the fail handler (preferred)
       
  1339  { # new block - start of local scope
       
  1340    local $XML::Checker::FAIL = \&my_fail;
       
  1341    ... your code here ...
       
  1342  } # end of block - the previous fail handler is restored
       
  1343 
       
  1344 You can also set the error handler globally, risking that your code may not 
       
  1345 be reusable or may clash with other modules that use XML::Checker.
       
  1346 
       
  1347  # Globally setting the fail handler (not recommended)
       
  1348  $XML::Checker::FAIL = \&my_fail;
       
  1349  ... rest of your code ...
       
  1350 
       
  1351 The fail handler is called with the following parameters ($code, $msg, @context), 
       
  1352 where $code is the error code, $msg is the error description and 
       
  1353 @context contains information on where the error occurred. The @context is
       
  1354 a (ordered) list of (key,value) pairs and can easily be turned into a hash.
       
  1355 It contains the following information:
       
  1356 
       
  1357  Element - tag name of Element node (if applicable)
       
  1358  Attr - attribute name (if applicable)
       
  1359  ChildElementIndex - if applicable (see error 157)
       
  1360  line - only when parsing
       
  1361  column - only when parsing
       
  1362  byte - only when parsing (-1 means: end of file)
       
  1363 
       
  1364 Some examples of fail handlers:
       
  1365 
       
  1366  # Don't print info messages
       
  1367  sub my_fail
       
  1368  {
       
  1369      my $code = shift;
       
  1370      print STDERR XML::Checker::error_message ($code, @_)
       
  1371          if $code < 300;
       
  1372  }
       
  1373 
       
  1374  # Die when the first error is encountered - this will stop
       
  1375  # the parsing process. Ignore information messages.
       
  1376  sub my_fail
       
  1377  {
       
  1378      my $code = shift;
       
  1379      die XML::Checker::error_message ($code, @_) if $code < 300;
       
  1380  }
       
  1381 
       
  1382  # Count the number of undefined NOTATION references
       
  1383  # and print the error as usual
       
  1384  sub my_fail
       
  1385  {
       
  1386      my $code = shift;
       
  1387      $count_undef_notations++ if $code == 100;
       
  1388      XML::Checker::print_error ($code, @_);
       
  1389  }
       
  1390 
       
  1391  # Die when an error is encountered.
       
  1392  # Don't die if a warning or info message is encountered, just print a message.
       
  1393  sub my_fail {
       
  1394      my $code = shift;
       
  1395      die XML::Checker::error_string ($code, @_) if $code < 200;
       
  1396      XML::Checker::print_error ($code, @_);
       
  1397  }
       
  1398 
       
  1399 =head1 INSIGNIFICANT WHITESPACE
       
  1400 
       
  1401 XML::Checker keeps track of whether whitespace found in character data 
       
  1402 is significant or not. It is considered insignicant if it is found inside
       
  1403 an element that has a ELEMENT rule that is not of type Mixed or of type ANY. 
       
  1404 (A Mixed ELEMENT rule does contains the #PCDATA keyword. 
       
  1405 An ANY rule contains the ANY keyword. See the XML spec for more info.)
       
  1406 
       
  1407 XML::Checker can not determine whether whitespace is insignificant in those two 
       
  1408 cases, because they both allow regular character data to appear within
       
  1409 XML elements and XML::Checker can therefore not deduce whether whitespace 
       
  1410 is part of the actual data or was just added for readability of the XML file.
       
  1411 
       
  1412 XML::Checker::Parser and XML::DOM::ValParser both have the option to skip
       
  1413 insignificant whitespace when setting B<SkipInsignifWS> to 1 in their constructor.
       
  1414 If set, they will not call the Char handler when insignificant whitespace is
       
  1415 encountered. This means that in XML::DOM::ValParser no Text nodes are created
       
  1416 for insignificant whitespace.
       
  1417 
       
  1418 Regardless of whether the SkipInsignifWS options is set, XML::Checker always 
       
  1419 keeps track of whether whitespace is insignificant. After making a call to
       
  1420 XML::Checker's Char handler, you can find out if it was insignificant whitespace
       
  1421 by calling the isInsignifWS method.
       
  1422 
       
  1423 When using multiple (nested) XML::Checker instances or when using XML::Checker
       
  1424 without using XML::Checker::Parser or XML::DOM::ValParser (which hardly anybody
       
  1425 probably will), make sure to set a local variable in the scope of your checking
       
  1426 code, e.g.
       
  1427 
       
  1428   { # new block - start of local scope
       
  1429     local $XML::Checker::INSIGNIF_WS = 0;
       
  1430     ... insert your code here ...
       
  1431   } # end of scope
       
  1432 
       
  1433 =head1 ERROR CODES
       
  1434 
       
  1435 There are 3 categories, errors, warnings and info messages.
       
  1436 (The codes are still subject to change, as well the error descriptions.) 
       
  1437 
       
  1438 Most errors have a link to the appropriate Validaty Constraint (B<VC>)
       
  1439 or other section in the XML specification.
       
  1440 
       
  1441 =head2 ERROR Messages
       
  1442 
       
  1443 =head2 100 - 109
       
  1444 
       
  1445 =over 4
       
  1446 
       
  1447 =item *
       
  1448 
       
  1449 B<100> - undefined NOTATION [$notation] in ATTLIST
       
  1450 
       
  1451 The ATTLIST contained a Notation reference that was not defined in a
       
  1452 NOTATION definition. 
       
  1453 B<VC:> L<Notation Attributes|http://www.w3.org/TR/REC-xml#notatn>
       
  1454  
       
  1455 
       
  1456 =item *
       
  1457 
       
  1458 B<101> - undefined ELEMENT [$tagName]
       
  1459 
       
  1460 The specified Element was never defined in an ELEMENT definition.
       
  1461 This is not an error according to the XML spec.
       
  1462 See L<Element Type Declarations|http://www.w3.org/TR/REC-xml#elemdecls>
       
  1463  
       
  1464 
       
  1465 =item *
       
  1466 
       
  1467 B<102> - undefined unparsed ENTITY [$entity]
       
  1468 
       
  1469 The attribute value referenced an undefined unparsed entity.
       
  1470 B<VC:> L<Entity Name|http://www.w3.org/TR/REC-xml#entname>
       
  1471  
       
  1472 
       
  1473 =item *
       
  1474 
       
  1475 B<103> - undefined attribute [$attrName]
       
  1476 
       
  1477 The specified attribute was not defined in an ATTLIST for that Element.
       
  1478 B<VC:> L<Attribute Value Type|http://www.w3.org/TR/REC-xml#ValueType>
       
  1479  
       
  1480 
       
  1481 =back
       
  1482 
       
  1483 =head2 110 - 119
       
  1484 
       
  1485 =over 4
       
  1486 
       
  1487 =item *
       
  1488 
       
  1489 B<110> - attribute [$attrName] of element [$tagName] already defined
       
  1490 
       
  1491 The specified attribute was already defined in this ATTLIST definition or
       
  1492 in a previous one.
       
  1493 This is not an error according to the XML spec.
       
  1494 See L<Attribute-List Declarations|http://www.w3.org/TR/REC-xml#attdecls>
       
  1495  
       
  1496 
       
  1497 =item *
       
  1498 
       
  1499 B<111> - ID [$value] already defined
       
  1500 
       
  1501 An ID with the specified value was already defined in an attribute
       
  1502 within the same document.
       
  1503 B<VC:> L<ID|http://www.w3.org/TR/REC-xml#id>
       
  1504  
       
  1505 
       
  1506 =item *
       
  1507 
       
  1508 B<112> - unparsed ENTITY [$entity] already defined
       
  1509 
       
  1510 This is not an error according to the XML spec.
       
  1511 See L<Entity Declarations|http://www.w3.org/TR/REC-xml#sec-entity-decl>
       
  1512  
       
  1513 
       
  1514 =item *
       
  1515 
       
  1516 B<113> - NOTATION [$notation] already defined
       
  1517  
       
  1518 
       
  1519 =item *
       
  1520 
       
  1521 B<114> - ENTITY [$entity] already defined
       
  1522 
       
  1523 This is not an error according to the XML spec.
       
  1524 See L<Entity Declarations|http://www.w3.org/TR/REC-xml#sec-entity-decl>
       
  1525  
       
  1526 
       
  1527 =item *
       
  1528 
       
  1529 B<115> - ELEMENT [$name] already defined
       
  1530 B<VC:> L<Unique Element Type Declaration|http://www.w3.org/TR/REC-xml#EDUnique>
       
  1531  
       
  1532 
       
  1533 =back
       
  1534 
       
  1535 =head2 120 - 129
       
  1536 
       
  1537 =over 4
       
  1538 
       
  1539 =item *
       
  1540 
       
  1541 B<120> - invalid default ENTITY [$default]
       
  1542 
       
  1543 (Or IDREF or NMTOKEN instead of ENTITY.)
       
  1544 The ENTITY, IDREF or NMTOKEN reference in the default attribute 
       
  1545 value for an attribute with types ENTITY, IDREF or NMTOKEN was not
       
  1546 valid.
       
  1547 B<VC:> L<Attribute Default Legal|http://www.w3.org/TR/REC-xml#defattrvalid>
       
  1548  
       
  1549 
       
  1550 =item *
       
  1551 
       
  1552 B<121> - invalid default [$token] in ENTITIES [$default]
       
  1553 
       
  1554 (Or IDREFS or NMTOKENS instead of ENTITIES)
       
  1555 One of the ENTITY, IDREF or NMTOKEN references in the default attribute 
       
  1556 value for an attribute with types ENTITIES, IDREFS or NMTOKENS was not
       
  1557 valid.
       
  1558 B<VC:> L<Attribute Default Legal|http://www.w3.org/TR/REC-xml#defattrvalid>
       
  1559  
       
  1560 
       
  1561 =item *
       
  1562 
       
  1563 B<122> - invalid default attribute value [$default]
       
  1564 
       
  1565 The specified default attribute value is not a valid attribute value.
       
  1566 B<VC:> L<Attribute Default Legal|http://www.w3.org/TR/REC-xml#defattrvalid>
       
  1567  
       
  1568 
       
  1569 =item *
       
  1570 
       
  1571 B<123> - invalid default ID [$default], must be #REQUIRED or #IMPLIED
       
  1572 
       
  1573 The default attribute value for an attribute of type ID has to be 
       
  1574 #REQUIRED or #IMPLIED.
       
  1575 B<VC:> L<ID Attribute Default|http://www.w3.org/TR/REC-xml#id-default>
       
  1576  
       
  1577 
       
  1578 =item *
       
  1579 
       
  1580 B<124> - bad model [$model] for ELEMENT [$name]
       
  1581 
       
  1582 The model in the ELEMENT definition did not conform to the XML syntax 
       
  1583 for Mixed models.
       
  1584 See L<Mixed Content|http://www.w3.org/TR/REC-xml#sec-mixed-content>
       
  1585  
       
  1586 
       
  1587 =back
       
  1588 
       
  1589 =head2 130 - 139
       
  1590 
       
  1591 =over 4
       
  1592 
       
  1593 =item *
       
  1594 
       
  1595 B<130> - invalid NMTOKEN [$attrValue]
       
  1596 
       
  1597 The attribute value is not a valid NmToken token.
       
  1598 B<VC:> L<Enumeration|http://www.w3.org/TR/REC-xml#enum>
       
  1599  
       
  1600 
       
  1601 =item *
       
  1602 
       
  1603 B<131> - invalid ID [$attrValue]
       
  1604 
       
  1605 The specified attribute value is not a valid Name token.
       
  1606 B<VC:> L<ID|http://www.w3.org/TR/REC-xml#id>
       
  1607  
       
  1608 
       
  1609 =item *
       
  1610 
       
  1611 B<132> - invalid IDREF [$value]
       
  1612 
       
  1613 The specified attribute value is not a valid Name token.  
       
  1614 B<VC:> L<IDREF|http://www.w3.org/TR/REC-xml#idref>
       
  1615  
       
  1616 
       
  1617 =item *
       
  1618 
       
  1619 B<133> - invalid ENTITY name [$name]
       
  1620 
       
  1621 The specified attribute value is not a valid Name token.  
       
  1622 B<VC:> L<Entity Name|http://www.w3.org/TR/REC-xml#entname>
       
  1623  
       
  1624 
       
  1625 =item *
       
  1626 
       
  1627 B<134> - invalid Enumeration value [$value] in ATTLIST
       
  1628 
       
  1629 The specified value is not a valid NmToken (see XML spec for def.)
       
  1630 See definition of L<NmToken|http://www.w3.org/TR/REC-xml#NT-Nmtoken>
       
  1631  
       
  1632 
       
  1633 =item *
       
  1634 
       
  1635 B<135> - empty NOTATION list in ATTLIST
       
  1636 
       
  1637 The NOTATION list of the ATTLIST definition did not contain any NOTATION
       
  1638 references.
       
  1639 See definition of L<NotationType|http://www.w3.org/TR/REC-xml#NT-NotationType>
       
  1640  
       
  1641 
       
  1642 =item *
       
  1643 
       
  1644 B<136> - empty Enumeration list in ATTLIST
       
  1645 
       
  1646 The ATTLIST definition of the attribute of type Enumeration did not
       
  1647 contain any values.
       
  1648 See definition of L<Enumeration|http://www.w3.org/TR/REC-xml#NT-Enumeration>
       
  1649  
       
  1650 
       
  1651 =item *
       
  1652 
       
  1653 B<137> - invalid ATTLIST type [$type]
       
  1654 
       
  1655 The attribute type has to be one of: ID, IDREF, IDREFS, ENTITY, ENTITIES, 
       
  1656 NMTOKEN, NMTOKENS, CDATA, NOTATION or an Enumeration.
       
  1657 See definition of L<AttType|http://www.w3.org/TR/REC-xml#NT-AttType>
       
  1658  
       
  1659 
       
  1660 =back
       
  1661 
       
  1662 =head2 150 - 159
       
  1663 
       
  1664 =over 4
       
  1665 
       
  1666 =item *
       
  1667 
       
  1668 B<150> - bad #FIXED attribute value [$value], it should be [$default]
       
  1669 
       
  1670 The specified attribute was defined as #FIXED in the ATTLIST definition
       
  1671 and the found attribute $value differs from the specified $default value.
       
  1672 B<VC:> L<Fixed Attribute Default|http://www.w3.org/TR/REC-xml#FixedAttr>
       
  1673 
       
  1674 
       
  1675 =item *
       
  1676 
       
  1677 B<151> - only one ID allowed in ATTLIST per element first=[$attrName]
       
  1678 
       
  1679 The ATTLIST definitions for an Element may contain only one attribute
       
  1680 with the type ID. The specified $attrName is the one that was found first.
       
  1681 B<VC:> L<One ID per Element Type|http://www.w3.org/TR/REC-xml#one-id-per-el>
       
  1682 
       
  1683 
       
  1684 =item *
       
  1685 
       
  1686 B<152> - Element should be EMPTY, found Element [$tagName]
       
  1687 
       
  1688 The ELEMENT definition for the specified Element said it should be
       
  1689 EMPTY, but a child Element was found.
       
  1690 B<VC:> L<Element Valid (sub1)|http://www.w3.org/TR/REC-xml#elementvalid>
       
  1691  
       
  1692 
       
  1693 =item *
       
  1694 
       
  1695 B<153> - Element should be EMPTY, found text [$text]
       
  1696 
       
  1697 The ELEMENT definition for the specified Element said it should be
       
  1698 EMPTY, but text was found. Currently, whitespace is not allowed between the
       
  1699 open and close tag. (This may be wrong, please give feedback.)
       
  1700 To allow whitespace (subject to change), set:
       
  1701 
       
  1702     $XML::Checker::Context::EMPTY::ALLOW_WHITE_SPACE = 1;
       
  1703 
       
  1704 B<VC:> L<Element Valid (sub1)|http://www.w3.org/TR/REC-xml#elementvalid>
       
  1705  
       
  1706 
       
  1707 =item *
       
  1708 
       
  1709 B<154> - bad order of Elements Found=[$found] RE=[$re]
       
  1710 
       
  1711 The child elements of the specified Element did not match the
       
  1712 regular expression found in the ELEMENT definition. $found contains
       
  1713 a comma separated list of all the child element tag names that were found.
       
  1714 $re contains the (decoded) regular expression that was used internally.
       
  1715 B<VC:> L<Element Valid|http://www.w3.org/TR/REC-xml#elementvalid>
       
  1716  
       
  1717 
       
  1718 =item *
       
  1719 
       
  1720 B<155> - more than one root Element [$tags]
       
  1721 
       
  1722 An XML Document may only contain one Element.
       
  1723 $tags is a comma separated list of element tag names encountered sofar.
       
  1724 L<XML::Parser> (expat) throws 'no element found' exception.
       
  1725 See two_roots.xml for an example.
       
  1726 See definition of L<document|http://www.w3.org/TR/REC-xml#dt-root>
       
  1727  
       
  1728 
       
  1729 =item *
       
  1730 
       
  1731 B<156> - unexpected root Element [$tagName], expected [$rootTagName]
       
  1732 
       
  1733 The tag name of the root Element of the XML Document differs from the name 
       
  1734 specified in the DOCTYPE section.
       
  1735 L<XML::Parser> (expat) throws 'not well-formed' exception.
       
  1736 See bad_root.xml for an example.
       
  1737 B<VC:> L<Root Element Type|http://www.w3.org/TR/REC-xml#vc-roottype>
       
  1738  
       
  1739 
       
  1740 =item *
       
  1741 
       
  1742 B<157> - unexpected Element [$tagName]
       
  1743 
       
  1744 The ELEMENT definition for the specified Element does not allow child
       
  1745 Elements with the specified $tagName.
       
  1746 B<VC:> L<Element Valid|http://www.w3.org/TR/REC-xml#elementvalid>
       
  1747 
       
  1748 The error context contains ChildElementIndex which is the index within 
       
  1749 its parent Element (counting only Element nodes.)
       
  1750  
       
  1751 
       
  1752 =item *
       
  1753 
       
  1754 B<158> - unspecified value for #IMPLIED attribute [$attrName]
       
  1755 
       
  1756 The ATTLIST for the specified attribute said the attribute was #IMPLIED,
       
  1757 which means the user application should supply a value, but the attribute
       
  1758 value was not specified. (User applications should pass a value and set
       
  1759 $specified to 1 in the Attr handler.)
       
  1760  
       
  1761 
       
  1762 =item *
       
  1763 
       
  1764 B<159> - unspecified value for #REQUIRED attribute [$attrName]
       
  1765 
       
  1766 The ATTLIST for the specified attribute said the attribute was #REQUIRED,
       
  1767 which means that a value should have been specified.
       
  1768 B<VC:> L<Required Attribute|http://www.w3.org/TR/REC-xml#RequiredAttr>
       
  1769  
       
  1770 
       
  1771 =back
       
  1772 
       
  1773 =head2 160 - 169
       
  1774 
       
  1775 =over 4
       
  1776 
       
  1777 =item *
       
  1778 
       
  1779 B<160> - invalid Enumeration value [$attrValue]
       
  1780 
       
  1781 The specified attribute value does not match one of the Enumeration values
       
  1782 in the ATTLIST.
       
  1783 B<VC:> L<Enumeration|http://www.w3.org/TR/REC-xml#enum>
       
  1784  
       
  1785 
       
  1786 =item *
       
  1787 
       
  1788 B<161> - invalid NOTATION value [$attrValue]
       
  1789 
       
  1790 The specifed attribute value was not found in the list of possible NOTATION 
       
  1791 references as found in the ATTLIST definition.
       
  1792 B<VC:> L<Notation Attributes|http://www.w3.org/TR/REC-xml#notatn>
       
  1793  
       
  1794 
       
  1795 =item *
       
  1796 
       
  1797 B<162> - undefined NOTATION [$attrValue]
       
  1798 
       
  1799 The NOTATION referenced by the specified attribute value was not defined.
       
  1800 B<VC:> L<Notation Attributes|http://www.w3.org/TR/REC-xml#notatn>
       
  1801  
       
  1802 
       
  1803 =back
       
  1804 
       
  1805 =head2 WARNING Messages (200 and up)
       
  1806 
       
  1807 =over 4
       
  1808 
       
  1809 =item *
       
  1810 
       
  1811 B<200> - undefined ID [$id] was referenced [$n] times
       
  1812 
       
  1813 The specified ID was referenced $n times, but never defined in an attribute
       
  1814 value with type ID.
       
  1815 B<VC:> L<IDREF|http://www.w3.org/TR/REC-xml#idref>
       
  1816  
       
  1817 
       
  1818 =back
       
  1819 
       
  1820 =head2 INFO Messages (300 and up)
       
  1821 
       
  1822 =over 4
       
  1823 
       
  1824 =item *
       
  1825 
       
  1826 B<300> - [$n] references to ID [$id]
       
  1827 
       
  1828 The specified ID was referenced $n times.
       
  1829  
       
  1830 
       
  1831 =back
       
  1832 
       
  1833 =head2 Not checked
       
  1834 
       
  1835 The following errors are already checked by L<XML::Parser> (expat) and
       
  1836 are currently not checked by XML::Checker:
       
  1837 
       
  1838 (?? TODO - add more info)
       
  1839 
       
  1840 =over 4
       
  1841 
       
  1842 =item root element is missing
       
  1843 
       
  1844 L<XML::Parser> (expat) throws 'no element found' exception. 
       
  1845 See no_root.xml for an example.
       
  1846 
       
  1847 =back
       
  1848 
       
  1849 =head1 XML::Checker
       
  1850 
       
  1851 XML::Checker can be easily plugged into your application. 
       
  1852 It uses mostly the same style of event handlers (or callbacks) as L<XML::Parser>.
       
  1853 See L<XML::Parser> manual page for descriptions of most handlers. 
       
  1854  
       
  1855 It also implements PerlSAX style event handlers. See L<PerlSAX interface>.
       
  1856 
       
  1857 Currently, the XML::Checker object is a blessed hash with the following 
       
  1858 (potentially useful) entries:
       
  1859 
       
  1860  $checker->{RootElement} - root element name as found in the DOCTYPE
       
  1861  $checker->{NOTATION}->{$notation} - is 1 if the NOTATION was defined
       
  1862  $checker->{ENTITY}->{$name} - contains the (first) ENTITY value if defined
       
  1863  $checker->{Unparsed}->{$entity} - is 1 if the unparsed ENTITY was defined
       
  1864  $checker->{ID}->{$id} - is 1 if the ID was defined
       
  1865  $checker->{IDREF}->{$id} - number of times the ID was referenced
       
  1866 
       
  1867  # Less useful:
       
  1868  $checker->{ERule}->{$tag} - the ELEMENT rules by Element tag name
       
  1869  $checker->{ARule}->{$tag} - the ATTLIST rules by Element tag name
       
  1870  $checker->{Context} - context stack used internally
       
  1871  $checker->{CurrARule} - current ATTLIST rule for the current Element
       
  1872 
       
  1873 =head2 XML:Checker methods
       
  1874 
       
  1875 This section is only interesting when using XML::Checker directly.
       
  1876 XML::Checker supports most event handlers that L<XML::Parser> supports with minor 
       
  1877 differences. Note that the XML::Checker event handler methods are 
       
  1878 instance methods and not static, so don't forget to call them like this,
       
  1879 without passing $expat (as in the L<XML::Parser>) handlers:
       
  1880 
       
  1881  $checker->Start($tagName);
       
  1882 
       
  1883 =over 4
       
  1884 
       
  1885 =item Constructor
       
  1886 
       
  1887  $checker = new XML::Checker;
       
  1888  $checker = new XML::Checker (%user_args);
       
  1889 
       
  1890 User data may be stored by client applications. Only $checker->{User} is
       
  1891 guaranteed not to clash with internal hash keys.
       
  1892 
       
  1893 =item getRootElement ()
       
  1894 
       
  1895  $tagName = $checker->getRootElement;
       
  1896 
       
  1897 Returns the root element name as found in the DOCTYPE
       
  1898 
       
  1899 =back
       
  1900 
       
  1901 =head2 Expat interface
       
  1902 
       
  1903 XML::Checker supports what I call the I<Expat> interface, which is 
       
  1904 the collection of methods you normally specify as the callback handlers
       
  1905 when using XML::Parser.
       
  1906 
       
  1907 Only the following L<XML::Parser> handlers are currently supported:
       
  1908 Init, Final, Char, Start, End, Element, Attlist, Doctype,
       
  1909 Unparsed, Entity, Notation. 
       
  1910 
       
  1911 I don't know how to correctly support the Default handler for all L<XML::Parser>
       
  1912 releases. The Start handler works a little different (see below) and I
       
  1913 added Attr, InitDomElem, FinalDomElem, CDATA and EntityRef handlers.
       
  1914 See L<XML::Parser> for a description of the handlers that are not listed below.
       
  1915 
       
  1916 Note that this interface may disappear, when the PerlSAX interface stabilizes.
       
  1917 
       
  1918 =over 4
       
  1919 
       
  1920 =item Start ($tag)
       
  1921 
       
  1922  $checker->Start($tag);
       
  1923 
       
  1924 Call this when an Element with the specified $tag name is encountered.
       
  1925 Different from the Start handler in L<XML::Parser>, in that no attributes 
       
  1926 are passed in (use the Attr handler for those.)
       
  1927 
       
  1928 =item Attr ($tag, $attrName, $attrValue, $isSpecified)
       
  1929 
       
  1930  $checker->Attr($tag,$attrName,$attrValue,$spec);
       
  1931 
       
  1932 Checks an attribute with the specified $attrName and $attrValue against the
       
  1933 ATTLIST definition of the element with the specified $tag name.
       
  1934 $isSpecified means whether the attribute was specified (1) or defaulted (0).
       
  1935 
       
  1936 =item EndAttr ()
       
  1937 
       
  1938  $checker->EndAttr;
       
  1939 
       
  1940 This should be called after all attributes are passed with Attr().
       
  1941 It will check which of the #REQUIRED attributes were not specified and generate
       
  1942 the appropriate error (159) for each one that is missing.
       
  1943 
       
  1944 =item CDATA ($text)
       
  1945 
       
  1946  $checker->CDATA($text);
       
  1947 
       
  1948 This should be called whenever CDATASections are encountered.
       
  1949 Similar to Char handler (but might perform different checks later...)
       
  1950 
       
  1951 =item EntityRef ($entity, $isParameterEntity)
       
  1952 
       
  1953  $checker->EntityRef($entity,$isParameterEntity);
       
  1954 
       
  1955 Checks the ENTITY reference. Set $isParameterEntity to 1 for 
       
  1956 entity references that start with '%'.
       
  1957 
       
  1958 =item InitDomElem () and FinalDomElem ()
       
  1959 
       
  1960 Used by XML::DOM::Element::check() to initialize (and cleanup) the 
       
  1961 context stack when checking a single element.
       
  1962 
       
  1963 =back
       
  1964 
       
  1965 =head2 PerlSAX interface
       
  1966 
       
  1967 XML::Checker now also supports the PerlSAX interface, so you can use XML::Checker
       
  1968 wherever you use PerlSAX handlers.
       
  1969 
       
  1970 XML::Checker implements the following methods: start_document, end_document,
       
  1971 start_element, end_element, characters, processing_instruction, comment,
       
  1972 start_cdata, end_cdata, entity_reference, notation_decl, unparsed_entity_decl,
       
  1973 entity_decl, element_decl, attlist_decl, doctype_decl, xml_decl
       
  1974 
       
  1975 Not implemented: set_document_locator, ignorable_whitespace
       
  1976 
       
  1977 See PerlSAX.pod for details. (It is called lib/PerlSAX.pod in the libxml-perl 
       
  1978 distribution which can be found at CPAN.)
       
  1979 
       
  1980 =head1 CAVEATS
       
  1981 
       
  1982 This is an alpha release. Almost everything is subject to change.
       
  1983 
       
  1984 =head1 AUTHOR
       
  1985 
       
  1986 Send bug reports, hints, tips, suggestions to Enno Derksen at
       
  1987 <F<enno@att.com>>. 
       
  1988 
       
  1989 =head1 SEE ALSO
       
  1990 
       
  1991 The home page of XML::Checker at L<http://www.erols.com/enno/checker/index.html>
       
  1992 
       
  1993 The XML spec (Extensible Markup Language 1.0) at L<http://www.w3.org/TR/REC-xml>
       
  1994 
       
  1995 The L<XML::Parser> and L<XML::Parser::Expat> manual pages.
       
  1996 
       
  1997 The other packages that come with XML::Checker: 
       
  1998 L<XML::Checker::Parser>, L<XML::DOM::ValParser>
       
  1999 
       
  2000 The DOM Level 1 specification at L<http://www.w3.org/TR/REC-DOM-Level-1>
       
  2001 
       
  2002 The PerlSAX specification. It is currently in lib/PerlSAX.pod in the
       
  2003 libxml-perl distribution by Ken MacLeod. 
       
  2004 
       
  2005 The original SAX specification (Simple API for XML) can be found at 
       
  2006 L<http://www.megginson.com/SAX> and L<http://www.megginson.com/SAX/SAX2>