common/tools/raptor/XML/SAX/PurePerl/DTDDecls.pm
changeset 906 5239d4d0bed1
parent 905 9ed73a51c728
child 907 bab81256b297
--- a/common/tools/raptor/XML/SAX/PurePerl/DTDDecls.pm	Fri Mar 05 16:16:43 2010 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,603 +0,0 @@
-# $Id: DTDDecls.pm,v 1.7 2005/10/14 20:31:20 matt Exp $
-
-package XML::SAX::PurePerl;
-
-use strict;
-use XML::SAX::PurePerl::Productions qw($NameChar $SingleChar);
-
-sub elementdecl {
-    my ($self, $reader) = @_;
-    
-    my $data = $reader->data(9);
-    return 0 unless $data =~ /^<!ELEMENT/;
-    $reader->move_along(9);
-    
-    $self->skip_whitespace($reader) ||
-        $self->parser_error("No whitespace after ELEMENT declaration", $reader);
-    
-    my $name = $self->Name($reader);
-    
-    $self->skip_whitespace($reader) ||
-        $self->parser_error("No whitespace after ELEMENT's name", $reader);
-        
-    $self->contentspec($reader, $name);
-    
-    $self->skip_whitespace($reader);
-    
-    $reader->match('>') or $self->parser_error("Closing angle bracket not found on ELEMENT declaration", $reader);
-    
-    return 1;
-}
-
-sub contentspec {
-    my ($self, $reader, $name) = @_;
-    
-    my $data = $reader->data(5);
-    
-    my $model;
-    if ($data =~ /^EMPTY/) {
-        $reader->move_along(5);
-        $model = 'EMPTY';
-    }
-    elsif ($data =~ /^ANY/) {
-        $reader->move_along(3);
-        $model = 'ANY';
-    }
-    else {
-        $model = $self->Mixed_or_children($reader);
-    }
-
-    if ($model) {
-        # call SAX callback now.
-        $self->element_decl({Name => $name, Model => $model});
-        return 1;
-    }
-    
-    $self->parser_error("contentspec not found in ELEMENT declaration", $reader);
-}
-
-sub Mixed_or_children {
-    my ($self, $reader) = @_;
-
-    my $data = $reader->data(8);
-    $data =~ /^\(/ or return; # $self->parser_error("No opening bracket in Mixed or children", $reader);
-    
-    if ($data =~ /^\(\s*\#PCDATA/) {
-        $reader->match('(');
-        $self->skip_whitespace($reader);
-        $reader->move_along(7);
-        my $model = $self->Mixed($reader);
-        return $model;
-    }
-
-    # not matched - must be Children
-    return $self->children($reader);
-}
-
-# Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' '*' )
-#               | ( '(' S* PCDATA S* ')' )
-sub Mixed {
-    my ($self, $reader) = @_;
-
-    # Mixed_or_children already matched '(' S* '#PCDATA'
-
-    my $model = '(#PCDATA';
-            
-    $self->skip_whitespace($reader);
-
-    my %seen;
-    
-    while (1) {
-        last unless $reader->match('|');
-        $self->skip_whitespace($reader);
-
-        my $name = $self->Name($reader) || 
-            $self->parser_error("No 'Name' after Mixed content '|'", $reader);
-
-        if ($seen{$name}) {
-            $self->parser_error("Element '$name' has already appeared in this group", $reader);
-        }
-        $seen{$name}++;
-
-        $model .= "|$name";
-        
-        $self->skip_whitespace($reader);
-    }
-    
-    $reader->match(')') || $self->parser_error("no closing bracket on mixed content", $reader);
-
-    $model .= ")";
-
-    if ($reader->match('*')) {
-        $model .= "*";
-    }
-    
-    return $model;
-}
-
-# [[47]] Children ::= ChoiceOrSeq Cardinality?
-# [[48]] Cp ::= ( QName | ChoiceOrSeq ) Cardinality?
-#       ChoiceOrSeq ::= '(' S* Cp ( Choice | Seq )? S* ')'
-# [[49]] Choice ::= ( S* '|' S* Cp )+
-# [[50]] Seq    ::= ( S* ',' S* Cp )+
-#        // Children ::= (Choice | Seq) Cardinality?
-#        // Cp ::= ( QName | Choice | Seq) Cardinality?
-#        // Choice ::= '(' S* Cp ( S* '|' S* Cp )+ S* ')'
-#        // Seq    ::= '(' S* Cp ( S* ',' S* Cp )* S* ')'
-# [[51]] Mixed ::= ( '(' S* PCDATA ( S* '|' S* QName )* S* ')' MixedCardinality )
-#                | ( '(' S* PCDATA S* ')' )
-#        Cardinality ::= '?' | '+' | '*'
-#        MixedCardinality ::= '*'
-sub children {
-    my ($self, $reader) = @_;
-    
-    return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader);
-}
-
-sub ChoiceOrSeq {
-    my ($self, $reader) = @_;
-    
-    $reader->match('(') or $self->parser_error("choice/seq contains no opening bracket", $reader);
-    
-    my $model = '(';
-    
-    $self->skip_whitespace($reader);
-
-    $model .= $self->Cp($reader);
-    
-    if (my $choice = $self->Choice($reader)) {
-        $model .= $choice;
-    }
-    else {
-        $model .= $self->Seq($reader);
-    }
-
-    $self->skip_whitespace($reader);
-
-    $reader->match(')') or $self->parser_error("choice/seq contains no closing bracket", $reader);
-
-    $model .= ')';
-    
-    return $model;
-}
-
-sub Cardinality {
-    my ($self, $reader) = @_;
-    # cardinality is always optional
-    my $data = $reader->data;
-    if ($data =~ /^([\?\+\*])/) {
-        $reader->move_along(1);
-        return $1;
-    }
-    return '';
-}
-
-sub Cp {
-    my ($self, $reader) = @_;
-
-    my $model;
-    my $name = eval
-    {
-	if (my $name = $self->Name($reader)) {
-	    return $name . $self->Cardinality($reader);
-	}
-    };
-    return $name if defined $name;
-    return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader);
-}
-
-sub Choice {
-    my ($self, $reader) = @_;
-    
-    my $model = '';
-    $self->skip_whitespace($reader);
-    
-    while ($reader->match('|')) {
-        $self->skip_whitespace($reader);
-        $model .= '|';
-        $model .= $self->Cp($reader);
-        $self->skip_whitespace($reader);
-    }
-
-    return $model;
-}
-
-sub Seq {
-    my ($self, $reader) = @_;
-    
-    my $model = '';
-    $self->skip_whitespace($reader);
-    
-    while ($reader->match(',')) {
-        $self->skip_whitespace($reader);
-        my $cp = $self->Cp($reader);
-        if ($cp) {
-            $model .= ',';
-            $model .= $cp;
-        }
-        $self->skip_whitespace($reader);
-    }
-
-    return $model;
-}
-
-sub AttlistDecl {
-    my ($self, $reader) = @_;
-    
-    my $data = $reader->data(9);
-    if ($data =~ /^<!ATTLIST/) {
-        # It's an attlist
-        
-        $reader->move_along(9);
-        
-        $self->skip_whitespace($reader) || 
-            $self->parser_error("No whitespace after ATTLIST declaration", $reader);
-        my $name = $self->Name($reader);
-
-        $self->AttDefList($reader, $name);
-
-        $self->skip_whitespace($reader);
-        
-        $reader->match('>') or $self->parser_error("Closing angle bracket not found on ATTLIST declaration", $reader);
-        
-        return 1;
-    }
-    
-    return 0;
-}
-
-sub AttDefList {
-    my ($self, $reader, $name) = @_;
-
-    1 while $self->AttDef($reader, $name);
-}
-
-sub AttDef {
-    my ($self, $reader, $el_name) = @_;
-
-    $self->skip_whitespace($reader) || return 0;
-    my $att_name = $self->Name($reader) || return 0;
-    $self->skip_whitespace($reader) || 
-        $self->parser_error("No whitespace after Name in attribute definition", $reader);
-    my $att_type = $self->AttType($reader);
-
-    $self->skip_whitespace($reader) ||
-        $self->parser_error("No whitespace after AttType in attribute definition", $reader);
-    my ($mode, $value) = $self->DefaultDecl($reader);
-    
-    # fire SAX event here!
-    $self->attribute_decl({
-            eName => $el_name, 
-            aName => $att_name, 
-            Type => $att_type, 
-            Mode => $mode, 
-            Value => $value,
-            });
-    return 1;
-}
-
-sub AttType {
-    my ($self, $reader) = @_;
-
-    return $self->StringType($reader) ||
-            $self->TokenizedType($reader) ||
-            $self->EnumeratedType($reader) ||
-            $self->parser_error("Can't match AttType", $reader);
-}
-
-sub StringType {
-    my ($self, $reader) = @_;
-    
-    my $data = $reader->data(5);
-    return unless $data =~ /^CDATA/;
-    $reader->move_along(5);
-    return 'CDATA';
-}
-
-sub TokenizedType {
-    my ($self, $reader) = @_;
-    
-    my $data = $reader->data(8);
-    if ($data =~ /^(IDREFS?|ID|ENTITIES|ENTITY|NMTOKENS?)/) {
-        $reader->move_along(length($1));
-        return $1;
-    }
-    return;
-}
-
-sub EnumeratedType {
-    my ($self, $reader) = @_;
-    return $self->NotationType($reader) || $self->Enumeration($reader);
-}
-
-sub NotationType {
-    my ($self, $reader) = @_;
-    
-    my $data = $reader->data(8);
-    return unless $data =~ /^NOTATION/;
-    $reader->move_along(8);
-    
-    $self->skip_whitespace($reader) ||
-        $self->parser_error("No whitespace after NOTATION", $reader);
-    $reader->match('(') or $self->parser_error("No opening bracket in notation section", $reader);
-    
-    $self->skip_whitespace($reader);
-    my $model = 'NOTATION (';
-    my $name = $self->Name($reader) ||
-        $self->parser_error("No name in notation section", $reader);
-    $model .= $name;
-    $self->skip_whitespace($reader);
-    $data = $reader->data;
-    while ($data =~ /^\|/) {
-        $reader->move_along(1);
-        $model .= '|';
-        $self->skip_whitespace($reader);
-        my $name = $self->Name($reader) ||
-            $self->parser_error("No name in notation section", $reader);
-        $model .= $name;
-        $self->skip_whitespace($reader);
-        $data = $reader->data;
-    }
-    $data =~ /^\)/ or $self->parser_error("No closing bracket in notation section", $reader);
-    $reader->move_along(1);
-    
-    $model .= ')';
-
-    return $model;
-}
-
-sub Enumeration {
-    my ($self, $reader) = @_;
-    
-    return unless $reader->match('(');
-    
-    $self->skip_whitespace($reader);
-    my $model = '(';
-    my $nmtoken = $self->Nmtoken($reader) ||
-        $self->parser_error("No Nmtoken in enumerated declaration", $reader);
-    $model .= $nmtoken;
-    $self->skip_whitespace($reader);
-    my $data = $reader->data;
-    while ($data =~ /^\|/) {
-        $model .= '|';
-        $reader->move_along(1);
-        $self->skip_whitespace($reader);
-        my $nmtoken = $self->Nmtoken($reader) ||
-            $self->parser_error("No Nmtoken in enumerated declaration", $reader);
-        $model .= $nmtoken;
-        $self->skip_whitespace($reader);
-        $data = $reader->data;
-    }
-    $data =~ /^\)/ or $self->parser_error("No closing bracket in enumerated declaration", $reader);
-    $reader->move_along(1);
-    
-    $model .= ')';
-
-    return $model;
-}
-
-sub Nmtoken {
-    my ($self, $reader) = @_;
-    return $self->Name($reader);
-}
-
-sub DefaultDecl {
-    my ($self, $reader) = @_;
-    
-    my $data = $reader->data(9);
-    if ($data =~ /^(\#REQUIRED|\#IMPLIED)/) {
-        $reader->move_along(length($1));
-        return $1;
-    }
-    my $model = '';
-    if ($data =~ /^\#FIXED/) {
-        $reader->move_along(6);
-        $self->skip_whitespace($reader) || $self->parser_error(
-                "no whitespace after FIXED specifier", $reader);
-        my $value = $self->AttValue($reader);
-        return "#FIXED", $value;
-    }
-    my $value = $self->AttValue($reader);
-    return undef, $value;
-}
-
-sub EntityDecl {
-    my ($self, $reader) = @_;
-    
-    my $data = $reader->data(8);
-    return 0 unless $data =~ /^<!ENTITY/;
-    $reader->move_along(8);
-    
-    $self->skip_whitespace($reader) || $self->parser_error(
-        "No whitespace after ENTITY declaration", $reader);
-    
-    $self->PEDecl($reader) || $self->GEDecl($reader);
-    
-    $self->skip_whitespace($reader);
-    
-    $reader->match('>') or $self->parser_error("No closing '>' in entity definition", $reader);
-    
-    return 1;
-}
-
-sub GEDecl {
-    my ($self, $reader) = @_;
-
-    my $name = $self->Name($reader) || $self->parser_error("No entity name given", $reader);
-    $self->skip_whitespace($reader) || $self->parser_error("No whitespace after entity name", $reader);
-
-    # TODO: ExternalID calls lexhandler method. Wrong place for it.
-    my $value;
-    if ($value = $self->ExternalID($reader)) {
-        $value .= $self->NDataDecl($reader);
-    }
-    else {
-        $value = $self->EntityValue($reader);
-    }
-
-    if ($self->{ParseOptions}{entities}{$name}) {
-        warn("entity $name already exists\n");
-    } else {
-        $self->{ParseOptions}{entities}{$name} = 1;
-        $self->{ParseOptions}{expanded_entity}{$name} = $value; # ???
-    }
-    # do callback?
-    return 1;
-}
-
-sub PEDecl {
-    my ($self, $reader) = @_;
-    
-    return 0 unless $reader->match('%');
-
-    $self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity marker", $reader);
-    my $name = $self->Name($reader) || $self->parser_error("No parameter entity name given", $reader);
-    $self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity name", $reader);
-    my $value = $self->ExternalID($reader) ||
-                $self->EntityValue($reader) ||
-                $self->parser_error("PE is not a value or an external resource", $reader);
-    # do callback?
-    return 1;
-}
-
-my $quotre = qr/[^%&\"]/;
-my $aposre = qr/[^%&\']/;
-
-sub EntityValue {
-    my ($self, $reader) = @_;
-    
-    my $data = $reader->data;
-    my $quote = '"';
-    my $re = $quotre;
-    if (!$data =~ /^"/) {
-        $data =~ /^'/ or $self->parser_error("Not a quote character", $reader);
-        $quote = "'";
-        $re = $aposre;
-    }
-    $reader->move_along(1);
-    
-    my $value = '';
-    
-    while (1) {
-        my $data = $reader->data;
-
-        $self->parser_error("EOF found while reading entity value", $reader)
-            unless length($data);
-        
-        if ($data =~ /^($re+)/) {
-            my $match = $1;
-            $value .= $match;
-            $reader->move_along(length($match));
-        }
-        elsif ($reader->match('&')) {
-            # if it's a char ref, expand now:
-            if ($reader->match('#')) {
-                my $char;
-                my $ref = '';
-                if ($reader->match('x')) {
-                    my $data = $reader->data;
-                    while (1) {
-                        $self->parser_error("EOF looking for reference end", $reader)
-                            unless length($data);
-                        if ($data !~ /^([0-9a-fA-F]*)/) {
-                            last;
-                        }
-                        $ref .= $1;
-                        $reader->move_along(length($1));
-                        if (length($1) == length($data)) {
-                            $data = $reader->data;
-                        }
-                        else {
-                            last;
-                        }
-                    }
-                    $char = chr_ref(hex($ref));
-                    $ref = "x$ref";
-                }
-                else {
-                    my $data = $reader->data;
-                    while (1) {
-                        $self->parser_error("EOF looking for reference end", $reader)
-                            unless length($data);
-                        if ($data !~ /^([0-9]*)/) {
-                            last;
-                        }
-                        $ref .= $1;
-                        $reader->move_along(length($1));
-                        if (length($1) == length($data)) {
-                            $data = $reader->data;
-                        }
-                        else {
-                            last;
-                        }
-                    }
-                    $char = chr($ref);
-                }
-                $reader->match(';') ||
-                    $self->parser_error("No semi-colon found after character reference", $reader);
-                if ($char !~ $SingleChar) { # match a single character
-                    $self->parser_error("Character reference '&#$ref;' refers to an illegal XML character ($char)", $reader);
-                }
-                $value .= $char;
-            }
-            else {
-                # entity refs in entities get expanded later, so don't parse now.
-                $value .= '&';
-            }
-        }
-        elsif ($reader->match('%')) {
-            $value .= $self->PEReference($reader);
-        }
-        elsif ($reader->match($quote)) {
-            # end of attrib
-            last;
-        }
-        else {
-            $self->parser_error("Invalid character in attribute value: " . substr($reader->data, 0, 1), $reader);
-        }
-    }
-    
-    return $value;
-}
-
-sub NDataDecl {
-    my ($self, $reader) = @_;
-    $self->skip_whitespace($reader) || return '';
-    my $data = $reader->data(5);
-    return '' unless $data =~ /^NDATA/;
-    $reader->move_along(5);
-    $self->skip_whitespace($reader) || $self->parser_error("No whitespace after NDATA declaration", $reader);
-    my $name = $self->Name($reader) || $self->parser_error("NDATA declaration lacks a proper Name", $reader);
-    return " NDATA $name";
-}
-
-sub NotationDecl {
-    my ($self, $reader) = @_;
-    
-    my $data = $reader->data(10);
-    return 0 unless $data =~ /^<!NOTATION/;
-    $reader->move_along(10);
-    $self->skip_whitespace($reader) ||
-        $self->parser_error("No whitespace after NOTATION declaration", $reader);
-    $data = $reader->data;
-    my $value = '';
-    while(1) {
-        $self->parser_error("EOF found while looking for end of NotationDecl", $reader)
-            unless length($data);
-        
-        if ($data =~ /^([^>]*)>/) {
-            $value .= $1;
-            $reader->move_along(length($1) + 1);
-            $self->notation_decl({Name => "FIXME", SystemId => "FIXME", PublicId => "FIXME" });
-            last;
-        }
-        else {
-            $value .= $data;
-            $reader->move_along(length($data));
-            $data = $reader->data;
-        }
-    }
-    return 1;
-}
-
-1;