common/tools/raptor/XML/SAX/PurePerl.pm
changeset 307 989c70555820
child 765 2892c791ce6c
equal deleted inserted replaced
300:228efacd68af 307:989c70555820
       
     1 # $Id: PurePerl.pm,v 1.21 2007/02/07 09:33:50 grant Exp $
       
     2 
       
     3 package XML::SAX::PurePerl;
       
     4 
       
     5 use strict;
       
     6 use vars qw/$VERSION/;
       
     7 
       
     8 $VERSION = '0.91';
       
     9 
       
    10 use XML::SAX::PurePerl::Productions qw($Any $CharMinusDash $SingleChar);
       
    11 use XML::SAX::PurePerl::Reader;
       
    12 use XML::SAX::PurePerl::EncodingDetect ();
       
    13 use XML::SAX::Exception;
       
    14 use XML::SAX::PurePerl::DocType ();
       
    15 use XML::SAX::PurePerl::DTDDecls ();
       
    16 use XML::SAX::PurePerl::XMLDecl ();
       
    17 use XML::SAX::DocumentLocator ();
       
    18 use XML::SAX::Base ();
       
    19 use XML::SAX qw(Namespaces);
       
    20 use XML::NamespaceSupport ();
       
    21 use IO::File;
       
    22 
       
    23 if ($] < 5.006) {
       
    24     require XML::SAX::PurePerl::NoUnicodeExt;
       
    25 }
       
    26 else {
       
    27     require XML::SAX::PurePerl::UnicodeExt;
       
    28 }
       
    29 
       
    30 use vars qw(@ISA);
       
    31 @ISA = ('XML::SAX::Base');
       
    32 
       
    33 my %int_ents = (
       
    34         amp => '&',
       
    35         lt => '<',
       
    36         gt => '>',
       
    37         quot => '"',
       
    38         apos => "'",
       
    39         );
       
    40 
       
    41 my $xmlns_ns = "http://www.w3.org/2000/xmlns/";
       
    42 my $xml_ns = "http://www.w3.org/XML/1998/namespace";
       
    43 
       
    44 use Carp;
       
    45 sub _parse_characterstream {
       
    46     my $self = shift;
       
    47     my ($fh) = @_;
       
    48     confess("CharacterStream is not yet correctly implemented");
       
    49     my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh);
       
    50     return $self->_parse($reader);
       
    51 }
       
    52 
       
    53 sub _parse_bytestream {
       
    54     my $self = shift;
       
    55     my ($fh) = @_;
       
    56     my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh);
       
    57     return $self->_parse($reader);
       
    58 }
       
    59 
       
    60 sub _parse_string {
       
    61     my $self = shift;
       
    62     my ($str) = @_;
       
    63     my $reader = XML::SAX::PurePerl::Reader::String->new($str);
       
    64     return $self->_parse($reader);
       
    65 }
       
    66 
       
    67 sub _parse_systemid {
       
    68     my $self = shift;
       
    69     my ($uri) = @_;
       
    70     my $reader = XML::SAX::PurePerl::Reader::URI->new($uri);
       
    71     return $self->_parse($reader);
       
    72 }
       
    73 
       
    74 sub _parse {
       
    75     my ($self, $reader) = @_;
       
    76     
       
    77     $reader->public_id($self->{ParseOptions}{Source}{PublicId});
       
    78     $reader->system_id($self->{ParseOptions}{Source}{SystemId});
       
    79 
       
    80     $self->{NSHelper} = XML::NamespaceSupport->new({xmlns => 1});
       
    81 
       
    82     $self->set_document_locator(
       
    83         XML::SAX::DocumentLocator->new(
       
    84             sub { $reader->public_id },
       
    85             sub { $reader->system_id },
       
    86             sub { $reader->line },
       
    87             sub { $reader->column },
       
    88             sub { $reader->get_encoding },
       
    89             sub { $reader->get_xml_version },
       
    90         ),
       
    91     );
       
    92     
       
    93     $self->start_document({});
       
    94 
       
    95     if (defined $self->{ParseOptions}{Source}{Encoding}) {
       
    96         $reader->set_encoding($self->{ParseOptions}{Source}{Encoding});
       
    97     }
       
    98     else {
       
    99         $self->encoding_detect($reader);
       
   100     }
       
   101     
       
   102     # parse a document
       
   103     $self->document($reader);
       
   104     
       
   105     return $self->end_document({});
       
   106 }
       
   107 
       
   108 sub parser_error {
       
   109     my $self = shift;
       
   110     my ($error, $reader) = @_;
       
   111     
       
   112 # warn("parser error: $error from ", $reader->line, " : ", $reader->column, "\n");
       
   113     my $exception = XML::SAX::Exception::Parse->new(
       
   114                 Message => $error,
       
   115                 ColumnNumber => $reader->column,
       
   116                 LineNumber => $reader->line,
       
   117                 PublicId => $reader->public_id,
       
   118                 SystemId => $reader->system_id,
       
   119             );
       
   120 
       
   121     $self->fatal_error($exception);
       
   122     $exception->throw;
       
   123 }
       
   124 
       
   125 sub document {
       
   126     my ($self, $reader) = @_;
       
   127     
       
   128     # document ::= prolog element Misc*
       
   129     
       
   130     $self->prolog($reader);
       
   131     $self->element($reader) ||
       
   132         $self->parser_error("Document requires an element", $reader);
       
   133     
       
   134     while(length($reader->data)) {
       
   135         $self->Misc($reader) || 
       
   136                 $self->parser_error("Only Comments, PIs and whitespace allowed at end of document", $reader);
       
   137     }
       
   138 }
       
   139 
       
   140 sub prolog {
       
   141     my ($self, $reader) = @_;
       
   142     
       
   143     $self->XMLDecl($reader);
       
   144     
       
   145     # consume all misc bits
       
   146     1 while($self->Misc($reader));
       
   147     
       
   148     if ($self->doctypedecl($reader)) {
       
   149         while (length($reader->data)) {
       
   150             $self->Misc($reader) || last;
       
   151         }
       
   152     }
       
   153 }
       
   154 
       
   155 sub element {
       
   156     my ($self, $reader) = @_;
       
   157     
       
   158     return 0 unless $reader->match('<');
       
   159     
       
   160     my $name = $self->Name($reader) || $self->parser_error("Invalid element name", $reader);
       
   161     
       
   162     my %attribs;
       
   163     
       
   164     while( my ($k, $v) = $self->Attribute($reader) ) {
       
   165         $attribs{$k} = $v;
       
   166     }
       
   167     
       
   168     my $have_namespaces = $self->get_feature(Namespaces);
       
   169     
       
   170     # Namespace processing
       
   171     $self->{NSHelper}->push_context;
       
   172     my @new_ns;
       
   173 #        my %attrs = @attribs;
       
   174 #        while (my ($k,$v) = each %attrs) {
       
   175     if ($have_namespaces) {
       
   176         while ( my ($k, $v) = each %attribs ) {
       
   177             if ($k =~ m/^xmlns(:(.*))?$/) {
       
   178                 my $prefix = $2 || '';
       
   179                 $self->{NSHelper}->declare_prefix($prefix, $v);
       
   180                 my $ns = 
       
   181                     {
       
   182                         Prefix       => $prefix,
       
   183                         NamespaceURI => $v,
       
   184                     };
       
   185                 push @new_ns, $ns;
       
   186                 $self->SUPER::start_prefix_mapping($ns);
       
   187             }
       
   188         }
       
   189     }
       
   190 
       
   191     # Create element object and fire event
       
   192     my %attrib_hash;
       
   193     while (my ($name, $value) = each %attribs ) {
       
   194         # TODO normalise value here
       
   195         my ($ns, $prefix, $lname);
       
   196         if ($have_namespaces) {
       
   197             ($ns, $prefix, $lname) = $self->{NSHelper}->process_attribute_name($name);
       
   198         }
       
   199         $ns ||= ''; $prefix ||= ''; $lname ||= '';
       
   200         $attrib_hash{"{$ns}$lname"} = {
       
   201             Name => $name,
       
   202             LocalName => $lname,
       
   203             Prefix => $prefix,
       
   204             NamespaceURI => $ns,
       
   205             Value => $value,
       
   206         };
       
   207     }
       
   208     
       
   209     %attribs = (); # lose the memory since we recurse deep
       
   210     
       
   211     my ($ns, $prefix, $lname);
       
   212     if ($self->get_feature(Namespaces)) {
       
   213         ($ns, $prefix, $lname) = $self->{NSHelper}->process_element_name($name);
       
   214     }
       
   215     else {
       
   216         $lname = $name;
       
   217     }
       
   218     $ns ||= ''; $prefix ||= ''; $lname ||= '';
       
   219 
       
   220     # Process remainder of start_element
       
   221     $self->skip_whitespace($reader);
       
   222     my $have_content;
       
   223     my $data = $reader->data(2);
       
   224     if ($data =~ /^\/>/) {
       
   225         $reader->move_along(2);
       
   226     }
       
   227     else {
       
   228         $data =~ /^>/ or $self->parser_error("No close element tag", $reader);
       
   229         $reader->move_along(1);
       
   230         $have_content++;
       
   231     }
       
   232     
       
   233     my $el = 
       
   234     {
       
   235         Name => $name,
       
   236         LocalName => $lname,
       
   237         Prefix => $prefix,
       
   238         NamespaceURI => $ns,
       
   239         Attributes => \%attrib_hash,
       
   240     };
       
   241     $self->start_element($el);
       
   242     
       
   243     # warn("($name\n");
       
   244     
       
   245     if ($have_content) {
       
   246         $self->content($reader);
       
   247         
       
   248         my $data = $reader->data(2);
       
   249         $data =~ /^<\// or $self->parser_error("No close tag marker", $reader);
       
   250         $reader->move_along(2);
       
   251         my $end_name = $self->Name($reader);
       
   252         $end_name eq $name || $self->parser_error("End tag mismatch ($end_name != $name)", $reader);
       
   253         $self->skip_whitespace($reader);
       
   254         $reader->match('>') or $self->parser_error("No close '>' on end tag", $reader);
       
   255     }
       
   256         
       
   257     my %end_el = %$el;
       
   258     delete $end_el{Attributes};
       
   259     $self->end_element(\%end_el);
       
   260 
       
   261     for my $ns (@new_ns) {
       
   262         $self->end_prefix_mapping($ns);
       
   263     }
       
   264     $self->{NSHelper}->pop_context;
       
   265     
       
   266     return 1;
       
   267 }
       
   268 
       
   269 sub content {
       
   270     my ($self, $reader) = @_;
       
   271     
       
   272     while (1) {
       
   273         $self->CharData($reader);
       
   274         
       
   275         my $data = $reader->data(2);
       
   276         
       
   277         if ($data =~ /^<\//) {
       
   278             return 1;
       
   279         }
       
   280         elsif ($data =~ /^&/) {
       
   281             $self->Reference($reader) or $self->parser_error("bare & not allowed in content", $reader);
       
   282             next;
       
   283         }
       
   284         elsif ($data =~ /^<!/) {
       
   285             ($self->CDSect($reader)
       
   286              or
       
   287              $self->Comment($reader))
       
   288              and next;
       
   289         }
       
   290         elsif ($data =~ /^<\?/) {
       
   291             $self->PI($reader) and next;
       
   292         }
       
   293         elsif ($data =~ /^</) {
       
   294             $self->element($reader) and next;
       
   295         }
       
   296         last;
       
   297     }
       
   298     
       
   299     return 1;
       
   300 }
       
   301 
       
   302 sub CDSect {
       
   303     my ($self, $reader) = @_;
       
   304     
       
   305     my $data = $reader->data(9);
       
   306     return 0 unless $data =~ /^<!\[CDATA\[/;
       
   307     $reader->move_along(9);
       
   308     
       
   309     $self->start_cdata({});
       
   310     
       
   311     $data = $reader->data;
       
   312     while (1) {
       
   313         $self->parser_error("EOF looking for CDATA section end", $reader)
       
   314             unless length($data);
       
   315         
       
   316         if ($data =~ /^(.*?)\]\]>/s) {
       
   317             my $chars = $1;
       
   318             $reader->move_along(length($chars) + 3);
       
   319             $self->characters({Data => $chars});
       
   320             last;
       
   321         }
       
   322         else {
       
   323             $self->characters({Data => $data});
       
   324             $reader->move_along(length($data));
       
   325             $data = $reader->data;
       
   326         }
       
   327     }
       
   328     $self->end_cdata({});
       
   329     return 1;
       
   330 }
       
   331 
       
   332 sub CharData {
       
   333     my ($self, $reader) = @_;
       
   334     
       
   335     my $data = $reader->data;
       
   336     
       
   337     while (1) {
       
   338         return unless length($data);
       
   339         
       
   340         if ($data =~ /^([^<&]*)[<&]/s) {
       
   341             my $chars = $1;
       
   342             $self->parser_error("String ']]>' not allowed in character data", $reader)
       
   343                 if $chars =~ /\]\]>/;
       
   344             $reader->move_along(length($chars));
       
   345             $self->characters({Data => $chars}) if length($chars);
       
   346             last;
       
   347         }
       
   348         else {
       
   349             $self->characters({Data => $data});
       
   350             $reader->move_along(length($data));
       
   351             $data = $reader->data;
       
   352         }
       
   353     }
       
   354 }
       
   355 
       
   356 sub Misc {
       
   357     my ($self, $reader) = @_;
       
   358     if ($self->Comment($reader)) {
       
   359         return 1;
       
   360     }
       
   361     elsif ($self->PI($reader)) {
       
   362         return 1;
       
   363     }
       
   364     elsif ($self->skip_whitespace($reader)) {
       
   365         return 1;
       
   366     }
       
   367     
       
   368     return 0;
       
   369 }
       
   370 
       
   371 sub Reference {
       
   372     my ($self, $reader) = @_;
       
   373     
       
   374     return 0 unless $reader->match('&');
       
   375     
       
   376     my $data = $reader->data;
       
   377     
       
   378     if ($data =~ /^#x([0-9a-fA-F]+);/) {
       
   379         my $ref = $1;
       
   380         $reader->move_along(length($ref) + 3);
       
   381         my $char = chr_ref(hex($ref));
       
   382         $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader)
       
   383             unless $char =~ /$SingleChar/o;
       
   384         $self->characters({ Data => $char });
       
   385         return 1;
       
   386     }
       
   387     elsif ($data =~ /^#([0-9]+);/) {
       
   388         my $ref = $1;
       
   389         $reader->move_along(length($ref) + 2);
       
   390         my $char = chr_ref($ref);
       
   391         $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader)
       
   392             unless $char =~ /$SingleChar/o;
       
   393         $self->characters({ Data => $char });
       
   394         return 1;
       
   395     }
       
   396     else {
       
   397         # EntityRef
       
   398         my $name = $self->Name($reader)
       
   399             || $self->parser_error("Invalid name in entity", $reader);
       
   400         $reader->match(';') or $self->parser_error("No semi-colon found after entity name", $reader);
       
   401         
       
   402         # warn("got entity: \&$name;\n");
       
   403         
       
   404         # expand it
       
   405         if ($self->_is_entity($name)) {
       
   406             
       
   407             if ($self->_is_external($name)) {
       
   408                 my $value = $self->_get_entity($name);
       
   409                 my $ent_reader = XML::SAX::PurePerl::Reader::URI->new($value);
       
   410                 $self->encoding_detect($ent_reader);
       
   411                 $self->extParsedEnt($ent_reader);
       
   412             }
       
   413             else {
       
   414                 my $value = $self->_stringify_entity($name);
       
   415                 my $ent_reader = XML::SAX::PurePerl::Reader::String->new($value);
       
   416                 $self->content($ent_reader);
       
   417             }
       
   418             return 1;
       
   419         }
       
   420         elsif ($name =~ /^(?:amp|gt|lt|quot|apos)$/) {
       
   421             $self->characters({ Data => $int_ents{$name} });
       
   422             return 1;
       
   423         }
       
   424         else {
       
   425             $self->parser_error("Undeclared entity", $reader);
       
   426         }
       
   427     }
       
   428 }
       
   429 
       
   430 sub AttReference {
       
   431     my ($self, $name, $reader) = @_;
       
   432     if ($name =~ /^#x([0-9a-fA-F]+)$/) {
       
   433         my $chr = chr_ref(hex($1));
       
   434         $chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader);
       
   435         return $chr;
       
   436     }
       
   437     elsif ($name =~ /^#([0-9]+)$/) {
       
   438         my $chr = chr_ref($1);
       
   439         $chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader);
       
   440         return $chr;
       
   441     }
       
   442     else {
       
   443         if ($self->_is_entity($name)) {
       
   444             if ($self->_is_external($name)) {
       
   445                 $self->parser_error("No external entity references allowed in attribute values", $reader);
       
   446             }
       
   447             else {
       
   448                 my $value = $self->_stringify_entity($name);
       
   449                 return $value;
       
   450             }
       
   451         }
       
   452         elsif ($name =~ /^(?:amp|lt|gt|quot|apos)$/) {
       
   453             return $int_ents{$name};
       
   454         }
       
   455         else {
       
   456             $self->parser_error("Undeclared entity '$name'", $reader);
       
   457         }
       
   458     }
       
   459 }
       
   460 
       
   461 sub extParsedEnt {
       
   462     my ($self, $reader) = @_;
       
   463     
       
   464     $self->TextDecl($reader);
       
   465     $self->content($reader);
       
   466 }
       
   467 
       
   468 sub _is_external {
       
   469     my ($self, $name) = @_;
       
   470 # TODO: Fix this to use $reader to store the entities perhaps.
       
   471     if ($self->{ParseOptions}{external_entities}{$name}) {
       
   472         return 1;
       
   473     }
       
   474     return ;
       
   475 }
       
   476 
       
   477 sub _is_entity {
       
   478     my ($self, $name) = @_;
       
   479 # TODO: ditto above
       
   480     if (exists $self->{ParseOptions}{entities}{$name}) {
       
   481         return 1;
       
   482     }
       
   483     return 0;
       
   484 }
       
   485 
       
   486 sub _stringify_entity {
       
   487     my ($self, $name) = @_;
       
   488 # TODO: ditto above
       
   489     if (exists $self->{ParseOptions}{expanded_entity}{$name}) {
       
   490         return $self->{ParseOptions}{expanded_entity}{$name};
       
   491     }
       
   492     # expand
       
   493     my $reader = XML::SAX::PurePerl::Reader::URI->new($self->{ParseOptions}{entities}{$name});
       
   494     my $ent = '';
       
   495     while(1) {
       
   496         my $data = $reader->data;
       
   497         $ent .= $data;
       
   498         $reader->move_along(length($data)) or last;
       
   499     }
       
   500     return $self->{ParseOptions}{expanded_entity}{$name} = $ent;
       
   501 }
       
   502 
       
   503 sub _get_entity {
       
   504     my ($self, $name) = @_;
       
   505 # TODO: ditto above
       
   506     return $self->{ParseOptions}{entities}{$name};
       
   507 }
       
   508 
       
   509 sub skip_whitespace {
       
   510     my ($self, $reader) = @_;
       
   511     
       
   512     my $data = $reader->data;
       
   513     
       
   514     my $found = 0;
       
   515     while ($data =~ s/^([\x20\x0A\x0D\x09]*)//) {
       
   516         last unless length($1);
       
   517         $found++;
       
   518         $reader->move_along(length($1));
       
   519         $data = $reader->data;
       
   520     }
       
   521     
       
   522     return $found;
       
   523 }
       
   524 
       
   525 sub Attribute {
       
   526     my ($self, $reader) = @_;
       
   527     
       
   528     $self->skip_whitespace($reader) || return;
       
   529     
       
   530     my $data = $reader->data(2);
       
   531     return if $data =~ /^\/?>/;
       
   532     
       
   533     if (my $name = $self->Name($reader)) {
       
   534         $self->skip_whitespace($reader);
       
   535         $reader->match('=') or $self->parser_error("No '=' in Attribute", $reader);
       
   536         $self->skip_whitespace($reader);
       
   537         my $value = $self->AttValue($reader);
       
   538 
       
   539         if (!$self->cdata_attrib($name)) {
       
   540             $value =~ s/^\x20*//; # discard leading spaces
       
   541             $value =~ s/\x20*$//; # discard trailing spaces
       
   542             $value =~ s/ {1,}/ /g; # all >1 space to single space
       
   543         }
       
   544         
       
   545         return $name, $value;
       
   546     }
       
   547     
       
   548     return;
       
   549 }
       
   550 
       
   551 sub cdata_attrib {
       
   552     # TODO implement this!
       
   553     return 1;
       
   554 }
       
   555 
       
   556 sub AttValue {
       
   557     my ($self, $reader) = @_;
       
   558     
       
   559     my $quote = $self->quote($reader);
       
   560     
       
   561     my $value = '';
       
   562     
       
   563     while (1) {
       
   564         my $data = $reader->data;
       
   565         $self->parser_error("EOF found while looking for the end of attribute value", $reader)
       
   566             unless length($data);
       
   567         if ($data =~ /^([^$quote]*)$quote/) {
       
   568             $reader->move_along(length($1) + 1);
       
   569             $value .= $1;
       
   570             last;
       
   571         }
       
   572         else {
       
   573             $value .= $data;
       
   574             $reader->move_along(length($data));
       
   575         }
       
   576     }
       
   577     
       
   578     if ($value =~ /</) {
       
   579         $self->parser_error("< character not allowed in attribute values", $reader);
       
   580     }
       
   581     
       
   582     $value =~ s/[\x09\x0A\x0D]/\x20/g;
       
   583     $value =~ s/&(#(x[0-9a-fA-F]+)|([0-9]+)|\w+);/$self->AttReference($1, $reader)/geo;
       
   584     
       
   585     return $value;
       
   586 }
       
   587 
       
   588 sub Comment {
       
   589     my ($self, $reader) = @_;
       
   590     
       
   591     my $data = $reader->data(4);
       
   592     if ($data =~ /^<!--/) {
       
   593         $reader->move_along(4);
       
   594         my $comment_str = '';
       
   595         while (1) {
       
   596             my $data = $reader->data;
       
   597             $self->parser_error("End of data seen while looking for close comment marker", $reader)
       
   598                 unless length($data);
       
   599             if ($data =~ /^(.*?)-->/s) {
       
   600                 $comment_str .= $1;
       
   601                 $self->parser_error("Invalid comment (dash)", $reader) if $comment_str =~ /-$/;
       
   602                 $reader->move_along(length($1) + 3);
       
   603                 last;
       
   604             }
       
   605             else {
       
   606                 $comment_str .= $data;
       
   607                 $reader->move_along(length($data));
       
   608             }
       
   609         }
       
   610         
       
   611         $self->comment({ Data => $comment_str });
       
   612         
       
   613         return 1;
       
   614     }
       
   615     return 0;
       
   616 }
       
   617 
       
   618 sub PI {
       
   619     my ($self, $reader) = @_;
       
   620     
       
   621     my $data = $reader->data(2);
       
   622     
       
   623     if ($data =~ /^<\?/) {
       
   624         $reader->move_along(2);
       
   625         my ($target, $data);
       
   626         $target = $self->Name($reader) ||
       
   627             $self->parser_error("PI has no target", $reader);
       
   628         if ($self->skip_whitespace($reader)) {
       
   629             $target = '';
       
   630             while (1) {
       
   631                 my $data = $reader->data;
       
   632                 $self->parser_error("End of data seen while looking for close PI marker", $reader)
       
   633                     unless length($data);
       
   634                 if ($data =~ /^(.*?)\?>/s) {
       
   635                     $target .= $1;
       
   636                     $reader->move_along(length($1) + 2);
       
   637                     last;
       
   638                 }
       
   639                 else {
       
   640                     $target .= $data;
       
   641                     $reader->move_along(length($data));
       
   642                 }
       
   643             }
       
   644         }
       
   645         else {
       
   646             my $data = $reader->data(2);
       
   647             $data =~ /^\?>/ or $self->parser_error("PI closing sequence not found", $reader);
       
   648             $reader->move_along(2);
       
   649         }
       
   650         $self->processing_instruction({ Target => $target, Data => $data });
       
   651         
       
   652         return 1;
       
   653     }
       
   654     return 0;
       
   655 }
       
   656 
       
   657 sub Name {
       
   658     my ($self, $reader) = @_;
       
   659     
       
   660     my $name = '';
       
   661     while(1) {
       
   662         my $data = $reader->data;
       
   663         return unless length($data);
       
   664         $data =~ /^([^\s>\/&\?;=<\)\(\[\],\%\#\!\*]*)/ or return;
       
   665         $name .= $1;
       
   666         my $len = length($1);
       
   667         $reader->move_along($len);
       
   668         last if ($len != length($data));
       
   669     }
       
   670     
       
   671     return unless length($name);
       
   672     
       
   673     $name =~ /$NameChar/o or $self->parser_error("Name <$name> does not match NameChar production", $reader);
       
   674 
       
   675     return $name;
       
   676 }
       
   677 
       
   678 sub quote {
       
   679     my ($self, $reader) = @_;
       
   680     
       
   681     my $data = $reader->data;
       
   682     
       
   683     $data =~ /^(['"])/ or $self->parser_error("Invalid quote token", $reader);
       
   684     $reader->move_along(1);
       
   685     return $1;
       
   686 }
       
   687 
       
   688 1;
       
   689 __END__
       
   690 
       
   691 =head1 NAME
       
   692 
       
   693 XML::SAX::PurePerl - Pure Perl XML Parser with SAX2 interface
       
   694 
       
   695 =head1 SYNOPSIS
       
   696 
       
   697   use XML::Handler::Foo;
       
   698   use XML::SAX::PurePerl;
       
   699   my $handler = XML::Handler::Foo->new();
       
   700   my $parser = XML::SAX::PurePerl->new(Handler => $handler);
       
   701   $parser->parse_uri("myfile.xml");
       
   702 
       
   703 =head1 DESCRIPTION
       
   704 
       
   705 This module implements an XML parser in pure perl. It is written around the
       
   706 upcoming perl 5.8's unicode support and support for multiple document 
       
   707 encodings (using the PerlIO layer), however it has been ported to work with
       
   708 ASCII/UTF8 documents under lower perl versions.
       
   709 
       
   710 The SAX2 API is described in detail at http://sourceforge.net/projects/perl-xml/, in
       
   711 the CVS archive, under libxml-perl/docs. Hopefully those documents will be in a
       
   712 better location soon.
       
   713 
       
   714 Please refer to the SAX2 documentation for how to use this module - it is merely a
       
   715 front end to SAX2, and implements nothing that is not in that spec (or at least tries
       
   716 not to - please email me if you find errors in this implementation).
       
   717 
       
   718 =head1 BUGS
       
   719 
       
   720 XML::SAX::PurePerl is B<slow>. Very slow. I suggest you use something else
       
   721 in fact. However it is great as a fallback parser for XML::SAX, where the
       
   722 user might not be able to install an XS based parser or C library.
       
   723 
       
   724 Currently lots, probably. At the moment the weakest area is parsing DOCTYPE declarations,
       
   725 though the code is in place to start doing this. Also parsing parameter entity
       
   726 references is causing me much confusion, since it's not exactly what I would call
       
   727 trivial, or well documented in the XML grammar. XML documents with internal subsets
       
   728 are likely to fail.
       
   729 
       
   730 I am however trying to work towards full conformance using the Oasis test suite.
       
   731 
       
   732 =head1 AUTHOR
       
   733 
       
   734 Matt Sergeant, matt@sergeant.org. Copyright 2001.
       
   735 
       
   736 Please report all bugs to the Perl-XML mailing list at perl-xml@listserv.activestate.com.
       
   737 
       
   738 =head1 LICENSE
       
   739 
       
   740 This is free software. You may use it or redistribute it under the same terms as
       
   741 Perl 5.7.2 itself.
       
   742 
       
   743 =cut
       
   744