uh_parser/XML/SAX/PurePerl.pm
changeset 177 6d3c3db11e72
equal deleted inserted replaced
176:266a7e9b9237 177:6d3c3db11e72
       
     1 # $Id: PurePerl.pm,v 1.21 2007/02/07 09:33:50 grant Exp $
       
     2 
       
     3 package XML::SAX::PurePerl;
       
     4 
       
     5 use strict;
       
     6 use vars qw/$VERSION/;
       
     7 
       
     8 $VERSION = '0.91';
       
     9 
       
    10 use XML::SAX::PurePerl::Productions qw($Any $CharMinusDash $SingleChar);
       
    11 use XML::SAX::PurePerl::Reader;
       
    12 use XML::SAX::PurePerl::EncodingDetect ();
       
    13 use XML::SAX::Exception;
       
    14 use XML::SAX::PurePerl::DocType ();
       
    15 use XML::SAX::PurePerl::DTDDecls ();
       
    16 use XML::SAX::PurePerl::XMLDecl ();
       
    17 use XML::SAX::DocumentLocator ();
       
    18 use XML::SAX::Base ();
       
    19 use XML::SAX qw(Namespaces);
       
    20 use XML::NamespaceSupport ();
       
    21 use IO::File;
       
    22 
       
    23 if ($] < 5.006) {
       
    24     require XML::SAX::PurePerl::NoUnicodeExt;
       
    25 }
       
    26 else {
       
    27     require XML::SAX::PurePerl::UnicodeExt;
       
    28 }
       
    29 
       
    30 use vars qw(@ISA);
       
    31 @ISA = ('XML::SAX::Base');
       
    32 
       
    33 my %int_ents = (
       
    34         amp => '&',
       
    35         lt => '<',
       
    36         gt => '>',
       
    37         quot => '"',
       
    38         apos => "'",
       
    39         );
       
    40 
       
    41 my $xmlns_ns = "http://www.w3.org/2000/xmlns/";
       
    42 my $xml_ns = "http://www.w3.org/XML/1998/namespace";
       
    43 
       
    44 use Carp;
       
    45 sub _parse_characterstream {
       
    46     my $self = shift;
       
    47     my ($fh) = @_;
       
    48     confess("CharacterStream is not yet correctly implemented");
       
    49     my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh);
       
    50     return $self->_parse($reader);
       
    51 }
       
    52 
       
    53 sub _parse_bytestream {
       
    54     my $self = shift;
       
    55     my ($fh) = @_;
       
    56     my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh);
       
    57     return $self->_parse($reader);
       
    58 }
       
    59 
       
    60 sub _parse_string {
       
    61     my $self = shift;
       
    62     my ($str) = @_;
       
    63     my $reader = XML::SAX::PurePerl::Reader::String->new($str);
       
    64     return $self->_parse($reader);
       
    65 }
       
    66 
       
    67 sub _parse_systemid {
       
    68     my $self = shift;
       
    69     my ($uri) = @_;
       
    70     my $reader = XML::SAX::PurePerl::Reader::URI->new($uri);
       
    71     return $self->_parse($reader);
       
    72 }
       
    73 
       
    74 sub _parse {
       
    75     my ($self, $reader) = @_;
       
    76     
       
    77     $reader->public_id($self->{ParseOptions}{Source}{PublicId});
       
    78     $reader->system_id($self->{ParseOptions}{Source}{SystemId});
       
    79 
       
    80     $self->{NSHelper} = XML::NamespaceSupport->new({xmlns => 1});
       
    81 
       
    82     $self->set_document_locator(
       
    83         XML::SAX::DocumentLocator->new(
       
    84             sub { $reader->public_id },
       
    85             sub { $reader->system_id },
       
    86             sub { $reader->line },
       
    87             sub { $reader->column },
       
    88             sub { $reader->get_encoding },
       
    89             sub { $reader->get_xml_version },
       
    90         ),
       
    91     );
       
    92     
       
    93     $self->start_document({});
       
    94 
       
    95     if (defined $self->{ParseOptions}{Source}{Encoding}) {
       
    96         $reader->set_encoding($self->{ParseOptions}{Source}{Encoding});
       
    97     }
       
    98     else {
       
    99         $self->encoding_detect($reader);
       
   100     }
       
   101     
       
   102     # parse a document
       
   103     $self->document($reader);
       
   104     
       
   105     return $self->end_document({});
       
   106 }
       
   107 
       
   108 sub parser_error {
       
   109     my $self = shift;
       
   110     my ($error, $reader) = @_;
       
   111     
       
   112 # warn("parser error: $error from ", $reader->line, " : ", $reader->column, "\n");
       
   113     my $exception = XML::SAX::Exception::Parse->new(
       
   114                 Message => $error,
       
   115                 ColumnNumber => $reader->column,
       
   116                 LineNumber => $reader->line,
       
   117                 PublicId => $reader->public_id,
       
   118                 SystemId => $reader->system_id,
       
   119             );
       
   120 
       
   121     $self->fatal_error($exception);
       
   122     $exception->throw;
       
   123 }
       
   124 
       
   125 sub document {
       
   126     my ($self, $reader) = @_;
       
   127     
       
   128     # document ::= prolog element Misc*
       
   129     
       
   130     $self->prolog($reader);
       
   131     $self->element($reader) ||
       
   132         $self->parser_error("Document requires an element", $reader);
       
   133     
       
   134     while(length($reader->data)) {
       
   135         $self->Misc($reader) || 
       
   136                 $self->parser_error("Only Comments, PIs and whitespace allowed at end of document", $reader);
       
   137     }
       
   138 }
       
   139 
       
   140 sub prolog {
       
   141     my ($self, $reader) = @_;
       
   142     
       
   143     $self->XMLDecl($reader);
       
   144     
       
   145     # consume all misc bits
       
   146     1 while($self->Misc($reader));
       
   147     
       
   148     if ($self->doctypedecl($reader)) {
       
   149         while (length($reader->data)) {
       
   150             $self->Misc($reader) || last;
       
   151         }
       
   152     }
       
   153 }
       
   154 
       
   155 sub element {
       
   156     my ($self, $reader) = @_;
       
   157     
       
   158     return 0 unless $reader->match('<');
       
   159     
       
   160     my $name = $self->Name($reader) || $self->parser_error("Invalid element name", $reader);
       
   161     
       
   162     my %attribs;
       
   163     
       
   164     while( my ($k, $v) = $self->Attribute($reader) ) {
       
   165         $attribs{$k} = $v;
       
   166     }
       
   167     
       
   168     my $have_namespaces = $self->get_feature(Namespaces);
       
   169     
       
   170     # Namespace processing
       
   171     $self->{NSHelper}->push_context;
       
   172     my @new_ns;
       
   173 #        my %attrs = @attribs;
       
   174 #        while (my ($k,$v) = each %attrs) {
       
   175     if ($have_namespaces) {
       
   176         while ( my ($k, $v) = each %attribs ) {
       
   177             if ($k =~ m/^xmlns(:(.*))?$/) {
       
   178                 my $prefix = $2 || '';
       
   179                 $self->{NSHelper}->declare_prefix($prefix, $v);
       
   180                 my $ns = 
       
   181                     {
       
   182                         Prefix       => $prefix,
       
   183                         NamespaceURI => $v,
       
   184                     };
       
   185                 push @new_ns, $ns;
       
   186                 $self->SUPER::start_prefix_mapping($ns);
       
   187             }
       
   188         }
       
   189     }
       
   190 
       
   191     # Create element object and fire event
       
   192     my %attrib_hash;
       
   193     while (my ($name, $value) = each %attribs ) {
       
   194         # TODO normalise value here
       
   195         my ($ns, $prefix, $lname);
       
   196         if ($have_namespaces) {
       
   197             ($ns, $prefix, $lname) = $self->{NSHelper}->process_attribute_name($name);
       
   198         }
       
   199         $ns ||= ''; $prefix ||= ''; $lname ||= '';
       
   200         $attrib_hash{"{$ns}$lname"} = {
       
   201             Name => $name,
       
   202             LocalName => $lname,
       
   203             Prefix => $prefix,
       
   204             NamespaceURI => $ns,
       
   205             Value => $value,
       
   206         };
       
   207     }
       
   208     
       
   209     %attribs = (); # lose the memory since we recurse deep
       
   210     
       
   211     my ($ns, $prefix, $lname);
       
   212     if ($self->get_feature(Namespaces)) {
       
   213         ($ns, $prefix, $lname) = $self->{NSHelper}->process_element_name($name);
       
   214     }
       
   215     else {
       
   216         $lname = $name;
       
   217     }
       
   218     $ns ||= ''; $prefix ||= ''; $lname ||= '';
       
   219 
       
   220     # Process remainder of start_element
       
   221     $self->skip_whitespace($reader);
       
   222     my $have_content;
       
   223     my $data = $reader->data(2);
       
   224     if ($data =~ /^\/>/) {
       
   225         $reader->move_along(2);
       
   226     }
       
   227     else {
       
   228         $data =~ /^>/ or $self->parser_error("No close element tag", $reader);
       
   229         $reader->move_along(1);
       
   230         $have_content++;
       
   231     }
       
   232     
       
   233     my $el = 
       
   234     {
       
   235         Name => $name,
       
   236         LocalName => $lname,
       
   237         Prefix => $prefix,
       
   238         NamespaceURI => $ns,
       
   239         Attributes => \%attrib_hash,
       
   240     };
       
   241     $self->start_element($el);
       
   242     
       
   243     # warn("($name\n");
       
   244     
       
   245     if ($have_content) {
       
   246         $self->content($reader);
       
   247         
       
   248         my $data = $reader->data(2);
       
   249         $data =~ /^<\// or $self->parser_error("No close tag marker", $reader);
       
   250         $reader->move_along(2);
       
   251         my $end_name = $self->Name($reader);
       
   252         $end_name eq $name || $self->parser_error("End tag mismatch ($end_name != $name)", $reader);
       
   253         $self->skip_whitespace($reader);
       
   254         $reader->match('>') or $self->parser_error("No close '>' on end tag", $reader);
       
   255     }
       
   256         
       
   257     my %end_el = %$el;
       
   258     delete $end_el{Attributes};
       
   259     $self->end_element(\%end_el);
       
   260 
       
   261     for my $ns (@new_ns) {
       
   262         $self->end_prefix_mapping($ns);
       
   263     }
       
   264     $self->{NSHelper}->pop_context;
       
   265     
       
   266     return 1;
       
   267 }
       
   268 
       
   269 sub content {
       
   270     my ($self, $reader) = @_;
       
   271     
       
   272     while (1) {
       
   273         $self->CharData($reader);
       
   274         
       
   275         my $data = $reader->data(2);
       
   276         
       
   277         if ($data =~ /^<\//) {
       
   278             return 1;
       
   279         }
       
   280         elsif ($data =~ /^&/) {
       
   281             $self->Reference($reader) or $self->parser_error("bare & not allowed in content", $reader);
       
   282             next;
       
   283         }
       
   284         elsif ($data =~ /^<!/) {
       
   285             ($self->CDSect($reader)
       
   286              or
       
   287              $self->Comment($reader))
       
   288              and next;
       
   289         }
       
   290         elsif ($data =~ /^<\?/) {
       
   291             $self->PI($reader) and next;
       
   292         }
       
   293         elsif ($data =~ /^</) {
       
   294             $self->element($reader) and next;
       
   295         }
       
   296         last;
       
   297     }
       
   298     
       
   299     return 1;
       
   300 }
       
   301 
       
   302 sub CDSect {
       
   303     my ($self, $reader) = @_;
       
   304     
       
   305     my $data = $reader->data(9);
       
   306     return 0 unless $data =~ /^<!\[CDATA\[/;
       
   307     $reader->move_along(9);
       
   308     
       
   309     $self->start_cdata({});
       
   310     
       
   311     $data = $reader->data;
       
   312     while (1) {
       
   313         $self->parser_error("EOF looking for CDATA section end", $reader)
       
   314             unless length($data);
       
   315         
       
   316         if ($data =~ /^(.*?)\]\]>/s) {
       
   317             my $chars = $1;
       
   318             $reader->move_along(length($chars) + 3);
       
   319             $self->characters({Data => $chars});
       
   320             last;
       
   321         }
       
   322         elsif ($data =~ /(.*?)\]+$/s) {
       
   323             my $chars = $1;
       
   324             $reader->move_along(length($chars));
       
   325             $self->characters({Data => $chars});
       
   326             $data = $reader->data(3);
       
   327         }
       
   328         else {
       
   329             $self->characters({Data => $data});
       
   330             $reader->move_along(length($data));
       
   331             $data = $reader->data;
       
   332         }
       
   333     }
       
   334     $self->end_cdata({});
       
   335     return 1;
       
   336 }
       
   337 
       
   338 sub CharData {
       
   339     my ($self, $reader) = @_;
       
   340     
       
   341     my $data = $reader->data;
       
   342     
       
   343     while (1) {
       
   344         return unless length($data);
       
   345         
       
   346         if ($data =~ /^([^<&]*)[<&]/s) {
       
   347             my $chars = $1;
       
   348             $self->parser_error("String ']]>' not allowed in character data", $reader)
       
   349                 if $chars =~ /\]\]>/;
       
   350             $reader->move_along(length($chars));
       
   351             $self->characters({Data => $chars}) if length($chars);
       
   352             last;
       
   353         }
       
   354         else {
       
   355             $self->characters({Data => $data});
       
   356             $reader->move_along(length($data));
       
   357             $data = $reader->data;
       
   358         }
       
   359     }
       
   360 }
       
   361 
       
   362 sub Misc {
       
   363     my ($self, $reader) = @_;
       
   364     if ($self->Comment($reader)) {
       
   365         return 1;
       
   366     }
       
   367     elsif ($self->PI($reader)) {
       
   368         return 1;
       
   369     }
       
   370     elsif ($self->skip_whitespace($reader)) {
       
   371         return 1;
       
   372     }
       
   373     
       
   374     return 0;
       
   375 }
       
   376 
       
   377 sub Reference {
       
   378     my ($self, $reader) = @_;
       
   379     
       
   380     return 0 unless $reader->match('&');
       
   381     
       
   382     my $data = $reader->data;
       
   383     
       
   384     if ($data =~ /^#x([0-9a-fA-F]+);/) {
       
   385         my $ref = $1;
       
   386         $reader->move_along(length($ref) + 3);
       
   387         my $char = chr_ref(hex($ref));
       
   388         $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader)
       
   389             unless $char =~ /$SingleChar/o;
       
   390         $self->characters({ Data => $char });
       
   391         return 1;
       
   392     }
       
   393     elsif ($data =~ /^#([0-9]+);/) {
       
   394         my $ref = $1;
       
   395         $reader->move_along(length($ref) + 2);
       
   396         my $char = chr_ref($ref);
       
   397         $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader)
       
   398             unless $char =~ /$SingleChar/o;
       
   399         $self->characters({ Data => $char });
       
   400         return 1;
       
   401     }
       
   402     else {
       
   403         # EntityRef
       
   404         my $name = $self->Name($reader)
       
   405             || $self->parser_error("Invalid name in entity", $reader);
       
   406         $reader->match(';') or $self->parser_error("No semi-colon found after entity name", $reader);
       
   407         
       
   408         # warn("got entity: \&$name;\n");
       
   409         
       
   410         # expand it
       
   411         if ($self->_is_entity($name)) {
       
   412             
       
   413             if ($self->_is_external($name)) {
       
   414                 my $value = $self->_get_entity($name);
       
   415                 my $ent_reader = XML::SAX::PurePerl::Reader::URI->new($value);
       
   416                 $self->encoding_detect($ent_reader);
       
   417                 $self->extParsedEnt($ent_reader);
       
   418             }
       
   419             else {
       
   420                 my $value = $self->_stringify_entity($name);
       
   421                 my $ent_reader = XML::SAX::PurePerl::Reader::String->new($value);
       
   422                 $self->content($ent_reader);
       
   423             }
       
   424             return 1;
       
   425         }
       
   426         elsif ($name =~ /^(?:amp|gt|lt|quot|apos)$/) {
       
   427             $self->characters({ Data => $int_ents{$name} });
       
   428             return 1;
       
   429         }
       
   430         else {
       
   431             $self->parser_error("Undeclared entity", $reader);
       
   432         }
       
   433     }
       
   434 }
       
   435 
       
   436 sub AttReference {
       
   437     my ($self, $name, $reader) = @_;
       
   438     if ($name =~ /^#x([0-9a-fA-F]+)$/) {
       
   439         my $chr = chr_ref(hex($1));
       
   440         $chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader);
       
   441         return $chr;
       
   442     }
       
   443     elsif ($name =~ /^#([0-9]+)$/) {
       
   444         my $chr = chr_ref($1);
       
   445         $chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader);
       
   446         return $chr;
       
   447     }
       
   448     else {
       
   449         if ($self->_is_entity($name)) {
       
   450             if ($self->_is_external($name)) {
       
   451                 $self->parser_error("No external entity references allowed in attribute values", $reader);
       
   452             }
       
   453             else {
       
   454                 my $value = $self->_stringify_entity($name);
       
   455                 return $value;
       
   456             }
       
   457         }
       
   458         elsif ($name =~ /^(?:amp|lt|gt|quot|apos)$/) {
       
   459             return $int_ents{$name};
       
   460         }
       
   461         else {
       
   462             $self->parser_error("Undeclared entity '$name'", $reader);
       
   463         }
       
   464     }
       
   465 }
       
   466 
       
   467 sub extParsedEnt {
       
   468     my ($self, $reader) = @_;
       
   469     
       
   470     $self->TextDecl($reader);
       
   471     $self->content($reader);
       
   472 }
       
   473 
       
   474 sub _is_external {
       
   475     my ($self, $name) = @_;
       
   476 # TODO: Fix this to use $reader to store the entities perhaps.
       
   477     if ($self->{ParseOptions}{external_entities}{$name}) {
       
   478         return 1;
       
   479     }
       
   480     return ;
       
   481 }
       
   482 
       
   483 sub _is_entity {
       
   484     my ($self, $name) = @_;
       
   485 # TODO: ditto above
       
   486     if (exists $self->{ParseOptions}{entities}{$name}) {
       
   487         return 1;
       
   488     }
       
   489     return 0;
       
   490 }
       
   491 
       
   492 sub _stringify_entity {
       
   493     my ($self, $name) = @_;
       
   494 # TODO: ditto above
       
   495     if (exists $self->{ParseOptions}{expanded_entity}{$name}) {
       
   496         return $self->{ParseOptions}{expanded_entity}{$name};
       
   497     }
       
   498     # expand
       
   499     my $reader = XML::SAX::PurePerl::Reader::URI->new($self->{ParseOptions}{entities}{$name});
       
   500     my $ent = '';
       
   501     while(1) {
       
   502         my $data = $reader->data;
       
   503         $ent .= $data;
       
   504         $reader->move_along(length($data)) or last;
       
   505     }
       
   506     return $self->{ParseOptions}{expanded_entity}{$name} = $ent;
       
   507 }
       
   508 
       
   509 sub _get_entity {
       
   510     my ($self, $name) = @_;
       
   511 # TODO: ditto above
       
   512     return $self->{ParseOptions}{entities}{$name};
       
   513 }
       
   514 
       
   515 sub skip_whitespace {
       
   516     my ($self, $reader) = @_;
       
   517     
       
   518     my $data = $reader->data;
       
   519     
       
   520     my $found = 0;
       
   521     while ($data =~ s/^([\x20\x0A\x0D\x09]*)//) {
       
   522         last unless length($1);
       
   523         $found++;
       
   524         $reader->move_along(length($1));
       
   525         $data = $reader->data;
       
   526     }
       
   527     
       
   528     return $found;
       
   529 }
       
   530 
       
   531 sub Attribute {
       
   532     my ($self, $reader) = @_;
       
   533     
       
   534     $self->skip_whitespace($reader) || return;
       
   535     
       
   536     my $data = $reader->data(2);
       
   537     return if $data =~ /^\/?>/;
       
   538     
       
   539     if (my $name = $self->Name($reader)) {
       
   540         $self->skip_whitespace($reader);
       
   541         $reader->match('=') or $self->parser_error("No '=' in Attribute", $reader);
       
   542         $self->skip_whitespace($reader);
       
   543         my $value = $self->AttValue($reader);
       
   544 
       
   545         if (!$self->cdata_attrib($name)) {
       
   546             $value =~ s/^\x20*//; # discard leading spaces
       
   547             $value =~ s/\x20*$//; # discard trailing spaces
       
   548             $value =~ s/ {1,}/ /g; # all >1 space to single space
       
   549         }
       
   550         
       
   551         return $name, $value;
       
   552     }
       
   553     
       
   554     return;
       
   555 }
       
   556 
       
   557 sub cdata_attrib {
       
   558     # TODO implement this!
       
   559     return 1;
       
   560 }
       
   561 
       
   562 sub AttValue {
       
   563     my ($self, $reader) = @_;
       
   564     
       
   565     my $quote = $self->quote($reader);
       
   566     
       
   567     my $value = '';
       
   568     
       
   569     while (1) {
       
   570         my $data = $reader->data;
       
   571         $self->parser_error("EOF found while looking for the end of attribute value", $reader)
       
   572             unless length($data);
       
   573         if ($data =~ /^([^$quote]*)$quote/) {
       
   574             $reader->move_along(length($1) + 1);
       
   575             $value .= $1;
       
   576             last;
       
   577         }
       
   578         else {
       
   579             $value .= $data;
       
   580             $reader->move_along(length($data));
       
   581         }
       
   582     }
       
   583     
       
   584     if ($value =~ /</) {
       
   585         $self->parser_error("< character not allowed in attribute values", $reader);
       
   586     }
       
   587     
       
   588     $value =~ s/[\x09\x0A\x0D]/\x20/g;
       
   589     $value =~ s/&(#(x[0-9a-fA-F]+)|([0-9]+)|\w+);/$self->AttReference($1, $reader)/geo;
       
   590     
       
   591     return $value;
       
   592 }
       
   593 
       
   594 sub Comment {
       
   595     my ($self, $reader) = @_;
       
   596     
       
   597     my $data = $reader->data(4);
       
   598     if ($data =~ /^<!--/) {
       
   599         $reader->move_along(4);
       
   600         my $comment_str = '';
       
   601         while (1) {
       
   602             my $data = $reader->data;
       
   603             $self->parser_error("End of data seen while looking for close comment marker", $reader)
       
   604                 unless length($data);
       
   605             if ($data =~ /^(.*?)-->/s) {
       
   606                 $comment_str .= $1;
       
   607                 $self->parser_error("Invalid comment (dash)", $reader) if $comment_str =~ /-$/;
       
   608                 $reader->move_along(length($1) + 3);
       
   609                 last;
       
   610             }
       
   611             else {
       
   612                 $comment_str .= $data;
       
   613                 $reader->move_along(length($data));
       
   614             }
       
   615         }
       
   616         
       
   617         $self->comment({ Data => $comment_str });
       
   618         
       
   619         return 1;
       
   620     }
       
   621     return 0;
       
   622 }
       
   623 
       
   624 sub PI {
       
   625     my ($self, $reader) = @_;
       
   626     
       
   627     my $data = $reader->data(2);
       
   628     
       
   629     if ($data =~ /^<\?/) {
       
   630         $reader->move_along(2);
       
   631         my ($target, $data);
       
   632         $target = $self->Name($reader) ||
       
   633             $self->parser_error("PI has no target", $reader);
       
   634         if ($self->skip_whitespace($reader)) {
       
   635             $target = '';
       
   636             while (1) {
       
   637                 my $data = $reader->data;
       
   638                 $self->parser_error("End of data seen while looking for close PI marker", $reader)
       
   639                     unless length($data);
       
   640                 if ($data =~ /^(.*?)\?>/s) {
       
   641                     $target .= $1;
       
   642                     $reader->move_along(length($1) + 2);
       
   643                     last;
       
   644                 }
       
   645                 else {
       
   646                     $target .= $data;
       
   647                     $reader->move_along(length($data));
       
   648                 }
       
   649             }
       
   650         }
       
   651         else {
       
   652             my $data = $reader->data(2);
       
   653             $data =~ /^\?>/ or $self->parser_error("PI closing sequence not found", $reader);
       
   654             $reader->move_along(2);
       
   655         }
       
   656         $self->processing_instruction({ Target => $target, Data => $data });
       
   657         
       
   658         return 1;
       
   659     }
       
   660     return 0;
       
   661 }
       
   662 
       
   663 sub Name {
       
   664     my ($self, $reader) = @_;
       
   665     
       
   666     my $name = '';
       
   667     while(1) {
       
   668         my $data = $reader->data;
       
   669         return unless length($data);
       
   670         $data =~ /^([^\s>\/&\?;=<\)\(\[\],\%\#\!\*]*)/ or return;
       
   671         $name .= $1;
       
   672         my $len = length($1);
       
   673         $reader->move_along($len);
       
   674         last if ($len != length($data));
       
   675     }
       
   676     
       
   677     return unless length($name);
       
   678     
       
   679     $name =~ /$NameChar/o or $self->parser_error("Name <$name> does not match NameChar production", $reader);
       
   680 
       
   681     return $name;
       
   682 }
       
   683 
       
   684 sub quote {
       
   685     my ($self, $reader) = @_;
       
   686     
       
   687     my $data = $reader->data;
       
   688     
       
   689     $data =~ /^(['"])/ or $self->parser_error("Invalid quote token", $reader);
       
   690     $reader->move_along(1);
       
   691     return $1;
       
   692 }
       
   693 
       
   694 1;
       
   695 __END__
       
   696 
       
   697 =head1 NAME
       
   698 
       
   699 XML::SAX::PurePerl - Pure Perl XML Parser with SAX2 interface
       
   700 
       
   701 =head1 SYNOPSIS
       
   702 
       
   703   use XML::Handler::Foo;
       
   704   use XML::SAX::PurePerl;
       
   705   my $handler = XML::Handler::Foo->new();
       
   706   my $parser = XML::SAX::PurePerl->new(Handler => $handler);
       
   707   $parser->parse_uri("myfile.xml");
       
   708 
       
   709 =head1 DESCRIPTION
       
   710 
       
   711 This module implements an XML parser in pure perl. It is written around the
       
   712 upcoming perl 5.8's unicode support and support for multiple document 
       
   713 encodings (using the PerlIO layer), however it has been ported to work with
       
   714 ASCII/UTF8 documents under lower perl versions.
       
   715 
       
   716 The SAX2 API is described in detail at http://sourceforge.net/projects/perl-xml/, in
       
   717 the CVS archive, under libxml-perl/docs. Hopefully those documents will be in a
       
   718 better location soon.
       
   719 
       
   720 Please refer to the SAX2 documentation for how to use this module - it is merely a
       
   721 front end to SAX2, and implements nothing that is not in that spec (or at least tries
       
   722 not to - please email me if you find errors in this implementation).
       
   723 
       
   724 =head1 BUGS
       
   725 
       
   726 XML::SAX::PurePerl is B<slow>. Very slow. I suggest you use something else
       
   727 in fact. However it is great as a fallback parser for XML::SAX, where the
       
   728 user might not be able to install an XS based parser or C library.
       
   729 
       
   730 Currently lots, probably. At the moment the weakest area is parsing DOCTYPE declarations,
       
   731 though the code is in place to start doing this. Also parsing parameter entity
       
   732 references is causing me much confusion, since it's not exactly what I would call
       
   733 trivial, or well documented in the XML grammar. XML documents with internal subsets
       
   734 are likely to fail.
       
   735 
       
   736 I am however trying to work towards full conformance using the Oasis test suite.
       
   737 
       
   738 =head1 AUTHOR
       
   739 
       
   740 Matt Sergeant, matt@sergeant.org. Copyright 2001.
       
   741 
       
   742 Please report all bugs to the Perl-XML mailing list at perl-xml@listserv.activestate.com.
       
   743 
       
   744 =head1 LICENSE
       
   745 
       
   746 This is free software. You may use it or redistribute it under the same terms as
       
   747 Perl 5.7.2 itself.
       
   748 
       
   749 =cut
       
   750