|
1 # $Id: PurePerl.pm,v 1.21 2007/02/07 09:33:50 grant Exp $ |
|
2 |
|
3 package XML::SAX::PurePerl; |
|
4 |
|
5 use strict; |
|
6 use vars qw/$VERSION/; |
|
7 |
|
8 $VERSION = '0.91'; |
|
9 |
|
10 use XML::SAX::PurePerl::Productions qw($Any $CharMinusDash $SingleChar); |
|
11 use XML::SAX::PurePerl::Reader; |
|
12 use XML::SAX::PurePerl::EncodingDetect (); |
|
13 use XML::SAX::Exception; |
|
14 use XML::SAX::PurePerl::DocType (); |
|
15 use XML::SAX::PurePerl::DTDDecls (); |
|
16 use XML::SAX::PurePerl::XMLDecl (); |
|
17 use XML::SAX::DocumentLocator (); |
|
18 use XML::SAX::Base (); |
|
19 use XML::SAX qw(Namespaces); |
|
20 use XML::NamespaceSupport (); |
|
21 use IO::File; |
|
22 |
|
23 if ($] < 5.006) { |
|
24 require XML::SAX::PurePerl::NoUnicodeExt; |
|
25 } |
|
26 else { |
|
27 require XML::SAX::PurePerl::UnicodeExt; |
|
28 } |
|
29 |
|
30 use vars qw(@ISA); |
|
31 @ISA = ('XML::SAX::Base'); |
|
32 |
|
33 my %int_ents = ( |
|
34 amp => '&', |
|
35 lt => '<', |
|
36 gt => '>', |
|
37 quot => '"', |
|
38 apos => "'", |
|
39 ); |
|
40 |
|
41 my $xmlns_ns = "http://www.w3.org/2000/xmlns/"; |
|
42 my $xml_ns = "http://www.w3.org/XML/1998/namespace"; |
|
43 |
|
44 use Carp; |
|
45 sub _parse_characterstream { |
|
46 my $self = shift; |
|
47 my ($fh) = @_; |
|
48 confess("CharacterStream is not yet correctly implemented"); |
|
49 my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh); |
|
50 return $self->_parse($reader); |
|
51 } |
|
52 |
|
53 sub _parse_bytestream { |
|
54 my $self = shift; |
|
55 my ($fh) = @_; |
|
56 my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh); |
|
57 return $self->_parse($reader); |
|
58 } |
|
59 |
|
60 sub _parse_string { |
|
61 my $self = shift; |
|
62 my ($str) = @_; |
|
63 my $reader = XML::SAX::PurePerl::Reader::String->new($str); |
|
64 return $self->_parse($reader); |
|
65 } |
|
66 |
|
67 sub _parse_systemid { |
|
68 my $self = shift; |
|
69 my ($uri) = @_; |
|
70 my $reader = XML::SAX::PurePerl::Reader::URI->new($uri); |
|
71 return $self->_parse($reader); |
|
72 } |
|
73 |
|
74 sub _parse { |
|
75 my ($self, $reader) = @_; |
|
76 |
|
77 $reader->public_id($self->{ParseOptions}{Source}{PublicId}); |
|
78 $reader->system_id($self->{ParseOptions}{Source}{SystemId}); |
|
79 |
|
80 $self->{NSHelper} = XML::NamespaceSupport->new({xmlns => 1}); |
|
81 |
|
82 $self->set_document_locator( |
|
83 XML::SAX::DocumentLocator->new( |
|
84 sub { $reader->public_id }, |
|
85 sub { $reader->system_id }, |
|
86 sub { $reader->line }, |
|
87 sub { $reader->column }, |
|
88 sub { $reader->get_encoding }, |
|
89 sub { $reader->get_xml_version }, |
|
90 ), |
|
91 ); |
|
92 |
|
93 $self->start_document({}); |
|
94 |
|
95 if (defined $self->{ParseOptions}{Source}{Encoding}) { |
|
96 $reader->set_encoding($self->{ParseOptions}{Source}{Encoding}); |
|
97 } |
|
98 else { |
|
99 $self->encoding_detect($reader); |
|
100 } |
|
101 |
|
102 # parse a document |
|
103 $self->document($reader); |
|
104 |
|
105 return $self->end_document({}); |
|
106 } |
|
107 |
|
108 sub parser_error { |
|
109 my $self = shift; |
|
110 my ($error, $reader) = @_; |
|
111 |
|
112 # warn("parser error: $error from ", $reader->line, " : ", $reader->column, "\n"); |
|
113 my $exception = XML::SAX::Exception::Parse->new( |
|
114 Message => $error, |
|
115 ColumnNumber => $reader->column, |
|
116 LineNumber => $reader->line, |
|
117 PublicId => $reader->public_id, |
|
118 SystemId => $reader->system_id, |
|
119 ); |
|
120 |
|
121 $self->fatal_error($exception); |
|
122 $exception->throw; |
|
123 } |
|
124 |
|
125 sub document { |
|
126 my ($self, $reader) = @_; |
|
127 |
|
128 # document ::= prolog element Misc* |
|
129 |
|
130 $self->prolog($reader); |
|
131 $self->element($reader) || |
|
132 $self->parser_error("Document requires an element", $reader); |
|
133 |
|
134 while(length($reader->data)) { |
|
135 $self->Misc($reader) || |
|
136 $self->parser_error("Only Comments, PIs and whitespace allowed at end of document", $reader); |
|
137 } |
|
138 } |
|
139 |
|
140 sub prolog { |
|
141 my ($self, $reader) = @_; |
|
142 |
|
143 $self->XMLDecl($reader); |
|
144 |
|
145 # consume all misc bits |
|
146 1 while($self->Misc($reader)); |
|
147 |
|
148 if ($self->doctypedecl($reader)) { |
|
149 while (length($reader->data)) { |
|
150 $self->Misc($reader) || last; |
|
151 } |
|
152 } |
|
153 } |
|
154 |
|
155 sub element { |
|
156 my ($self, $reader) = @_; |
|
157 |
|
158 return 0 unless $reader->match('<'); |
|
159 |
|
160 my $name = $self->Name($reader) || $self->parser_error("Invalid element name", $reader); |
|
161 |
|
162 my %attribs; |
|
163 |
|
164 while( my ($k, $v) = $self->Attribute($reader) ) { |
|
165 $attribs{$k} = $v; |
|
166 } |
|
167 |
|
168 my $have_namespaces = $self->get_feature(Namespaces); |
|
169 |
|
170 # Namespace processing |
|
171 $self->{NSHelper}->push_context; |
|
172 my @new_ns; |
|
173 # my %attrs = @attribs; |
|
174 # while (my ($k,$v) = each %attrs) { |
|
175 if ($have_namespaces) { |
|
176 while ( my ($k, $v) = each %attribs ) { |
|
177 if ($k =~ m/^xmlns(:(.*))?$/) { |
|
178 my $prefix = $2 || ''; |
|
179 $self->{NSHelper}->declare_prefix($prefix, $v); |
|
180 my $ns = |
|
181 { |
|
182 Prefix => $prefix, |
|
183 NamespaceURI => $v, |
|
184 }; |
|
185 push @new_ns, $ns; |
|
186 $self->SUPER::start_prefix_mapping($ns); |
|
187 } |
|
188 } |
|
189 } |
|
190 |
|
191 # Create element object and fire event |
|
192 my %attrib_hash; |
|
193 while (my ($name, $value) = each %attribs ) { |
|
194 # TODO normalise value here |
|
195 my ($ns, $prefix, $lname); |
|
196 if ($have_namespaces) { |
|
197 ($ns, $prefix, $lname) = $self->{NSHelper}->process_attribute_name($name); |
|
198 } |
|
199 $ns ||= ''; $prefix ||= ''; $lname ||= ''; |
|
200 $attrib_hash{"{$ns}$lname"} = { |
|
201 Name => $name, |
|
202 LocalName => $lname, |
|
203 Prefix => $prefix, |
|
204 NamespaceURI => $ns, |
|
205 Value => $value, |
|
206 }; |
|
207 } |
|
208 |
|
209 %attribs = (); # lose the memory since we recurse deep |
|
210 |
|
211 my ($ns, $prefix, $lname); |
|
212 if ($self->get_feature(Namespaces)) { |
|
213 ($ns, $prefix, $lname) = $self->{NSHelper}->process_element_name($name); |
|
214 } |
|
215 else { |
|
216 $lname = $name; |
|
217 } |
|
218 $ns ||= ''; $prefix ||= ''; $lname ||= ''; |
|
219 |
|
220 # Process remainder of start_element |
|
221 $self->skip_whitespace($reader); |
|
222 my $have_content; |
|
223 my $data = $reader->data(2); |
|
224 if ($data =~ /^\/>/) { |
|
225 $reader->move_along(2); |
|
226 } |
|
227 else { |
|
228 $data =~ /^>/ or $self->parser_error("No close element tag", $reader); |
|
229 $reader->move_along(1); |
|
230 $have_content++; |
|
231 } |
|
232 |
|
233 my $el = |
|
234 { |
|
235 Name => $name, |
|
236 LocalName => $lname, |
|
237 Prefix => $prefix, |
|
238 NamespaceURI => $ns, |
|
239 Attributes => \%attrib_hash, |
|
240 }; |
|
241 $self->start_element($el); |
|
242 |
|
243 # warn("($name\n"); |
|
244 |
|
245 if ($have_content) { |
|
246 $self->content($reader); |
|
247 |
|
248 my $data = $reader->data(2); |
|
249 $data =~ /^<\// or $self->parser_error("No close tag marker", $reader); |
|
250 $reader->move_along(2); |
|
251 my $end_name = $self->Name($reader); |
|
252 $end_name eq $name || $self->parser_error("End tag mismatch ($end_name != $name)", $reader); |
|
253 $self->skip_whitespace($reader); |
|
254 $reader->match('>') or $self->parser_error("No close '>' on end tag", $reader); |
|
255 } |
|
256 |
|
257 my %end_el = %$el; |
|
258 delete $end_el{Attributes}; |
|
259 $self->end_element(\%end_el); |
|
260 |
|
261 for my $ns (@new_ns) { |
|
262 $self->end_prefix_mapping($ns); |
|
263 } |
|
264 $self->{NSHelper}->pop_context; |
|
265 |
|
266 return 1; |
|
267 } |
|
268 |
|
269 sub content { |
|
270 my ($self, $reader) = @_; |
|
271 |
|
272 while (1) { |
|
273 $self->CharData($reader); |
|
274 |
|
275 my $data = $reader->data(2); |
|
276 |
|
277 if ($data =~ /^<\//) { |
|
278 return 1; |
|
279 } |
|
280 elsif ($data =~ /^&/) { |
|
281 $self->Reference($reader) or $self->parser_error("bare & not allowed in content", $reader); |
|
282 next; |
|
283 } |
|
284 elsif ($data =~ /^<!/) { |
|
285 ($self->CDSect($reader) |
|
286 or |
|
287 $self->Comment($reader)) |
|
288 and next; |
|
289 } |
|
290 elsif ($data =~ /^<\?/) { |
|
291 $self->PI($reader) and next; |
|
292 } |
|
293 elsif ($data =~ /^</) { |
|
294 $self->element($reader) and next; |
|
295 } |
|
296 last; |
|
297 } |
|
298 |
|
299 return 1; |
|
300 } |
|
301 |
|
302 sub CDSect { |
|
303 my ($self, $reader) = @_; |
|
304 |
|
305 my $data = $reader->data(9); |
|
306 return 0 unless $data =~ /^<!\[CDATA\[/; |
|
307 $reader->move_along(9); |
|
308 |
|
309 $self->start_cdata({}); |
|
310 |
|
311 $data = $reader->data; |
|
312 while (1) { |
|
313 $self->parser_error("EOF looking for CDATA section end", $reader) |
|
314 unless length($data); |
|
315 |
|
316 if ($data =~ /^(.*?)\]\]>/s) { |
|
317 my $chars = $1; |
|
318 $reader->move_along(length($chars) + 3); |
|
319 $self->characters({Data => $chars}); |
|
320 last; |
|
321 } |
|
322 else { |
|
323 $self->characters({Data => $data}); |
|
324 $reader->move_along(length($data)); |
|
325 $data = $reader->data; |
|
326 } |
|
327 } |
|
328 $self->end_cdata({}); |
|
329 return 1; |
|
330 } |
|
331 |
|
332 sub CharData { |
|
333 my ($self, $reader) = @_; |
|
334 |
|
335 my $data = $reader->data; |
|
336 |
|
337 while (1) { |
|
338 return unless length($data); |
|
339 |
|
340 if ($data =~ /^([^<&]*)[<&]/s) { |
|
341 my $chars = $1; |
|
342 $self->parser_error("String ']]>' not allowed in character data", $reader) |
|
343 if $chars =~ /\]\]>/; |
|
344 $reader->move_along(length($chars)); |
|
345 $self->characters({Data => $chars}) if length($chars); |
|
346 last; |
|
347 } |
|
348 else { |
|
349 $self->characters({Data => $data}); |
|
350 $reader->move_along(length($data)); |
|
351 $data = $reader->data; |
|
352 } |
|
353 } |
|
354 } |
|
355 |
|
356 sub Misc { |
|
357 my ($self, $reader) = @_; |
|
358 if ($self->Comment($reader)) { |
|
359 return 1; |
|
360 } |
|
361 elsif ($self->PI($reader)) { |
|
362 return 1; |
|
363 } |
|
364 elsif ($self->skip_whitespace($reader)) { |
|
365 return 1; |
|
366 } |
|
367 |
|
368 return 0; |
|
369 } |
|
370 |
|
371 sub Reference { |
|
372 my ($self, $reader) = @_; |
|
373 |
|
374 return 0 unless $reader->match('&'); |
|
375 |
|
376 my $data = $reader->data; |
|
377 |
|
378 if ($data =~ /^#x([0-9a-fA-F]+);/) { |
|
379 my $ref = $1; |
|
380 $reader->move_along(length($ref) + 3); |
|
381 my $char = chr_ref(hex($ref)); |
|
382 $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader) |
|
383 unless $char =~ /$SingleChar/o; |
|
384 $self->characters({ Data => $char }); |
|
385 return 1; |
|
386 } |
|
387 elsif ($data =~ /^#([0-9]+);/) { |
|
388 my $ref = $1; |
|
389 $reader->move_along(length($ref) + 2); |
|
390 my $char = chr_ref($ref); |
|
391 $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader) |
|
392 unless $char =~ /$SingleChar/o; |
|
393 $self->characters({ Data => $char }); |
|
394 return 1; |
|
395 } |
|
396 else { |
|
397 # EntityRef |
|
398 my $name = $self->Name($reader) |
|
399 || $self->parser_error("Invalid name in entity", $reader); |
|
400 $reader->match(';') or $self->parser_error("No semi-colon found after entity name", $reader); |
|
401 |
|
402 # warn("got entity: \&$name;\n"); |
|
403 |
|
404 # expand it |
|
405 if ($self->_is_entity($name)) { |
|
406 |
|
407 if ($self->_is_external($name)) { |
|
408 my $value = $self->_get_entity($name); |
|
409 my $ent_reader = XML::SAX::PurePerl::Reader::URI->new($value); |
|
410 $self->encoding_detect($ent_reader); |
|
411 $self->extParsedEnt($ent_reader); |
|
412 } |
|
413 else { |
|
414 my $value = $self->_stringify_entity($name); |
|
415 my $ent_reader = XML::SAX::PurePerl::Reader::String->new($value); |
|
416 $self->content($ent_reader); |
|
417 } |
|
418 return 1; |
|
419 } |
|
420 elsif ($name =~ /^(?:amp|gt|lt|quot|apos)$/) { |
|
421 $self->characters({ Data => $int_ents{$name} }); |
|
422 return 1; |
|
423 } |
|
424 else { |
|
425 $self->parser_error("Undeclared entity", $reader); |
|
426 } |
|
427 } |
|
428 } |
|
429 |
|
430 sub AttReference { |
|
431 my ($self, $name, $reader) = @_; |
|
432 if ($name =~ /^#x([0-9a-fA-F]+)$/) { |
|
433 my $chr = chr_ref(hex($1)); |
|
434 $chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader); |
|
435 return $chr; |
|
436 } |
|
437 elsif ($name =~ /^#([0-9]+)$/) { |
|
438 my $chr = chr_ref($1); |
|
439 $chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader); |
|
440 return $chr; |
|
441 } |
|
442 else { |
|
443 if ($self->_is_entity($name)) { |
|
444 if ($self->_is_external($name)) { |
|
445 $self->parser_error("No external entity references allowed in attribute values", $reader); |
|
446 } |
|
447 else { |
|
448 my $value = $self->_stringify_entity($name); |
|
449 return $value; |
|
450 } |
|
451 } |
|
452 elsif ($name =~ /^(?:amp|lt|gt|quot|apos)$/) { |
|
453 return $int_ents{$name}; |
|
454 } |
|
455 else { |
|
456 $self->parser_error("Undeclared entity '$name'", $reader); |
|
457 } |
|
458 } |
|
459 } |
|
460 |
|
461 sub extParsedEnt { |
|
462 my ($self, $reader) = @_; |
|
463 |
|
464 $self->TextDecl($reader); |
|
465 $self->content($reader); |
|
466 } |
|
467 |
|
468 sub _is_external { |
|
469 my ($self, $name) = @_; |
|
470 # TODO: Fix this to use $reader to store the entities perhaps. |
|
471 if ($self->{ParseOptions}{external_entities}{$name}) { |
|
472 return 1; |
|
473 } |
|
474 return ; |
|
475 } |
|
476 |
|
477 sub _is_entity { |
|
478 my ($self, $name) = @_; |
|
479 # TODO: ditto above |
|
480 if (exists $self->{ParseOptions}{entities}{$name}) { |
|
481 return 1; |
|
482 } |
|
483 return 0; |
|
484 } |
|
485 |
|
486 sub _stringify_entity { |
|
487 my ($self, $name) = @_; |
|
488 # TODO: ditto above |
|
489 if (exists $self->{ParseOptions}{expanded_entity}{$name}) { |
|
490 return $self->{ParseOptions}{expanded_entity}{$name}; |
|
491 } |
|
492 # expand |
|
493 my $reader = XML::SAX::PurePerl::Reader::URI->new($self->{ParseOptions}{entities}{$name}); |
|
494 my $ent = ''; |
|
495 while(1) { |
|
496 my $data = $reader->data; |
|
497 $ent .= $data; |
|
498 $reader->move_along(length($data)) or last; |
|
499 } |
|
500 return $self->{ParseOptions}{expanded_entity}{$name} = $ent; |
|
501 } |
|
502 |
|
503 sub _get_entity { |
|
504 my ($self, $name) = @_; |
|
505 # TODO: ditto above |
|
506 return $self->{ParseOptions}{entities}{$name}; |
|
507 } |
|
508 |
|
509 sub skip_whitespace { |
|
510 my ($self, $reader) = @_; |
|
511 |
|
512 my $data = $reader->data; |
|
513 |
|
514 my $found = 0; |
|
515 while ($data =~ s/^([\x20\x0A\x0D\x09]*)//) { |
|
516 last unless length($1); |
|
517 $found++; |
|
518 $reader->move_along(length($1)); |
|
519 $data = $reader->data; |
|
520 } |
|
521 |
|
522 return $found; |
|
523 } |
|
524 |
|
525 sub Attribute { |
|
526 my ($self, $reader) = @_; |
|
527 |
|
528 $self->skip_whitespace($reader) || return; |
|
529 |
|
530 my $data = $reader->data(2); |
|
531 return if $data =~ /^\/?>/; |
|
532 |
|
533 if (my $name = $self->Name($reader)) { |
|
534 $self->skip_whitespace($reader); |
|
535 $reader->match('=') or $self->parser_error("No '=' in Attribute", $reader); |
|
536 $self->skip_whitespace($reader); |
|
537 my $value = $self->AttValue($reader); |
|
538 |
|
539 if (!$self->cdata_attrib($name)) { |
|
540 $value =~ s/^\x20*//; # discard leading spaces |
|
541 $value =~ s/\x20*$//; # discard trailing spaces |
|
542 $value =~ s/ {1,}/ /g; # all >1 space to single space |
|
543 } |
|
544 |
|
545 return $name, $value; |
|
546 } |
|
547 |
|
548 return; |
|
549 } |
|
550 |
|
551 sub cdata_attrib { |
|
552 # TODO implement this! |
|
553 return 1; |
|
554 } |
|
555 |
|
556 sub AttValue { |
|
557 my ($self, $reader) = @_; |
|
558 |
|
559 my $quote = $self->quote($reader); |
|
560 |
|
561 my $value = ''; |
|
562 |
|
563 while (1) { |
|
564 my $data = $reader->data; |
|
565 $self->parser_error("EOF found while looking for the end of attribute value", $reader) |
|
566 unless length($data); |
|
567 if ($data =~ /^([^$quote]*)$quote/) { |
|
568 $reader->move_along(length($1) + 1); |
|
569 $value .= $1; |
|
570 last; |
|
571 } |
|
572 else { |
|
573 $value .= $data; |
|
574 $reader->move_along(length($data)); |
|
575 } |
|
576 } |
|
577 |
|
578 if ($value =~ /</) { |
|
579 $self->parser_error("< character not allowed in attribute values", $reader); |
|
580 } |
|
581 |
|
582 $value =~ s/[\x09\x0A\x0D]/\x20/g; |
|
583 $value =~ s/&(#(x[0-9a-fA-F]+)|([0-9]+)|\w+);/$self->AttReference($1, $reader)/geo; |
|
584 |
|
585 return $value; |
|
586 } |
|
587 |
|
588 sub Comment { |
|
589 my ($self, $reader) = @_; |
|
590 |
|
591 my $data = $reader->data(4); |
|
592 if ($data =~ /^<!--/) { |
|
593 $reader->move_along(4); |
|
594 my $comment_str = ''; |
|
595 while (1) { |
|
596 my $data = $reader->data; |
|
597 $self->parser_error("End of data seen while looking for close comment marker", $reader) |
|
598 unless length($data); |
|
599 if ($data =~ /^(.*?)-->/s) { |
|
600 $comment_str .= $1; |
|
601 $self->parser_error("Invalid comment (dash)", $reader) if $comment_str =~ /-$/; |
|
602 $reader->move_along(length($1) + 3); |
|
603 last; |
|
604 } |
|
605 else { |
|
606 $comment_str .= $data; |
|
607 $reader->move_along(length($data)); |
|
608 } |
|
609 } |
|
610 |
|
611 $self->comment({ Data => $comment_str }); |
|
612 |
|
613 return 1; |
|
614 } |
|
615 return 0; |
|
616 } |
|
617 |
|
618 sub PI { |
|
619 my ($self, $reader) = @_; |
|
620 |
|
621 my $data = $reader->data(2); |
|
622 |
|
623 if ($data =~ /^<\?/) { |
|
624 $reader->move_along(2); |
|
625 my ($target, $data); |
|
626 $target = $self->Name($reader) || |
|
627 $self->parser_error("PI has no target", $reader); |
|
628 if ($self->skip_whitespace($reader)) { |
|
629 $target = ''; |
|
630 while (1) { |
|
631 my $data = $reader->data; |
|
632 $self->parser_error("End of data seen while looking for close PI marker", $reader) |
|
633 unless length($data); |
|
634 if ($data =~ /^(.*?)\?>/s) { |
|
635 $target .= $1; |
|
636 $reader->move_along(length($1) + 2); |
|
637 last; |
|
638 } |
|
639 else { |
|
640 $target .= $data; |
|
641 $reader->move_along(length($data)); |
|
642 } |
|
643 } |
|
644 } |
|
645 else { |
|
646 my $data = $reader->data(2); |
|
647 $data =~ /^\?>/ or $self->parser_error("PI closing sequence not found", $reader); |
|
648 $reader->move_along(2); |
|
649 } |
|
650 $self->processing_instruction({ Target => $target, Data => $data }); |
|
651 |
|
652 return 1; |
|
653 } |
|
654 return 0; |
|
655 } |
|
656 |
|
657 sub Name { |
|
658 my ($self, $reader) = @_; |
|
659 |
|
660 my $name = ''; |
|
661 while(1) { |
|
662 my $data = $reader->data; |
|
663 return unless length($data); |
|
664 $data =~ /^([^\s>\/&\?;=<\)\(\[\],\%\#\!\*]*)/ or return; |
|
665 $name .= $1; |
|
666 my $len = length($1); |
|
667 $reader->move_along($len); |
|
668 last if ($len != length($data)); |
|
669 } |
|
670 |
|
671 return unless length($name); |
|
672 |
|
673 $name =~ /$NameChar/o or $self->parser_error("Name <$name> does not match NameChar production", $reader); |
|
674 |
|
675 return $name; |
|
676 } |
|
677 |
|
678 sub quote { |
|
679 my ($self, $reader) = @_; |
|
680 |
|
681 my $data = $reader->data; |
|
682 |
|
683 $data =~ /^(['"])/ or $self->parser_error("Invalid quote token", $reader); |
|
684 $reader->move_along(1); |
|
685 return $1; |
|
686 } |
|
687 |
|
688 1; |
|
689 __END__ |
|
690 |
|
691 =head1 NAME |
|
692 |
|
693 XML::SAX::PurePerl - Pure Perl XML Parser with SAX2 interface |
|
694 |
|
695 =head1 SYNOPSIS |
|
696 |
|
697 use XML::Handler::Foo; |
|
698 use XML::SAX::PurePerl; |
|
699 my $handler = XML::Handler::Foo->new(); |
|
700 my $parser = XML::SAX::PurePerl->new(Handler => $handler); |
|
701 $parser->parse_uri("myfile.xml"); |
|
702 |
|
703 =head1 DESCRIPTION |
|
704 |
|
705 This module implements an XML parser in pure perl. It is written around the |
|
706 upcoming perl 5.8's unicode support and support for multiple document |
|
707 encodings (using the PerlIO layer), however it has been ported to work with |
|
708 ASCII/UTF8 documents under lower perl versions. |
|
709 |
|
710 The SAX2 API is described in detail at http://sourceforge.net/projects/perl-xml/, in |
|
711 the CVS archive, under libxml-perl/docs. Hopefully those documents will be in a |
|
712 better location soon. |
|
713 |
|
714 Please refer to the SAX2 documentation for how to use this module - it is merely a |
|
715 front end to SAX2, and implements nothing that is not in that spec (or at least tries |
|
716 not to - please email me if you find errors in this implementation). |
|
717 |
|
718 =head1 BUGS |
|
719 |
|
720 XML::SAX::PurePerl is B<slow>. Very slow. I suggest you use something else |
|
721 in fact. However it is great as a fallback parser for XML::SAX, where the |
|
722 user might not be able to install an XS based parser or C library. |
|
723 |
|
724 Currently lots, probably. At the moment the weakest area is parsing DOCTYPE declarations, |
|
725 though the code is in place to start doing this. Also parsing parameter entity |
|
726 references is causing me much confusion, since it's not exactly what I would call |
|
727 trivial, or well documented in the XML grammar. XML documents with internal subsets |
|
728 are likely to fail. |
|
729 |
|
730 I am however trying to work towards full conformance using the Oasis test suite. |
|
731 |
|
732 =head1 AUTHOR |
|
733 |
|
734 Matt Sergeant, matt@sergeant.org. Copyright 2001. |
|
735 |
|
736 Please report all bugs to the Perl-XML mailing list at perl-xml@listserv.activestate.com. |
|
737 |
|
738 =head1 LICENSE |
|
739 |
|
740 This is free software. You may use it or redistribute it under the same terms as |
|
741 Perl 5.7.2 itself. |
|
742 |
|
743 =cut |
|
744 |