|
1 # $Id: PurePerl.pm,v 1.21 2007/02/07 09:33:50 grant Exp $ |
|
2 |
|
3 package XML::SAX::PurePerl; |
|
4 |
|
5 use strict; |
|
6 use vars qw/$VERSION/; |
|
7 |
|
8 $VERSION = '0.91'; |
|
9 |
|
10 use XML::SAX::PurePerl::Productions qw($Any $CharMinusDash $SingleChar); |
|
11 use XML::SAX::PurePerl::Reader; |
|
12 use XML::SAX::PurePerl::EncodingDetect (); |
|
13 use XML::SAX::Exception; |
|
14 use XML::SAX::PurePerl::DocType (); |
|
15 use XML::SAX::PurePerl::DTDDecls (); |
|
16 use XML::SAX::PurePerl::XMLDecl (); |
|
17 use XML::SAX::DocumentLocator (); |
|
18 use XML::SAX::Base (); |
|
19 use XML::SAX qw(Namespaces); |
|
20 use XML::NamespaceSupport (); |
|
21 use IO::File; |
|
22 |
|
23 if ($] < 5.006) { |
|
24 require XML::SAX::PurePerl::NoUnicodeExt; |
|
25 } |
|
26 else { |
|
27 require XML::SAX::PurePerl::UnicodeExt; |
|
28 } |
|
29 |
|
30 use vars qw(@ISA); |
|
31 @ISA = ('XML::SAX::Base'); |
|
32 |
|
33 my %int_ents = ( |
|
34 amp => '&', |
|
35 lt => '<', |
|
36 gt => '>', |
|
37 quot => '"', |
|
38 apos => "'", |
|
39 ); |
|
40 |
|
41 my $xmlns_ns = "http://www.w3.org/2000/xmlns/"; |
|
42 my $xml_ns = "http://www.w3.org/XML/1998/namespace"; |
|
43 |
|
44 use Carp; |
|
45 sub _parse_characterstream { |
|
46 my $self = shift; |
|
47 my ($fh) = @_; |
|
48 confess("CharacterStream is not yet correctly implemented"); |
|
49 my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh); |
|
50 return $self->_parse($reader); |
|
51 } |
|
52 |
|
53 sub _parse_bytestream { |
|
54 my $self = shift; |
|
55 my ($fh) = @_; |
|
56 my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh); |
|
57 return $self->_parse($reader); |
|
58 } |
|
59 |
|
60 sub _parse_string { |
|
61 my $self = shift; |
|
62 my ($str) = @_; |
|
63 my $reader = XML::SAX::PurePerl::Reader::String->new($str); |
|
64 return $self->_parse($reader); |
|
65 } |
|
66 |
|
67 sub _parse_systemid { |
|
68 my $self = shift; |
|
69 my ($uri) = @_; |
|
70 my $reader = XML::SAX::PurePerl::Reader::URI->new($uri); |
|
71 return $self->_parse($reader); |
|
72 } |
|
73 |
|
74 sub _parse { |
|
75 my ($self, $reader) = @_; |
|
76 |
|
77 $reader->public_id($self->{ParseOptions}{Source}{PublicId}); |
|
78 $reader->system_id($self->{ParseOptions}{Source}{SystemId}); |
|
79 |
|
80 $self->{NSHelper} = XML::NamespaceSupport->new({xmlns => 1}); |
|
81 |
|
82 $self->set_document_locator( |
|
83 XML::SAX::DocumentLocator->new( |
|
84 sub { $reader->public_id }, |
|
85 sub { $reader->system_id }, |
|
86 sub { $reader->line }, |
|
87 sub { $reader->column }, |
|
88 sub { $reader->get_encoding }, |
|
89 sub { $reader->get_xml_version }, |
|
90 ), |
|
91 ); |
|
92 |
|
93 $self->start_document({}); |
|
94 |
|
95 if (defined $self->{ParseOptions}{Source}{Encoding}) { |
|
96 $reader->set_encoding($self->{ParseOptions}{Source}{Encoding}); |
|
97 } |
|
98 else { |
|
99 $self->encoding_detect($reader); |
|
100 } |
|
101 |
|
102 # parse a document |
|
103 $self->document($reader); |
|
104 |
|
105 return $self->end_document({}); |
|
106 } |
|
107 |
|
108 sub parser_error { |
|
109 my $self = shift; |
|
110 my ($error, $reader) = @_; |
|
111 |
|
112 # warn("parser error: $error from ", $reader->line, " : ", $reader->column, "\n"); |
|
113 my $exception = XML::SAX::Exception::Parse->new( |
|
114 Message => $error, |
|
115 ColumnNumber => $reader->column, |
|
116 LineNumber => $reader->line, |
|
117 PublicId => $reader->public_id, |
|
118 SystemId => $reader->system_id, |
|
119 ); |
|
120 |
|
121 $self->fatal_error($exception); |
|
122 $exception->throw; |
|
123 } |
|
124 |
|
125 sub document { |
|
126 my ($self, $reader) = @_; |
|
127 |
|
128 # document ::= prolog element Misc* |
|
129 |
|
130 $self->prolog($reader); |
|
131 $self->element($reader) || |
|
132 $self->parser_error("Document requires an element", $reader); |
|
133 |
|
134 while(length($reader->data)) { |
|
135 $self->Misc($reader) || |
|
136 $self->parser_error("Only Comments, PIs and whitespace allowed at end of document", $reader); |
|
137 } |
|
138 } |
|
139 |
|
140 sub prolog { |
|
141 my ($self, $reader) = @_; |
|
142 |
|
143 $self->XMLDecl($reader); |
|
144 |
|
145 # consume all misc bits |
|
146 1 while($self->Misc($reader)); |
|
147 |
|
148 if ($self->doctypedecl($reader)) { |
|
149 while (length($reader->data)) { |
|
150 $self->Misc($reader) || last; |
|
151 } |
|
152 } |
|
153 } |
|
154 |
|
155 sub element { |
|
156 my ($self, $reader) = @_; |
|
157 |
|
158 return 0 unless $reader->match('<'); |
|
159 |
|
160 my $name = $self->Name($reader) || $self->parser_error("Invalid element name", $reader); |
|
161 |
|
162 my %attribs; |
|
163 |
|
164 while( my ($k, $v) = $self->Attribute($reader) ) { |
|
165 $attribs{$k} = $v; |
|
166 } |
|
167 |
|
168 my $have_namespaces = $self->get_feature(Namespaces); |
|
169 |
|
170 # Namespace processing |
|
171 $self->{NSHelper}->push_context; |
|
172 my @new_ns; |
|
173 # my %attrs = @attribs; |
|
174 # while (my ($k,$v) = each %attrs) { |
|
175 if ($have_namespaces) { |
|
176 while ( my ($k, $v) = each %attribs ) { |
|
177 if ($k =~ m/^xmlns(:(.*))?$/) { |
|
178 my $prefix = $2 || ''; |
|
179 $self->{NSHelper}->declare_prefix($prefix, $v); |
|
180 my $ns = |
|
181 { |
|
182 Prefix => $prefix, |
|
183 NamespaceURI => $v, |
|
184 }; |
|
185 push @new_ns, $ns; |
|
186 $self->SUPER::start_prefix_mapping($ns); |
|
187 } |
|
188 } |
|
189 } |
|
190 |
|
191 # Create element object and fire event |
|
192 my %attrib_hash; |
|
193 while (my ($name, $value) = each %attribs ) { |
|
194 # TODO normalise value here |
|
195 my ($ns, $prefix, $lname); |
|
196 if ($have_namespaces) { |
|
197 ($ns, $prefix, $lname) = $self->{NSHelper}->process_attribute_name($name); |
|
198 } |
|
199 $ns ||= ''; $prefix ||= ''; $lname ||= ''; |
|
200 $attrib_hash{"{$ns}$lname"} = { |
|
201 Name => $name, |
|
202 LocalName => $lname, |
|
203 Prefix => $prefix, |
|
204 NamespaceURI => $ns, |
|
205 Value => $value, |
|
206 }; |
|
207 } |
|
208 |
|
209 %attribs = (); # lose the memory since we recurse deep |
|
210 |
|
211 my ($ns, $prefix, $lname); |
|
212 if ($self->get_feature(Namespaces)) { |
|
213 ($ns, $prefix, $lname) = $self->{NSHelper}->process_element_name($name); |
|
214 } |
|
215 else { |
|
216 $lname = $name; |
|
217 } |
|
218 $ns ||= ''; $prefix ||= ''; $lname ||= ''; |
|
219 |
|
220 # Process remainder of start_element |
|
221 $self->skip_whitespace($reader); |
|
222 my $have_content; |
|
223 my $data = $reader->data(2); |
|
224 if ($data =~ /^\/>/) { |
|
225 $reader->move_along(2); |
|
226 } |
|
227 else { |
|
228 $data =~ /^>/ or $self->parser_error("No close element tag", $reader); |
|
229 $reader->move_along(1); |
|
230 $have_content++; |
|
231 } |
|
232 |
|
233 my $el = |
|
234 { |
|
235 Name => $name, |
|
236 LocalName => $lname, |
|
237 Prefix => $prefix, |
|
238 NamespaceURI => $ns, |
|
239 Attributes => \%attrib_hash, |
|
240 }; |
|
241 $self->start_element($el); |
|
242 |
|
243 # warn("($name\n"); |
|
244 |
|
245 if ($have_content) { |
|
246 $self->content($reader); |
|
247 |
|
248 my $data = $reader->data(2); |
|
249 $data =~ /^<\// or $self->parser_error("No close tag marker", $reader); |
|
250 $reader->move_along(2); |
|
251 my $end_name = $self->Name($reader); |
|
252 $end_name eq $name || $self->parser_error("End tag mismatch ($end_name != $name)", $reader); |
|
253 $self->skip_whitespace($reader); |
|
254 $reader->match('>') or $self->parser_error("No close '>' on end tag", $reader); |
|
255 } |
|
256 |
|
257 my %end_el = %$el; |
|
258 delete $end_el{Attributes}; |
|
259 $self->end_element(\%end_el); |
|
260 |
|
261 for my $ns (@new_ns) { |
|
262 $self->end_prefix_mapping($ns); |
|
263 } |
|
264 $self->{NSHelper}->pop_context; |
|
265 |
|
266 return 1; |
|
267 } |
|
268 |
|
269 sub content { |
|
270 my ($self, $reader) = @_; |
|
271 |
|
272 while (1) { |
|
273 $self->CharData($reader); |
|
274 |
|
275 my $data = $reader->data(2); |
|
276 |
|
277 if ($data =~ /^<\//) { |
|
278 return 1; |
|
279 } |
|
280 elsif ($data =~ /^&/) { |
|
281 $self->Reference($reader) or $self->parser_error("bare & not allowed in content", $reader); |
|
282 next; |
|
283 } |
|
284 elsif ($data =~ /^<!/) { |
|
285 ($self->CDSect($reader) |
|
286 or |
|
287 $self->Comment($reader)) |
|
288 and next; |
|
289 } |
|
290 elsif ($data =~ /^<\?/) { |
|
291 $self->PI($reader) and next; |
|
292 } |
|
293 elsif ($data =~ /^</) { |
|
294 $self->element($reader) and next; |
|
295 } |
|
296 last; |
|
297 } |
|
298 |
|
299 return 1; |
|
300 } |
|
301 |
|
302 sub CDSect { |
|
303 my ($self, $reader) = @_; |
|
304 |
|
305 my $data = $reader->data(9); |
|
306 return 0 unless $data =~ /^<!\[CDATA\[/; |
|
307 $reader->move_along(9); |
|
308 |
|
309 $self->start_cdata({}); |
|
310 |
|
311 $data = $reader->data; |
|
312 while (1) { |
|
313 $self->parser_error("EOF looking for CDATA section end", $reader) |
|
314 unless length($data); |
|
315 |
|
316 if ($data =~ /^(.*?)\]\]>/s) { |
|
317 my $chars = $1; |
|
318 $reader->move_along(length($chars) + 3); |
|
319 $self->characters({Data => $chars}); |
|
320 last; |
|
321 } |
|
322 elsif ($data =~ /(.*?)\]+$/s) { |
|
323 my $chars = $1; |
|
324 $reader->move_along(length($chars)); |
|
325 $self->characters({Data => $chars}); |
|
326 $data = $reader->data(3); |
|
327 } |
|
328 else { |
|
329 $self->characters({Data => $data}); |
|
330 $reader->move_along(length($data)); |
|
331 $data = $reader->data; |
|
332 } |
|
333 } |
|
334 $self->end_cdata({}); |
|
335 return 1; |
|
336 } |
|
337 |
|
338 sub CharData { |
|
339 my ($self, $reader) = @_; |
|
340 |
|
341 my $data = $reader->data; |
|
342 |
|
343 while (1) { |
|
344 return unless length($data); |
|
345 |
|
346 if ($data =~ /^([^<&]*)[<&]/s) { |
|
347 my $chars = $1; |
|
348 $self->parser_error("String ']]>' not allowed in character data", $reader) |
|
349 if $chars =~ /\]\]>/; |
|
350 $reader->move_along(length($chars)); |
|
351 $self->characters({Data => $chars}) if length($chars); |
|
352 last; |
|
353 } |
|
354 else { |
|
355 $self->characters({Data => $data}); |
|
356 $reader->move_along(length($data)); |
|
357 $data = $reader->data; |
|
358 } |
|
359 } |
|
360 } |
|
361 |
|
362 sub Misc { |
|
363 my ($self, $reader) = @_; |
|
364 if ($self->Comment($reader)) { |
|
365 return 1; |
|
366 } |
|
367 elsif ($self->PI($reader)) { |
|
368 return 1; |
|
369 } |
|
370 elsif ($self->skip_whitespace($reader)) { |
|
371 return 1; |
|
372 } |
|
373 |
|
374 return 0; |
|
375 } |
|
376 |
|
377 sub Reference { |
|
378 my ($self, $reader) = @_; |
|
379 |
|
380 return 0 unless $reader->match('&'); |
|
381 |
|
382 my $data = $reader->data; |
|
383 |
|
384 if ($data =~ /^#x([0-9a-fA-F]+);/) { |
|
385 my $ref = $1; |
|
386 $reader->move_along(length($ref) + 3); |
|
387 my $char = chr_ref(hex($ref)); |
|
388 $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader) |
|
389 unless $char =~ /$SingleChar/o; |
|
390 $self->characters({ Data => $char }); |
|
391 return 1; |
|
392 } |
|
393 elsif ($data =~ /^#([0-9]+);/) { |
|
394 my $ref = $1; |
|
395 $reader->move_along(length($ref) + 2); |
|
396 my $char = chr_ref($ref); |
|
397 $self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader) |
|
398 unless $char =~ /$SingleChar/o; |
|
399 $self->characters({ Data => $char }); |
|
400 return 1; |
|
401 } |
|
402 else { |
|
403 # EntityRef |
|
404 my $name = $self->Name($reader) |
|
405 || $self->parser_error("Invalid name in entity", $reader); |
|
406 $reader->match(';') or $self->parser_error("No semi-colon found after entity name", $reader); |
|
407 |
|
408 # warn("got entity: \&$name;\n"); |
|
409 |
|
410 # expand it |
|
411 if ($self->_is_entity($name)) { |
|
412 |
|
413 if ($self->_is_external($name)) { |
|
414 my $value = $self->_get_entity($name); |
|
415 my $ent_reader = XML::SAX::PurePerl::Reader::URI->new($value); |
|
416 $self->encoding_detect($ent_reader); |
|
417 $self->extParsedEnt($ent_reader); |
|
418 } |
|
419 else { |
|
420 my $value = $self->_stringify_entity($name); |
|
421 my $ent_reader = XML::SAX::PurePerl::Reader::String->new($value); |
|
422 $self->content($ent_reader); |
|
423 } |
|
424 return 1; |
|
425 } |
|
426 elsif ($name =~ /^(?:amp|gt|lt|quot|apos)$/) { |
|
427 $self->characters({ Data => $int_ents{$name} }); |
|
428 return 1; |
|
429 } |
|
430 else { |
|
431 $self->parser_error("Undeclared entity", $reader); |
|
432 } |
|
433 } |
|
434 } |
|
435 |
|
436 sub AttReference { |
|
437 my ($self, $name, $reader) = @_; |
|
438 if ($name =~ /^#x([0-9a-fA-F]+)$/) { |
|
439 my $chr = chr_ref(hex($1)); |
|
440 $chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader); |
|
441 return $chr; |
|
442 } |
|
443 elsif ($name =~ /^#([0-9]+)$/) { |
|
444 my $chr = chr_ref($1); |
|
445 $chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader); |
|
446 return $chr; |
|
447 } |
|
448 else { |
|
449 if ($self->_is_entity($name)) { |
|
450 if ($self->_is_external($name)) { |
|
451 $self->parser_error("No external entity references allowed in attribute values", $reader); |
|
452 } |
|
453 else { |
|
454 my $value = $self->_stringify_entity($name); |
|
455 return $value; |
|
456 } |
|
457 } |
|
458 elsif ($name =~ /^(?:amp|lt|gt|quot|apos)$/) { |
|
459 return $int_ents{$name}; |
|
460 } |
|
461 else { |
|
462 $self->parser_error("Undeclared entity '$name'", $reader); |
|
463 } |
|
464 } |
|
465 } |
|
466 |
|
467 sub extParsedEnt { |
|
468 my ($self, $reader) = @_; |
|
469 |
|
470 $self->TextDecl($reader); |
|
471 $self->content($reader); |
|
472 } |
|
473 |
|
474 sub _is_external { |
|
475 my ($self, $name) = @_; |
|
476 # TODO: Fix this to use $reader to store the entities perhaps. |
|
477 if ($self->{ParseOptions}{external_entities}{$name}) { |
|
478 return 1; |
|
479 } |
|
480 return ; |
|
481 } |
|
482 |
|
483 sub _is_entity { |
|
484 my ($self, $name) = @_; |
|
485 # TODO: ditto above |
|
486 if (exists $self->{ParseOptions}{entities}{$name}) { |
|
487 return 1; |
|
488 } |
|
489 return 0; |
|
490 } |
|
491 |
|
492 sub _stringify_entity { |
|
493 my ($self, $name) = @_; |
|
494 # TODO: ditto above |
|
495 if (exists $self->{ParseOptions}{expanded_entity}{$name}) { |
|
496 return $self->{ParseOptions}{expanded_entity}{$name}; |
|
497 } |
|
498 # expand |
|
499 my $reader = XML::SAX::PurePerl::Reader::URI->new($self->{ParseOptions}{entities}{$name}); |
|
500 my $ent = ''; |
|
501 while(1) { |
|
502 my $data = $reader->data; |
|
503 $ent .= $data; |
|
504 $reader->move_along(length($data)) or last; |
|
505 } |
|
506 return $self->{ParseOptions}{expanded_entity}{$name} = $ent; |
|
507 } |
|
508 |
|
509 sub _get_entity { |
|
510 my ($self, $name) = @_; |
|
511 # TODO: ditto above |
|
512 return $self->{ParseOptions}{entities}{$name}; |
|
513 } |
|
514 |
|
515 sub skip_whitespace { |
|
516 my ($self, $reader) = @_; |
|
517 |
|
518 my $data = $reader->data; |
|
519 |
|
520 my $found = 0; |
|
521 while ($data =~ s/^([\x20\x0A\x0D\x09]*)//) { |
|
522 last unless length($1); |
|
523 $found++; |
|
524 $reader->move_along(length($1)); |
|
525 $data = $reader->data; |
|
526 } |
|
527 |
|
528 return $found; |
|
529 } |
|
530 |
|
531 sub Attribute { |
|
532 my ($self, $reader) = @_; |
|
533 |
|
534 $self->skip_whitespace($reader) || return; |
|
535 |
|
536 my $data = $reader->data(2); |
|
537 return if $data =~ /^\/?>/; |
|
538 |
|
539 if (my $name = $self->Name($reader)) { |
|
540 $self->skip_whitespace($reader); |
|
541 $reader->match('=') or $self->parser_error("No '=' in Attribute", $reader); |
|
542 $self->skip_whitespace($reader); |
|
543 my $value = $self->AttValue($reader); |
|
544 |
|
545 if (!$self->cdata_attrib($name)) { |
|
546 $value =~ s/^\x20*//; # discard leading spaces |
|
547 $value =~ s/\x20*$//; # discard trailing spaces |
|
548 $value =~ s/ {1,}/ /g; # all >1 space to single space |
|
549 } |
|
550 |
|
551 return $name, $value; |
|
552 } |
|
553 |
|
554 return; |
|
555 } |
|
556 |
|
557 sub cdata_attrib { |
|
558 # TODO implement this! |
|
559 return 1; |
|
560 } |
|
561 |
|
562 sub AttValue { |
|
563 my ($self, $reader) = @_; |
|
564 |
|
565 my $quote = $self->quote($reader); |
|
566 |
|
567 my $value = ''; |
|
568 |
|
569 while (1) { |
|
570 my $data = $reader->data; |
|
571 $self->parser_error("EOF found while looking for the end of attribute value", $reader) |
|
572 unless length($data); |
|
573 if ($data =~ /^([^$quote]*)$quote/) { |
|
574 $reader->move_along(length($1) + 1); |
|
575 $value .= $1; |
|
576 last; |
|
577 } |
|
578 else { |
|
579 $value .= $data; |
|
580 $reader->move_along(length($data)); |
|
581 } |
|
582 } |
|
583 |
|
584 if ($value =~ /</) { |
|
585 $self->parser_error("< character not allowed in attribute values", $reader); |
|
586 } |
|
587 |
|
588 $value =~ s/[\x09\x0A\x0D]/\x20/g; |
|
589 $value =~ s/&(#(x[0-9a-fA-F]+)|([0-9]+)|\w+);/$self->AttReference($1, $reader)/geo; |
|
590 |
|
591 return $value; |
|
592 } |
|
593 |
|
594 sub Comment { |
|
595 my ($self, $reader) = @_; |
|
596 |
|
597 my $data = $reader->data(4); |
|
598 if ($data =~ /^<!--/) { |
|
599 $reader->move_along(4); |
|
600 my $comment_str = ''; |
|
601 while (1) { |
|
602 my $data = $reader->data; |
|
603 $self->parser_error("End of data seen while looking for close comment marker", $reader) |
|
604 unless length($data); |
|
605 if ($data =~ /^(.*?)-->/s) { |
|
606 $comment_str .= $1; |
|
607 $self->parser_error("Invalid comment (dash)", $reader) if $comment_str =~ /-$/; |
|
608 $reader->move_along(length($1) + 3); |
|
609 last; |
|
610 } |
|
611 else { |
|
612 $comment_str .= $data; |
|
613 $reader->move_along(length($data)); |
|
614 } |
|
615 } |
|
616 |
|
617 $self->comment({ Data => $comment_str }); |
|
618 |
|
619 return 1; |
|
620 } |
|
621 return 0; |
|
622 } |
|
623 |
|
624 sub PI { |
|
625 my ($self, $reader) = @_; |
|
626 |
|
627 my $data = $reader->data(2); |
|
628 |
|
629 if ($data =~ /^<\?/) { |
|
630 $reader->move_along(2); |
|
631 my ($target, $data); |
|
632 $target = $self->Name($reader) || |
|
633 $self->parser_error("PI has no target", $reader); |
|
634 if ($self->skip_whitespace($reader)) { |
|
635 $target = ''; |
|
636 while (1) { |
|
637 my $data = $reader->data; |
|
638 $self->parser_error("End of data seen while looking for close PI marker", $reader) |
|
639 unless length($data); |
|
640 if ($data =~ /^(.*?)\?>/s) { |
|
641 $target .= $1; |
|
642 $reader->move_along(length($1) + 2); |
|
643 last; |
|
644 } |
|
645 else { |
|
646 $target .= $data; |
|
647 $reader->move_along(length($data)); |
|
648 } |
|
649 } |
|
650 } |
|
651 else { |
|
652 my $data = $reader->data(2); |
|
653 $data =~ /^\?>/ or $self->parser_error("PI closing sequence not found", $reader); |
|
654 $reader->move_along(2); |
|
655 } |
|
656 $self->processing_instruction({ Target => $target, Data => $data }); |
|
657 |
|
658 return 1; |
|
659 } |
|
660 return 0; |
|
661 } |
|
662 |
|
663 sub Name { |
|
664 my ($self, $reader) = @_; |
|
665 |
|
666 my $name = ''; |
|
667 while(1) { |
|
668 my $data = $reader->data; |
|
669 return unless length($data); |
|
670 $data =~ /^([^\s>\/&\?;=<\)\(\[\],\%\#\!\*]*)/ or return; |
|
671 $name .= $1; |
|
672 my $len = length($1); |
|
673 $reader->move_along($len); |
|
674 last if ($len != length($data)); |
|
675 } |
|
676 |
|
677 return unless length($name); |
|
678 |
|
679 $name =~ /$NameChar/o or $self->parser_error("Name <$name> does not match NameChar production", $reader); |
|
680 |
|
681 return $name; |
|
682 } |
|
683 |
|
684 sub quote { |
|
685 my ($self, $reader) = @_; |
|
686 |
|
687 my $data = $reader->data; |
|
688 |
|
689 $data =~ /^(['"])/ or $self->parser_error("Invalid quote token", $reader); |
|
690 $reader->move_along(1); |
|
691 return $1; |
|
692 } |
|
693 |
|
694 1; |
|
695 __END__ |
|
696 |
|
697 =head1 NAME |
|
698 |
|
699 XML::SAX::PurePerl - Pure Perl XML Parser with SAX2 interface |
|
700 |
|
701 =head1 SYNOPSIS |
|
702 |
|
703 use XML::Handler::Foo; |
|
704 use XML::SAX::PurePerl; |
|
705 my $handler = XML::Handler::Foo->new(); |
|
706 my $parser = XML::SAX::PurePerl->new(Handler => $handler); |
|
707 $parser->parse_uri("myfile.xml"); |
|
708 |
|
709 =head1 DESCRIPTION |
|
710 |
|
711 This module implements an XML parser in pure perl. It is written around the |
|
712 upcoming perl 5.8's unicode support and support for multiple document |
|
713 encodings (using the PerlIO layer), however it has been ported to work with |
|
714 ASCII/UTF8 documents under lower perl versions. |
|
715 |
|
716 The SAX2 API is described in detail at http://sourceforge.net/projects/perl-xml/, in |
|
717 the CVS archive, under libxml-perl/docs. Hopefully those documents will be in a |
|
718 better location soon. |
|
719 |
|
720 Please refer to the SAX2 documentation for how to use this module - it is merely a |
|
721 front end to SAX2, and implements nothing that is not in that spec (or at least tries |
|
722 not to - please email me if you find errors in this implementation). |
|
723 |
|
724 =head1 BUGS |
|
725 |
|
726 XML::SAX::PurePerl is B<slow>. Very slow. I suggest you use something else |
|
727 in fact. However it is great as a fallback parser for XML::SAX, where the |
|
728 user might not be able to install an XS based parser or C library. |
|
729 |
|
730 Currently lots, probably. At the moment the weakest area is parsing DOCTYPE declarations, |
|
731 though the code is in place to start doing this. Also parsing parameter entity |
|
732 references is causing me much confusion, since it's not exactly what I would call |
|
733 trivial, or well documented in the XML grammar. XML documents with internal subsets |
|
734 are likely to fail. |
|
735 |
|
736 I am however trying to work towards full conformance using the Oasis test suite. |
|
737 |
|
738 =head1 AUTHOR |
|
739 |
|
740 Matt Sergeant, matt@sergeant.org. Copyright 2001. |
|
741 |
|
742 Please report all bugs to the Perl-XML mailing list at perl-xml@listserv.activestate.com. |
|
743 |
|
744 =head1 LICENSE |
|
745 |
|
746 This is free software. You may use it or redistribute it under the same terms as |
|
747 Perl 5.7.2 itself. |
|
748 |
|
749 =cut |
|
750 |