|
1 package XML::Checker::Parser; |
|
2 use strict; |
|
3 use XML::Parser; |
|
4 use XML::Checker; |
|
5 |
|
6 use vars qw( @ISA @InterceptedHandlers @SGML_SEARCH_PATH %URI_MAP |
|
7 $_checker $_prevFAIL |
|
8 $_Init $_Final $_Char $_Start $_End $_Element $_Attlist |
|
9 $_Doctype $_Unparsed $_Notation $_Entity $_skipInsignifWS |
|
10 $_EndOfDoc |
|
11 ); |
|
12 |
|
# XML::Checker::Parser is a subclass of XML::Parser.
@ISA = ('XML::Parser');

# Names of the XML::Parser handlers that this package intercepts: for each
# name, new() installs the function of the same name from this package.
@InterceptedHandlers = (
    'Init',    'Final',   'Char',     'Start',    'End', 'Element',
    'Attlist', 'Doctype', 'Unparsed', 'Notation', 'Entity',
);

# Where to search for external DTDs (in local file system)
@SGML_SEARCH_PATH = ();

# Where to search for external DTDs as referred to by public ID in a
# <!DOCTYPE ...> statement, e.g. "-//W3C//DTD HTML 4.0//EN"
# E.g. it could map "-//W3C//DTD HTML 4.0//EN" to "file:/user/html.dtd"
%URI_MAP = ();
|
25 |
|
# Constructor. All arguments are passed both to XML::Parser->new and to
# XML::Checker->new; the resulting checker is stored in {Checker}.
#
# The handler table built by XML::Parser is kept in {UserHandlers}, and
# {Handlers} is replaced by a copy in which every name listed in
# @InterceptedHandlers refers to the function of that name in this package.
#
# Returns the XML::Parser object, reblessed into $class.
sub new
{
    my ($class, %args) = @_;

    my $parser = XML::Parser->new (%args);
    $parser->{Checker} = XML::Checker->new (%args);

    # Don't need Comment handler - assuming comments are allowed anywhere
    #?? What should Default handler do?
    #?? Check XMLDecl, ExternEnt, Proc? No, for now.
    #?? Add CdataStart, CdataEnd support?

    # Start from the user's handlers, then overlay our intercepting functions
    my $user_handlers = $parser->{Handlers};
    my %handlers = (
        %{$user_handlers},
        map { ($_ => \&{"XML::Checker::Parser::$_"}) } @InterceptedHandlers,
    );

    $parser->{UserHandlers} = $user_handlers;
    $parser->{Handlers} = \%handlers;

    return bless $parser, $class;
}
|
51 |
|
# Returns whatever is stored in the {Checker} slot of the invocant
# (new() puts the XML::Checker object there).
sub getChecker
{
    my $self = $_[0];
    return $self->{Checker};
}
|
56 |
|
# Parses a document via XML::Parser::parse (all remaining arguments are
# passed on) while the intercepting handlers of this package are active.
#
# The package globals read by those handlers are localized for the duration
# of the call: the checker, the user handlers saved in {UserHandlers}, the
# SkipInsignifWS setting and the end-of-document flag. $XML::Checker::FAIL
# is temporarily pointed at fail_add_context, with the previous fail handler
# remembered in $_prevFAIL.
#
# Returns whatever XML::Parser::parse returns.
sub parse
{
    my $self = shift;
    my $user_handlers = $self->{UserHandlers};

    # Per-parse state
    local $_checker = $self->{Checker};
    local $_skipInsignifWS = $self->{SkipInsignifWS};
    local $_EndOfDoc = 0;
    local $XML::Checker::INSIGNIF_WS = 0;

    # User handlers that the intercepting functions call after checking
    local $_Init     = $user_handlers->{Init};
    local $_Final    = $user_handlers->{Final};
    local $_Char     = $user_handlers->{Char};
    local $_Start    = $user_handlers->{Start};
    local $_End      = $user_handlers->{End};
    local $_Element  = $user_handlers->{'Element'};
    local $_Attlist  = $user_handlers->{'Attlist'};
    local $_Doctype  = $user_handlers->{Doctype};
    local $_Unparsed = $user_handlers->{Unparsed};
    local $_Notation = $user_handlers->{Notation};
    local $_Entity   = $user_handlers->{Entity};

    # Save the current fail handler first, then install ours in front of it
    local $_prevFAIL = $XML::Checker::FAIL;
    local $XML::Checker::FAIL = \&fail_add_context;

    return $self->SUPER::parse (@_);
}
|
86 |
|
# File-scoped user agent shared by the functions below; load_URL creates
# one on demand when it is still undefined.
my $LWP_USER_AGENT;

# Static. Stores its first argument as the shared user agent and returns it.
sub set_LWP_UserAgent # static
{
    my ($agent) = @_;
    return $LWP_USER_AGENT = $agent;
}
|
92 |
|
# Static. Fetches $url with a GET request through LWP and returns the
# content of the response.
#
#   $url            - the URL to load
#   $lwp_user_agent - optional user agent object to send the request with.
#                     When undefined, the shared $LWP_USER_AGENT is used; it
#                     is created on first use (with env_proxy applied).
#
# Dies when the request throws an exception or when the response content
# is empty/false (the response is then included in the message).
sub load_URL # static
{
    my ($url, $lwp_user_agent) = @_;
    my $result;

    # Read the file from the web with LWP.
    #
    # Note that we read in the entire file, which may not be ideal
    # for large files. LWP::UserAgent also provides a callback style
    # request, which we could convert to a stream with a fork()...

    my $response;
    eval
    {
        use LWP::UserAgent;

        my $ua = $lwp_user_agent;
        unless (defined $ua)
        {
            unless (defined $LWP_USER_AGENT)
            {
                $LWP_USER_AGENT = LWP::UserAgent->new;

                # Load proxy settings from environment variables, i.e.:
                # http_proxy, ftp_proxy, no_proxy etc. (see LWP::UserAgent(3))
                # You need these to go thru firewalls.
                $LWP_USER_AGENT->env_proxy;
            }
            $ua = $LWP_USER_AGENT;
        }

        my $req = HTTP::Request->new ('GET', $url);

        # Send the request through the agent selected above. Using the
        # shared agent unconditionally would ignore a caller-supplied agent
        # (and fail outright when no shared agent has been created yet).
        $response = $ua->request ($req);
        $result = $response->content;
    };
    if ($@)
    {
        die "Couldn't load URL [$url] with LWP: $@";
    }
    if (!$result)
    {
        my $message = $response->as_string;
        die "Couldn't load URL [$url] with LWP: $message";
    }
    return $result;
}
|
138 |
|
# Parses the document named by the first argument; remaining arguments are
# passed on to the parse call.
#
# When the name starts with one of the URL schemes http, https, ftp, wais,
# gopher or file, the document is fetched with load_URL (using this
# parser's {LWP_UserAgent}) and the fetched text is handed to parse().
# Anything else is delegated to XML::Parser::parsefile.
#
# Dies with "Couldn't parsefile [...]" when parsing a fetched document fails.
sub parsefile
{
    my $self = shift;
    my $url = shift;

    # Not a URL we fetch ourselves: let XML::Parser open it as a file
    # Any other URL schemes?
    return $self->SUPER::parsefile ($url, @_)
        unless $url =~ /^(https?|ftp|wais|gopher|file):/;

    my $xml = load_URL ($url, $self->{LWP_UserAgent});

    # Parse the result of the HTTP request
    my $result = eval { $self->parse ($xml, @_) };
    die "Couldn't parsefile [$url]: $@" if $@;

    return $result;
}
|
165 |
|
# Init handler: stores the expat object in the checker's {Expat} slot
# (fail_add_context reads it from there), forwards the remaining arguments
# to the checker's Init and then calls the user's Init handler, if any,
# with the expat object only.
sub Init
{
    my $expat = shift;

    $_checker->{Expat} = $expat;
    $_checker->Init (@_);

    $_Init->($expat) if $_Init;
}
|
174 |
|
# Final handler: marks the end of the document (so fail_add_context stops
# adding position info), forwards the remaining arguments to the checker's
# Final and calls the user's Final handler, if any, with the expat object.
#
# Returns the value of the user's Final handler (evaluated in scalar
# context), or undef when no user handler is set.
sub Final
{
    my $expat = shift;
    $_EndOfDoc = 1;

    $_checker->Final (@_);

    # Declare and assign separately: a "my" combined with a statement
    # modifier ("my $x = ... if COND") has undefined behaviour in Perl and
    # can leave a stale value from an earlier call in the variable.
    my $result;
    $result = $_Final->($expat) if $_Final;

    # Decouple Expat from Checker
    delete $_checker->{Expat};

    # NOTE: Checker is not decoupled
    return $result;
}
|
189 |
|
# Start handler: reports the start tag and each of its attributes to the
# checker, then calls the user's Start handler (if any) with the original
# arguments.
#
# @attr is walked as a flat (name, value, name, value, ...) list. The flag
# passed as last argument to the checker's Attr is true when the index of
# the attribute name is below $expat->specified_attr.
sub Start
{
    my ($expat, $tag, @attr) = @_;

    $_checker->Start ($tag);

    my $num_spec = $expat->specified_attr;
    my $i = 0;
    while ($i < @attr)
    {
        my $spec = ($i < $num_spec);
        my ($name, $val) = @attr[$i, $i + 1];

        # print "--- $tag $name $val $spec\n";
        $_checker->Attr ($tag, $name, $val, $spec);

        $i += 2;
    }
    $_checker->EndAttr;

    $_Start->($expat, $tag, @attr) if $_Start;
}
|
210 |
|
# End handler: forwards the arguments (without the expat object) to the
# checker's End, then calls the user's End handler, if any, with the
# expat object and the same arguments.
sub End
{
    my $expat = shift;

    $_checker->End (@_);

    $_End->($expat, @_) if $_End;
}
|
217 |
|
# Char handler: forwards the character data to the checker's Char, then
# calls the user's Char handler, if any - except when
# $XML::Checker::INSIGNIF_WS is true and SkipInsignifWS was requested.
sub Char
{
    my $expat = shift;

    $_checker->Char (@_);

    # Evaluated after the checker has seen the data, as in the test it
    # replaces: skip insignificant whitespace when the user asked for that
    my $skip = $XML::Checker::INSIGNIF_WS && $_skipInsignifWS;

    $_Char->($expat, @_) if $_Char && !$skip;
}
|
226 |
|
# Element handler: forwards the arguments (without the expat object) to
# the checker's Element, then calls the user's Element handler, if any,
# with the expat object and the same arguments.
sub Element
{
    my $expat = shift;

    $_checker->Element (@_);

    $_Element->($expat, @_) if $_Element;
}
|
233 |
|
# Attlist handler: forwards the arguments (without the expat object) to
# the checker's Attlist, then calls the user's Attlist handler, if any,
# with the expat object and the same arguments.
sub Attlist
{
    my $expat = shift;

    $_checker->Attlist (@_);

    $_Attlist->($expat, @_) if $_Attlist;
}
|
240 |
|
241 |
|
# Doctype handler. Arguments after the expat object: $name, $sysid, $pubid
# and $internal.
#
# Unless the checker's {SkipExternalDTD} is set, the external DTD is loaded
# with load_DTD - by system id when there is one, otherwise by public id.
# Its text is wrapped in the internal subset of a minimal document and run
# through a separate XML::Parser whose declaration handlers feed the same
# checker (see package XML::Checker::Parser::ExternalDTD).
#
# Finally the original arguments are passed to the checker's Doctype and to
# the user's Doctype handler, if any.
#
# Dies when the external DTD cannot be loaded or parsed.
sub Doctype
{
    my $expat = shift;
    my ($name, $sysid, $pubid, $internal) = @_;

    my ($dtd, $dtd_id);
    unless ($_checker->{SkipExternalDTD})
    {
        # External DTD: prefer the system id, fall back to the public id
        $dtd_id = $sysid ? $sysid : $pubid;

        #?? I'm not sure if we should die here or keep going?
        $dtd = load_DTD ($dtd_id, $expat->{LWP_UserAgent}) if $dtd_id;
    }

    if (defined $dtd)
    {
        #?? what about passing ProtocolEncoding, Namespaces, Stream_Delimiter ?
        my $parser = XML::Parser->new (
            Checker => $_checker,
            ErrorContext => $expat->{ErrorContext},
            Handlers => {
                Entity => \&XML::Checker::Parser::ExternalDTD::Entity,
                Notation => \&XML::Checker::Parser::ExternalDTD::Notation,
                Element => \&XML::Checker::Parser::ExternalDTD::Element,
                Attlist => \&XML::Checker::Parser::ExternalDTD::Attlist,
                Unparsed => \&XML::Checker::Parser::ExternalDTD::Unparsed,
            });

        # Use the identifier the DTD was actually loaded with ($sysid may be
        # undefined when only a public id was available), and delimit it
        # with double quotes when it contains a single quote, so that the
        # generated DOCTYPE stays well-formed.
        my $quote = ($dtd_id =~ /'/) ? '"' : "'";

        eval
        {
            $parser->parse ("<!DOCTYPE $name SYSTEM $quote$dtd_id$quote [\n$dtd\n]>\n<$name/>");
        };
        if ($@)
        {
            die "Couldn't parse contents of external DTD <$dtd_id> :$@";
        }
    }
    $_checker->Doctype (@_);
    $_Doctype->($expat, @_) if $_Doctype;
}
|
289 |
|
# Unparsed handler: forwards the arguments (without the expat object) to
# the checker's Unparsed, then calls the user's Unparsed handler, if any,
# with the expat object and the same arguments.
sub Unparsed
{
    my $expat = shift;

    $_checker->Unparsed (@_);

    $_Unparsed->($expat, @_) if $_Unparsed;
}
|
296 |
|
# Entity handler: forwards the arguments (without the expat object) to
# the checker's Entity, then calls the user's Entity handler, if any,
# with the expat object and the same arguments.
sub Entity
{
    my $expat = shift;

    $_checker->Entity (@_);

    $_Entity->($expat, @_) if $_Entity;
}
|
303 |
|
# Notation handler: forwards the arguments (without the expat object) to
# the checker's Notation, then calls the user's Notation handler, if any,
# with the expat object and the same arguments.
sub Notation
{
    my $expat = shift;

    $_checker->Notation (@_);

    $_Notation->($expat, @_) if $_Notation;
}
|
310 |
|
# Placeholder for a Default handler: does nothing with its arguments and
# returns nothing. (It is not listed in @InterceptedHandlers.)
sub Default
{
    #?? what can I check here?
    # print "Default handler got[" . join (", ", @_) . "]";
    return;
}
|
316 |
|
317 #sub XMLDecl |
|
318 #{ |
|
319 #?? support later? |
|
320 #} |
|
321 |
|
# Sets handlers, like XML::Parser::setHandlers; takes (name => handler)
# pairs.
#
# Handlers whose name is listed in @InterceptedHandlers are not passed on
# to XML::Parser (that would replace the intercepting functions of this
# package). Instead they are
#   - stored in {UserHandlers}, which parse() reads when it starts, so that
#     a handler set before parsing is not lost, and
#   - assigned to the matching package variable ($_Start, $_End, ...), so
#     that the change takes effect immediately during a running parse.
#
# All other handlers are passed to XML::Parser::setHandlers, whose return
# value is returned.
sub setHandlers
{
    my ($self, %h) = @_;

    for my $name (@InterceptedHandlers)
    {
        next unless exists $h{$name};

        my $handler = delete $h{$name};

        $self->{UserHandlers}{$name} = $handler;

        # Symbolic reference instead of a string eval: no code is compiled
        # at run time and no error can be silently swallowed.
        no strict 'refs';
        ${"XML::Checker::Parser::_$name"} = $handler;
    }

    # Pass remaining handlers to the parent class (XML::Parser)
    $self->SUPER::setHandlers (%h);
}
|
338 |
|
# Static. Fail handler that parse() installs in $XML::Checker::FAIL.
# Appends (line => ..., column => ..., byte => ...), taken from the expat
# object stored in the checker, to its arguments - unless the end of the
# document was reached - and passes everything on to the previous fail
# handler ($_prevFAIL).
sub fail_add_context # static
{
    my $expat = $_checker->{Expat};

    my $byte = $expat->current_byte;	# -1 means: end of XML document
    unless ($byte == -1 || $_EndOfDoc)
    {
        push @_,
            line   => $expat->current_line,
            column => $expat->current_column,
            byte   => $byte;
    }
    $_prevFAIL->(@_);
}
|
353 |
|
354 #-------- STATIC METHODS related to External DTDs --------------------------- |
|
355 |
|
# Static. Resolves the identifier of an external DTD to a URL and returns
# the text that load_URL fetches from it.
#
#   $sysid          - system id (or any other key of %URI_MAP, e.g. a
#                     public id)
#   $lwp_user_agent - passed on to load_URL
#
# Resolution: an entry in %URI_MAP wins. Otherwise an identifier without a
# "scheme:" prefix is treated as a file name; when it does not start with a
# slash it is looked up with find_in_sgml_search_path. The file name is
# then prefixed with "file:".
#
# Dies when a relative file name cannot be found in the search path.
sub load_DTD # static
{
    my ($sysid, $lwp_user_agent) = @_;

    if (exists $URI_MAP{$sysid})
    {
        # See if it is defined in the %URI_MAP
        # (Public IDs are stored here, e.g. "-//W3C//DTD HTML 4.0//EN")
        $sysid = $URI_MAP{$sysid};
    }
    elsif ($sysid !~ /^\w+:/)
    {
        if ($sysid !~ /^\//)
        {
            # Not an absolute path. See if it's in SGML_SEARCH_PATH.
            my $relative_sysid = $sysid;

            $sysid = find_in_sgml_search_path ($relative_sysid);
            unless ($sysid)
            {
                die "Couldn't find external DTD [$relative_sysid] in SGML_SEARCH_PATH ($ENV{'SGML_SEARCH_PATH'})"
                    if $ENV{'SGML_SEARCH_PATH'};

                die "Couldn't find external DTD [$relative_sysid], may be you should set SGML_SEARCH_PATH";
            }
        }

        # Prefix the sysid with 'file:' if it has no protocol identifier
        $sysid = "file:$sysid";
    }

    return load_URL ($sysid, $lwp_user_agent);
}
|
392 |
|
# Static. Adds the given (id => uri, ...) pairs to %URI_MAP; a pair whose
# id is already present replaces the existing entry.
sub map_uri # static
{
    my @pairs = @_;
    return %URI_MAP = (%URI_MAP, @pairs);
}
|
397 |
|
# Static. Replaces @SGML_SEARCH_PATH with the given list of directories.
sub set_sgml_search_path # static
{
    my @directories = @_;
    return @SGML_SEARCH_PATH = @directories;
}
|
402 |
|
# Static. Looks for $file in a list of directories and returns
# "$directory/$file" for the first directory in which it exists, or undef
# when it is not found.
#
# The directories are, in order of preference:
#   - @SGML_SEARCH_PATH, when it is not empty
#   - the colon-separated $ENV{SGML_SEARCH_PATH}, when it is set
#   - ".", "$ENV{HOME}/.sgml", "/usr/lib/sgml", "/usr/share/sgml"
#     (the HOME entry is left out when HOME is not set)
sub find_in_sgml_search_path # static
{
    my $file = shift;

    my @dirs = @SGML_SEARCH_PATH;
    unless (@dirs)
    {
        my $path = $ENV{SGML_SEARCH_PATH};
        if ($path)
        {
            @dirs = split (':', $path);
        }
        else
        {
            # Without HOME, "$home/.sgml" would turn into "/.sgml", which
            # is not a directory anybody meant to search.
            my $home = $ENV{HOME};
            my @home_dirs = (defined $home && length $home)
                ? ("$home/.sgml")
                : ();

            @dirs = (".", @home_dirs, "/usr/lib/sgml", "/usr/share/sgml");
        }
    }

    for my $directory (@dirs)
    {
        my $candidate = "$directory/$file";
        return $candidate if -e $candidate;
    }
    return undef;
}
|
431 |
|
# Handlers for the separate XML::Parser that XML::Checker::Parser::Doctype
# uses to read an external DTD. Each one takes the checker from the
# {Checker} slot of the expat object it receives and forwards the remaining
# arguments to the checker method of the same name.
package XML::Checker::Parser::ExternalDTD;

# Forwards an element declaration to the checker.
sub Element {
    my $checker = shift->{Checker};
    $checker->Element(@_);
}

# Forwards an attribute list declaration to the checker.
sub Attlist {
    my $checker = shift->{Checker};
    $checker->Attlist(@_);
}

# Forwards an unparsed entity declaration to the checker.
sub Unparsed {
    my $checker = shift->{Checker};
    $checker->Unparsed(@_);
}

# Forwards a notation declaration to the checker.
sub Notation {
    my $checker = shift->{Checker};
    $checker->Notation(@_);
}

# Forwards an entity declaration to the checker.
sub Entity {
    my $checker = shift->{Checker};
    $checker->Entity(@_);
}
|
459 |
|
460 1; # package return code |
|
461 |
|
462 __END__ |
|
463 |
|
464 =head1 NAME |
|
465 |
|
466 XML::Checker::Parser - an XML::Parser that validates at parse time |
|
467 |
|
468 =head1 SYNOPSIS |
|
469 |
|
470 use XML::Checker::Parser; |
|
471 |
|
472 my %expat_options = (KeepCDATA => 1, |
|
473 Handlers => { Unparsed => \&my_Unparsed_handler }); |
|
474 my $parser = new XML::Checker::Parser (%expat_options); |
|
475 |
|
476 eval { |
|
477 local $XML::Checker::FAIL = \&my_fail; |
|
478 $parser->parsefile ("fail.xml"); |
|
479 }; |
|
480 if ($@) { |
|
481 # Either XML::Parser (expat) threw an exception or my_fail() died. |
|
482 ... your error handling code here ... |
|
483 } |
|
484 |
|
485 # Throws an exception (with die) when an error is encountered, this |
|
486 # will stop the parsing process. |
|
487 # Don't die if a warning or info message is encountered, just print a message. |
|
488 sub my_fail { |
|
489 my $code = shift; |
|
490 die XML::Checker::error_string ($code, @_) if $code < 200; |
|
491 XML::Checker::print_error ($code, @_); |
|
492 } |
|
493 |
|
494 =head1 DESCRIPTION |
|
495 |
|
496 XML::Checker::Parser extends L<XML::Parser> |
|
497 |
|
498 I hope the example in the SYNOPSIS says it all, just use |
|
499 L<XML::Checker::Parser> as if it were an XML::Parser. |
|
500 See L<XML::Parser> for the supported (expat) options. |
|
501 |
|
502 You can also derive your parser from XML::Checker::Parser instead of |
|
503 from XML::Parser. All you should have to do is replace: |
|
504 |
|
505 package MyParser; |
|
506 @ISA = qw( XML::Parser ); |
|
507 |
|
508 with: |
|
509 |
|
510 package MyParser; |
|
511 @ISA = qw( XML::Checker::Parser ); |
|
512 |
|
513 =head1 XML::Checker::Parser constructor |
|
514 |
|
515 $parser = new XML::Checker::Parser (SkipExternalDTD => 1, SkipInsignifWS => 1); |
|
516 |
|
517 The constructor takes the same parameters as L<XML::Parser> with the following additions: |
|
518 |
|
519 =over 4 |
|
520 |
|
521 =item SkipExternalDTD |
|
522 |
|
523 By default, it will try to load external DTDs using LWP. You can disable this |
|
524 by setting SkipExternalDTD to 1. See L<External DTDs|"External DTDs"> for details. |
|
525 |
|
526 =item SkipInsignifWS |
|
527 |
|
528 By default, it will treat insignificant whitespace as regular Char data. |
|
529 By setting SkipInsignifWS to 1, the user Char handler will not be called |
|
530 if insignificant whitespace is encountered. |
|
531 See L<XML::Checker/INSIGNIFICANT_WHITESPACE> for details. |
|
532 |
|
533 =item LWP_UserAgent |
|
534 |
|
535 When calling parsefile() with a URL (instead of a filename) or when loading |
|
536 external DTDs, we use LWP to download the |
|
537 remote file. By default it will use a L<LWP::UserAgent> that is created as follows: |
|
538 |
|
539 use LWP::UserAgent; |
|
540 $LWP_USER_AGENT = LWP::UserAgent->new; |
|
541 $LWP_USER_AGENT->env_proxy; |
|
542 |
|
543 Note that C<env_proxy> reads proxy settings from your environment variables, |
|
544 which is what I need to do to get thru our firewall. |
|
545 If you want to use a different LWP::UserAgent, you can either set |
|
546 it globally with: |
|
547 |
|
548 XML::Checker::Parser::set_LWP_UserAgent ($my_agent); |
|
549 |
|
550 or, you can specify it for a specific XML::Checker::Parser by passing it to |
|
551 the constructor: |
|
552 |
|
553 my $parser = new XML::Checker::Parser (LWP_UserAgent => $my_agent); |
|
554 |
|
555 Currently, LWP is used when the filename (passed to parsefile) starts with one of |
|
556 the following URL schemes: http, https, ftp, wais, gopher, or file |
|
557 (followed by a colon.) If I missed one, please let me know. |
|
558 |
|
559 The LWP modules are part of libwww-perl which is available at CPAN. |
|
560 |
|
561 =back |
|
562 |
|
563 =head1 External DTDs |
|
564 |
|
565 XML::Checker::Parser will try to load and parse external DTDs that are |
|
566 referenced in DOCTYPE definitions unless you set the B<SkipExternalDTD> |
|
567 option to 1 (the default setting is 0.) |
|
568 See L<CAVEATS|"CAVEATS"> for details on what is not supported by XML::Checker::Parser. |
|
569 |
|
570 L<XML::Parser> (version 2.27 and up) does a much better job at reading external |
|
571 DTDs, because recently external DTD parsing was added to expat. |
|
572 Make sure you set the L<XML::Parser> option B<ParseParamEnt> to 1 and the |
|
573 XML::Checker::Parser option B<SkipExternalDTD> to 1. |
|
574 (They can both be set in the XML::Checker::Parser constructor.) |
|
575 |
|
576 When external DTDs are parsed by XML::Checker::Parser, they are |
|
577 located in the following order: |
|
578 |
|
579 =over 4 |
|
580 |
|
581 =item * |
|
582 |
|
583 With the %URI_MAP, which can be set using B<map_uri>. |
|
584 This hash maps external resource ids (like system ID's and public ID's) |
|
585 to full path URI's. |
|
586 It was meant to aid in resolving PUBLIC IDs found in DOCTYPE declarations |
|
587 after the PUBLIC keyword, e.g. |
|
588 |
|
589 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN"> |
|
590 |
|
591 However, you can also use this to force L<XML::Checker> to read DTDs from a |
|
592 different URL than was specified (e.g. from the local file system for |
|
593 performance reasons.) |
|
594 |
|
595 =item * |
|
596 |
|
597 on the Internet, if their system identifier starts with a protocol |
|
598 (like http://...) |
|
599 |
|
600 =item * |
|
601 |
|
602 on the local disk, if their system identifier starts with a slash |
|
603 (absolute path) |
|
604 |
|
605 =item * |
|
606 |
|
607 in the SGML_SEARCH_PATH, if their system identifier is a |
|
608 relative file name. It will use @SGML_SEARCH_PATH if it was set with |
|
609 B<set_sgml_search_path()>, or the colon-separated $ENV{SGML_SEARCH_PATH}, |
|
610 or (if that isn't set) the list (".", "$ENV{'HOME'}/.sgml", "/usr/lib/sgml", |
|
611 "/usr/share/sgml"), which includes the |
|
612 current directory, so it should do the right thing in most cases. |
|
613 |
|
614 =back |
|
615 |
|
616 =head2 Static methods related to External DTDs |
|
617 |
|
618 =over 4 |
|
619 |
|
620 =item set_sgml_search_path (dir1, dir2, ...) |
|
621 |
|
622 External DTDs with relative file paths are looked up using the @SGML_SEARCH_PATH, |
|
623 which can be set with this method. If @SGML_SEARCH_PATH is never set, it |
|
624 will use the colon-separated $ENV{SGML_SEARCH_PATH} instead. If neither is set |
|
625 it uses the list: ".", "$ENV{'HOME'}/.sgml", "/usr/lib/sgml", |
|
626 "/usr/share/sgml". |
|
627 |
|
628 set_sgml_search_path is a static method. |
|
629 |
|
630 =item map_uri (pubid => uri, ...) |
|
631 |
|
632 To define the location of PUBLIC ids, as found in DOCTYPE declarations |
|
633 after the PUBLIC keyword, e.g. |
|
634 |
|
635 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN"> |
|
636 |
|
637 call this method, e.g. |
|
638 |
|
639 XML::Checker::Parser::map_uri ( |
|
640 "-//W3C//DTD HTML 4.0//EN" => "file:/user/html.dtd"); |
|
641 |
|
642 See L<External DTDs|"External DTDs"> for more info. |
|
643 |
|
644 XML::Checker::Parser::map_uri is a static method. |
|
645 |
|
646 =back |
|
647 |
|
648 =head1 Switching user handlers at parse time |
|
649 |
|
650 You should be able to use setHandlers() just as in L<XML::Parser>. |
|
651 (Using setHandlers has not been tested yet.) |
|
652 |
|
653 =head1 Error handling |
|
654 |
|
655 XML::Checker::Parser routes the fail handler through |
|
656 XML::Checker::Parser::fail_add_context() before calling your fail handler |
|
657 (i.e. the global fail handler: $XML::Checker::FAIL. |
|
658 See L<XML::Checker/ERROR_HANDLING>.) |
|
659 It adds the (line, column, byte) information from L<XML::Parser> to the |
|
660 error context (unless it was the end of the XML document.) |
|
661 |
|
662 =head1 Supported XML::Parser handlers |
|
663 |
|
664 Only the following L<XML::Parser> handlers are currently routed through |
|
665 L<XML::Checker>: Init, Final, Char, Start, End, Element, Attlist, Doctype, |
|
666 Unparsed, Notation, Entity. |
|
667 |
|
668 =head1 CAVEATS |
|
669 |
|
670 When using XML::Checker::Parser to parse external DTDs |
|
671 (i.e. with SkipExternalDTD => 0), |
|
672 expect trouble when your external DTD contains parameter entities inside |
|
673 declarations or conditional sections. The external DTD should probably have |
|
674 the same encoding as the original XML document. |
|
675 |
|
676 =head1 AUTHOR |
|
677 |
|
678 Send bug reports, hints, tips, suggestions to Enno Derksen at |
|
679 <F<enno@att.com>>. |
|
680 |
|
681 =head1 SEE ALSO |
|
682 |
|
683 L<XML::Checker> (L<XML::Checker/SEE_ALSO>), L<XML::Parser> |