176
+ − 1
# $Id: PurePerl.pm,v 1.21 2007/02/07 09:33:50 grant Exp $
+ − 2
+ − 3
package XML::SAX::PurePerl;
+ − 4
+ − 5
use strict;
+ − 6
use vars qw/$VERSION/;
+ − 7
+ − 8
$VERSION = '0.91';
+ − 9
+ − 10
use XML::SAX::PurePerl::Productions qw($Any $CharMinusDash $SingleChar);
+ − 11
use XML::SAX::PurePerl::Reader;
+ − 12
use XML::SAX::PurePerl::EncodingDetect ();
+ − 13
use XML::SAX::Exception;
+ − 14
use XML::SAX::PurePerl::DocType ();
+ − 15
use XML::SAX::PurePerl::DTDDecls ();
+ − 16
use XML::SAX::PurePerl::XMLDecl ();
+ − 17
use XML::SAX::DocumentLocator ();
+ − 18
use XML::SAX::Base ();
+ − 19
use XML::SAX qw(Namespaces);
+ − 20
use XML::NamespaceSupport ();
+ − 21
use IO::File;
+ − 22
+ − 23
if ($] < 5.006) {
+ − 24
require XML::SAX::PurePerl::NoUnicodeExt;
+ − 25
}
+ − 26
else {
+ − 27
require XML::SAX::PurePerl::UnicodeExt;
+ − 28
}
+ − 29
+ − 30
use vars qw(@ISA);
+ − 31
@ISA = ('XML::SAX::Base');
+ − 32
+ − 33
my %int_ents = (
+ − 34
amp => '&',
+ − 35
lt => '<',
+ − 36
gt => '>',
+ − 37
quot => '"',
+ − 38
apos => "'",
+ − 39
);
+ − 40
+ − 41
my $xmlns_ns = "http://www.w3.org/2000/xmlns/";
+ − 42
my $xml_ns = "http://www.w3.org/XML/1998/namespace";
+ − 43
+ − 44
use Carp;
+ − 45
sub _parse_characterstream {
+ − 46
my $self = shift;
+ − 47
my ($fh) = @_;
+ − 48
confess("CharacterStream is not yet correctly implemented");
+ − 49
my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh);
+ − 50
return $self->_parse($reader);
+ − 51
}
+ − 52
+ − 53
sub _parse_bytestream {
+ − 54
my $self = shift;
+ − 55
my ($fh) = @_;
+ − 56
my $reader = XML::SAX::PurePerl::Reader::Stream->new($fh);
+ − 57
return $self->_parse($reader);
+ − 58
}
+ − 59
+ − 60
sub _parse_string {
+ − 61
my $self = shift;
+ − 62
my ($str) = @_;
+ − 63
my $reader = XML::SAX::PurePerl::Reader::String->new($str);
+ − 64
return $self->_parse($reader);
+ − 65
}
+ − 66
+ − 67
sub _parse_systemid {
+ − 68
my $self = shift;
+ − 69
my ($uri) = @_;
+ − 70
my $reader = XML::SAX::PurePerl::Reader::URI->new($uri);
+ − 71
return $self->_parse($reader);
+ − 72
}
+ − 73
+ − 74
sub _parse {
+ − 75
my ($self, $reader) = @_;
+ − 76
+ − 77
$reader->public_id($self->{ParseOptions}{Source}{PublicId});
+ − 78
$reader->system_id($self->{ParseOptions}{Source}{SystemId});
+ − 79
+ − 80
$self->{NSHelper} = XML::NamespaceSupport->new({xmlns => 1});
+ − 81
+ − 82
$self->set_document_locator(
+ − 83
XML::SAX::DocumentLocator->new(
+ − 84
sub { $reader->public_id },
+ − 85
sub { $reader->system_id },
+ − 86
sub { $reader->line },
+ − 87
sub { $reader->column },
+ − 88
sub { $reader->get_encoding },
+ − 89
sub { $reader->get_xml_version },
+ − 90
),
+ − 91
);
+ − 92
+ − 93
$self->start_document({});
+ − 94
+ − 95
if (defined $self->{ParseOptions}{Source}{Encoding}) {
+ − 96
$reader->set_encoding($self->{ParseOptions}{Source}{Encoding});
+ − 97
}
+ − 98
else {
+ − 99
$self->encoding_detect($reader);
+ − 100
}
+ − 101
+ − 102
# parse a document
+ − 103
$self->document($reader);
+ − 104
+ − 105
return $self->end_document({});
+ − 106
}
+ − 107
+ − 108
sub parser_error {
+ − 109
my $self = shift;
+ − 110
my ($error, $reader) = @_;
+ − 111
+ − 112
# warn("parser error: $error from ", $reader->line, " : ", $reader->column, "\n");
+ − 113
my $exception = XML::SAX::Exception::Parse->new(
+ − 114
Message => $error,
+ − 115
ColumnNumber => $reader->column,
+ − 116
LineNumber => $reader->line,
+ − 117
PublicId => $reader->public_id,
+ − 118
SystemId => $reader->system_id,
+ − 119
);
+ − 120
+ − 121
$self->fatal_error($exception);
+ − 122
$exception->throw;
+ − 123
}
+ − 124
+ − 125
sub document {
+ − 126
my ($self, $reader) = @_;
+ − 127
+ − 128
# document ::= prolog element Misc*
+ − 129
+ − 130
$self->prolog($reader);
+ − 131
$self->element($reader) ||
+ − 132
$self->parser_error("Document requires an element", $reader);
+ − 133
+ − 134
while(length($reader->data)) {
+ − 135
$self->Misc($reader) ||
+ − 136
$self->parser_error("Only Comments, PIs and whitespace allowed at end of document", $reader);
+ − 137
}
+ − 138
}
+ − 139
+ − 140
sub prolog {
+ − 141
my ($self, $reader) = @_;
+ − 142
+ − 143
$self->XMLDecl($reader);
+ − 144
+ − 145
# consume all misc bits
+ − 146
1 while($self->Misc($reader));
+ − 147
+ − 148
if ($self->doctypedecl($reader)) {
+ − 149
while (length($reader->data)) {
+ − 150
$self->Misc($reader) || last;
+ − 151
}
+ − 152
}
+ − 153
}
+ − 154
+ − 155
sub element {
+ − 156
my ($self, $reader) = @_;
+ − 157
+ − 158
return 0 unless $reader->match('<');
+ − 159
+ − 160
my $name = $self->Name($reader) || $self->parser_error("Invalid element name", $reader);
+ − 161
+ − 162
my %attribs;
+ − 163
+ − 164
while( my ($k, $v) = $self->Attribute($reader) ) {
+ − 165
$attribs{$k} = $v;
+ − 166
}
+ − 167
+ − 168
my $have_namespaces = $self->get_feature(Namespaces);
+ − 169
+ − 170
# Namespace processing
+ − 171
$self->{NSHelper}->push_context;
+ − 172
my @new_ns;
+ − 173
# my %attrs = @attribs;
+ − 174
# while (my ($k,$v) = each %attrs) {
+ − 175
if ($have_namespaces) {
+ − 176
while ( my ($k, $v) = each %attribs ) {
+ − 177
if ($k =~ m/^xmlns(:(.*))?$/) {
+ − 178
my $prefix = $2 || '';
+ − 179
$self->{NSHelper}->declare_prefix($prefix, $v);
+ − 180
my $ns =
+ − 181
{
+ − 182
Prefix => $prefix,
+ − 183
NamespaceURI => $v,
+ − 184
};
+ − 185
push @new_ns, $ns;
+ − 186
$self->SUPER::start_prefix_mapping($ns);
+ − 187
}
+ − 188
}
+ − 189
}
+ − 190
+ − 191
# Create element object and fire event
+ − 192
my %attrib_hash;
+ − 193
while (my ($name, $value) = each %attribs ) {
+ − 194
# TODO normalise value here
+ − 195
my ($ns, $prefix, $lname);
+ − 196
if ($have_namespaces) {
+ − 197
($ns, $prefix, $lname) = $self->{NSHelper}->process_attribute_name($name);
+ − 198
}
+ − 199
$ns ||= ''; $prefix ||= ''; $lname ||= '';
+ − 200
$attrib_hash{"{$ns}$lname"} = {
+ − 201
Name => $name,
+ − 202
LocalName => $lname,
+ − 203
Prefix => $prefix,
+ − 204
NamespaceURI => $ns,
+ − 205
Value => $value,
+ − 206
};
+ − 207
}
+ − 208
+ − 209
%attribs = (); # lose the memory since we recurse deep
+ − 210
+ − 211
my ($ns, $prefix, $lname);
+ − 212
if ($self->get_feature(Namespaces)) {
+ − 213
($ns, $prefix, $lname) = $self->{NSHelper}->process_element_name($name);
+ − 214
}
+ − 215
else {
+ − 216
$lname = $name;
+ − 217
}
+ − 218
$ns ||= ''; $prefix ||= ''; $lname ||= '';
+ − 219
+ − 220
# Process remainder of start_element
+ − 221
$self->skip_whitespace($reader);
+ − 222
my $have_content;
+ − 223
my $data = $reader->data(2);
+ − 224
if ($data =~ /^\/>/) {
+ − 225
$reader->move_along(2);
+ − 226
}
+ − 227
else {
+ − 228
$data =~ /^>/ or $self->parser_error("No close element tag", $reader);
+ − 229
$reader->move_along(1);
+ − 230
$have_content++;
+ − 231
}
+ − 232
+ − 233
my $el =
+ − 234
{
+ − 235
Name => $name,
+ − 236
LocalName => $lname,
+ − 237
Prefix => $prefix,
+ − 238
NamespaceURI => $ns,
+ − 239
Attributes => \%attrib_hash,
+ − 240
};
+ − 241
$self->start_element($el);
+ − 242
+ − 243
# warn("($name\n");
+ − 244
+ − 245
if ($have_content) {
+ − 246
$self->content($reader);
+ − 247
+ − 248
my $data = $reader->data(2);
+ − 249
$data =~ /^<\// or $self->parser_error("No close tag marker", $reader);
+ − 250
$reader->move_along(2);
+ − 251
my $end_name = $self->Name($reader);
+ − 252
$end_name eq $name || $self->parser_error("End tag mismatch ($end_name != $name)", $reader);
+ − 253
$self->skip_whitespace($reader);
+ − 254
$reader->match('>') or $self->parser_error("No close '>' on end tag", $reader);
+ − 255
}
+ − 256
+ − 257
my %end_el = %$el;
+ − 258
delete $end_el{Attributes};
+ − 259
$self->end_element(\%end_el);
+ − 260
+ − 261
for my $ns (@new_ns) {
+ − 262
$self->end_prefix_mapping($ns);
+ − 263
}
+ − 264
$self->{NSHelper}->pop_context;
+ − 265
+ − 266
return 1;
+ − 267
}
+ − 268
+ − 269
sub content {
+ − 270
my ($self, $reader) = @_;
+ − 271
+ − 272
while (1) {
+ − 273
$self->CharData($reader);
+ − 274
+ − 275
my $data = $reader->data(2);
+ − 276
+ − 277
if ($data =~ /^<\//) {
+ − 278
return 1;
+ − 279
}
+ − 280
elsif ($data =~ /^&/) {
+ − 281
$self->Reference($reader) or $self->parser_error("bare & not allowed in content", $reader);
+ − 282
next;
+ − 283
}
+ − 284
elsif ($data =~ /^<!/) {
+ − 285
($self->CDSect($reader)
+ − 286
or
+ − 287
$self->Comment($reader))
+ − 288
and next;
+ − 289
}
+ − 290
elsif ($data =~ /^<\?/) {
+ − 291
$self->PI($reader) and next;
+ − 292
}
+ − 293
elsif ($data =~ /^</) {
+ − 294
$self->element($reader) and next;
+ − 295
}
+ − 296
last;
+ − 297
}
+ − 298
+ − 299
return 1;
+ − 300
}
+ − 301
+ − 302
sub CDSect {
+ − 303
my ($self, $reader) = @_;
+ − 304
+ − 305
my $data = $reader->data(9);
+ − 306
return 0 unless $data =~ /^<!\[CDATA\[/;
+ − 307
$reader->move_along(9);
+ − 308
+ − 309
$self->start_cdata({});
+ − 310
+ − 311
$data = $reader->data;
+ − 312
while (1) {
+ − 313
$self->parser_error("EOF looking for CDATA section end", $reader)
+ − 314
unless length($data);
+ − 315
+ − 316
if ($data =~ /^(.*?)\]\]>/s) {
+ − 317
my $chars = $1;
+ − 318
$reader->move_along(length($chars) + 3);
+ − 319
$self->characters({Data => $chars});
+ − 320
last;
+ − 321
}
+ − 322
elsif ($data =~ /(.*?)\]+$/s) {
+ − 323
my $chars = $1;
+ − 324
$reader->move_along(length($chars));
+ − 325
$self->characters({Data => $chars});
+ − 326
$data = $reader->data(3);
+ − 327
}
+ − 328
else {
+ − 329
$self->characters({Data => $data});
+ − 330
$reader->move_along(length($data));
+ − 331
$data = $reader->data;
+ − 332
}
+ − 333
}
+ − 334
$self->end_cdata({});
+ − 335
return 1;
+ − 336
}
+ − 337
+ − 338
sub CharData {
+ − 339
my ($self, $reader) = @_;
+ − 340
+ − 341
my $data = $reader->data;
+ − 342
+ − 343
while (1) {
+ − 344
return unless length($data);
+ − 345
+ − 346
if ($data =~ /^([^<&]*)[<&]/s) {
+ − 347
my $chars = $1;
+ − 348
$self->parser_error("String ']]>' not allowed in character data", $reader)
+ − 349
if $chars =~ /\]\]>/;
+ − 350
$reader->move_along(length($chars));
+ − 351
$self->characters({Data => $chars}) if length($chars);
+ − 352
last;
+ − 353
}
+ − 354
else {
+ − 355
$self->characters({Data => $data});
+ − 356
$reader->move_along(length($data));
+ − 357
$data = $reader->data;
+ − 358
}
+ − 359
}
+ − 360
}
+ − 361
+ − 362
sub Misc {
+ − 363
my ($self, $reader) = @_;
+ − 364
if ($self->Comment($reader)) {
+ − 365
return 1;
+ − 366
}
+ − 367
elsif ($self->PI($reader)) {
+ − 368
return 1;
+ − 369
}
+ − 370
elsif ($self->skip_whitespace($reader)) {
+ − 371
return 1;
+ − 372
}
+ − 373
+ − 374
return 0;
+ − 375
}
+ − 376
+ − 377
sub Reference {
+ − 378
my ($self, $reader) = @_;
+ − 379
+ − 380
return 0 unless $reader->match('&');
+ − 381
+ − 382
my $data = $reader->data;
+ − 383
+ − 384
if ($data =~ /^#x([0-9a-fA-F]+);/) {
+ − 385
my $ref = $1;
+ − 386
$reader->move_along(length($ref) + 3);
+ − 387
my $char = chr_ref(hex($ref));
+ − 388
$self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader)
+ − 389
unless $char =~ /$SingleChar/o;
+ − 390
$self->characters({ Data => $char });
+ − 391
return 1;
+ − 392
}
+ − 393
elsif ($data =~ /^#([0-9]+);/) {
+ − 394
my $ref = $1;
+ − 395
$reader->move_along(length($ref) + 2);
+ − 396
my $char = chr_ref($ref);
+ − 397
$self->parser_error("Character reference &#$ref; refers to an illegal XML character ($char)", $reader)
+ − 398
unless $char =~ /$SingleChar/o;
+ − 399
$self->characters({ Data => $char });
+ − 400
return 1;
+ − 401
}
+ − 402
else {
+ − 403
# EntityRef
+ − 404
my $name = $self->Name($reader)
+ − 405
|| $self->parser_error("Invalid name in entity", $reader);
+ − 406
$reader->match(';') or $self->parser_error("No semi-colon found after entity name", $reader);
+ − 407
+ − 408
# warn("got entity: \&$name;\n");
+ − 409
+ − 410
# expand it
+ − 411
if ($self->_is_entity($name)) {
+ − 412
+ − 413
if ($self->_is_external($name)) {
+ − 414
my $value = $self->_get_entity($name);
+ − 415
my $ent_reader = XML::SAX::PurePerl::Reader::URI->new($value);
+ − 416
$self->encoding_detect($ent_reader);
+ − 417
$self->extParsedEnt($ent_reader);
+ − 418
}
+ − 419
else {
+ − 420
my $value = $self->_stringify_entity($name);
+ − 421
my $ent_reader = XML::SAX::PurePerl::Reader::String->new($value);
+ − 422
$self->content($ent_reader);
+ − 423
}
+ − 424
return 1;
+ − 425
}
+ − 426
elsif ($name =~ /^(?:amp|gt|lt|quot|apos)$/) {
+ − 427
$self->characters({ Data => $int_ents{$name} });
+ − 428
return 1;
+ − 429
}
+ − 430
else {
+ − 431
$self->parser_error("Undeclared entity", $reader);
+ − 432
}
+ − 433
}
+ − 434
}
+ − 435
+ − 436
sub AttReference {
+ − 437
my ($self, $name, $reader) = @_;
+ − 438
if ($name =~ /^#x([0-9a-fA-F]+)$/) {
+ − 439
my $chr = chr_ref(hex($1));
+ − 440
$chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader);
+ − 441
return $chr;
+ − 442
}
+ − 443
elsif ($name =~ /^#([0-9]+)$/) {
+ − 444
my $chr = chr_ref($1);
+ − 445
$chr =~ /$SingleChar/o or $self->parser_error("Character reference '&$name;' refers to an illegal XML character", $reader);
+ − 446
return $chr;
+ − 447
}
+ − 448
else {
+ − 449
if ($self->_is_entity($name)) {
+ − 450
if ($self->_is_external($name)) {
+ − 451
$self->parser_error("No external entity references allowed in attribute values", $reader);
+ − 452
}
+ − 453
else {
+ − 454
my $value = $self->_stringify_entity($name);
+ − 455
return $value;
+ − 456
}
+ − 457
}
+ − 458
elsif ($name =~ /^(?:amp|lt|gt|quot|apos)$/) {
+ − 459
return $int_ents{$name};
+ − 460
}
+ − 461
else {
+ − 462
$self->parser_error("Undeclared entity '$name'", $reader);
+ − 463
}
+ − 464
}
+ − 465
}
+ − 466
+ − 467
sub extParsedEnt {
+ − 468
my ($self, $reader) = @_;
+ − 469
+ − 470
$self->TextDecl($reader);
+ − 471
$self->content($reader);
+ − 472
}
+ − 473
+ − 474
sub _is_external {
+ − 475
my ($self, $name) = @_;
+ − 476
# TODO: Fix this to use $reader to store the entities perhaps.
+ − 477
if ($self->{ParseOptions}{external_entities}{$name}) {
+ − 478
return 1;
+ − 479
}
+ − 480
return ;
+ − 481
}
+ − 482
+ − 483
sub _is_entity {
+ − 484
my ($self, $name) = @_;
+ − 485
# TODO: ditto above
+ − 486
if (exists $self->{ParseOptions}{entities}{$name}) {
+ − 487
return 1;
+ − 488
}
+ − 489
return 0;
+ − 490
}
+ − 491
+ − 492
sub _stringify_entity {
+ − 493
my ($self, $name) = @_;
+ − 494
# TODO: ditto above
+ − 495
if (exists $self->{ParseOptions}{expanded_entity}{$name}) {
+ − 496
return $self->{ParseOptions}{expanded_entity}{$name};
+ − 497
}
+ − 498
# expand
+ − 499
my $reader = XML::SAX::PurePerl::Reader::URI->new($self->{ParseOptions}{entities}{$name});
+ − 500
my $ent = '';
+ − 501
while(1) {
+ − 502
my $data = $reader->data;
+ − 503
$ent .= $data;
+ − 504
$reader->move_along(length($data)) or last;
+ − 505
}
+ − 506
return $self->{ParseOptions}{expanded_entity}{$name} = $ent;
+ − 507
}
+ − 508
+ − 509
sub _get_entity {
+ − 510
my ($self, $name) = @_;
+ − 511
# TODO: ditto above
+ − 512
return $self->{ParseOptions}{entities}{$name};
+ − 513
}
+ − 514
+ − 515
sub skip_whitespace {
+ − 516
my ($self, $reader) = @_;
+ − 517
+ − 518
my $data = $reader->data;
+ − 519
+ − 520
my $found = 0;
+ − 521
while ($data =~ s/^([\x20\x0A\x0D\x09]*)//) {
+ − 522
last unless length($1);
+ − 523
$found++;
+ − 524
$reader->move_along(length($1));
+ − 525
$data = $reader->data;
+ − 526
}
+ − 527
+ − 528
return $found;
+ − 529
}
+ − 530
+ − 531
sub Attribute {
+ − 532
my ($self, $reader) = @_;
+ − 533
+ − 534
$self->skip_whitespace($reader) || return;
+ − 535
+ − 536
my $data = $reader->data(2);
+ − 537
return if $data =~ /^\/?>/;
+ − 538
+ − 539
if (my $name = $self->Name($reader)) {
+ − 540
$self->skip_whitespace($reader);
+ − 541
$reader->match('=') or $self->parser_error("No '=' in Attribute", $reader);
+ − 542
$self->skip_whitespace($reader);
+ − 543
my $value = $self->AttValue($reader);
+ − 544
+ − 545
if (!$self->cdata_attrib($name)) {
+ − 546
$value =~ s/^\x20*//; # discard leading spaces
+ − 547
$value =~ s/\x20*$//; # discard trailing spaces
+ − 548
$value =~ s/ {1,}/ /g; # all >1 space to single space
+ − 549
}
+ − 550
+ − 551
return $name, $value;
+ − 552
}
+ − 553
+ − 554
return;
+ − 555
}
+ − 556
+ − 557
sub cdata_attrib {
+ − 558
# TODO implement this!
+ − 559
return 1;
+ − 560
}
+ − 561
+ − 562
sub AttValue {
+ − 563
my ($self, $reader) = @_;
+ − 564
+ − 565
my $quote = $self->quote($reader);
+ − 566
+ − 567
my $value = '';
+ − 568
+ − 569
while (1) {
+ − 570
my $data = $reader->data;
+ − 571
$self->parser_error("EOF found while looking for the end of attribute value", $reader)
+ − 572
unless length($data);
+ − 573
if ($data =~ /^([^$quote]*)$quote/) {
+ − 574
$reader->move_along(length($1) + 1);
+ − 575
$value .= $1;
+ − 576
last;
+ − 577
}
+ − 578
else {
+ − 579
$value .= $data;
+ − 580
$reader->move_along(length($data));
+ − 581
}
+ − 582
}
+ − 583
+ − 584
if ($value =~ /</) {
+ − 585
$self->parser_error("< character not allowed in attribute values", $reader);
+ − 586
}
+ − 587
+ − 588
$value =~ s/[\x09\x0A\x0D]/\x20/g;
+ − 589
$value =~ s/&(#(x[0-9a-fA-F]+)|([0-9]+)|\w+);/$self->AttReference($1, $reader)/geo;
+ − 590
+ − 591
return $value;
+ − 592
}
+ − 593
+ − 594
sub Comment {
+ − 595
my ($self, $reader) = @_;
+ − 596
+ − 597
my $data = $reader->data(4);
+ − 598
if ($data =~ /^<!--/) {
+ − 599
$reader->move_along(4);
+ − 600
my $comment_str = '';
+ − 601
while (1) {
+ − 602
my $data = $reader->data;
+ − 603
$self->parser_error("End of data seen while looking for close comment marker", $reader)
+ − 604
unless length($data);
+ − 605
if ($data =~ /^(.*?)-->/s) {
+ − 606
$comment_str .= $1;
+ − 607
$self->parser_error("Invalid comment (dash)", $reader) if $comment_str =~ /-$/;
+ − 608
$reader->move_along(length($1) + 3);
+ − 609
last;
+ − 610
}
+ − 611
else {
+ − 612
$comment_str .= $data;
+ − 613
$reader->move_along(length($data));
+ − 614
}
+ − 615
}
+ − 616
+ − 617
$self->comment({ Data => $comment_str });
+ − 618
+ − 619
return 1;
+ − 620
}
+ − 621
return 0;
+ − 622
}
+ − 623
+ − 624
sub PI {
+ − 625
my ($self, $reader) = @_;
+ − 626
+ − 627
my $data = $reader->data(2);
+ − 628
+ − 629
if ($data =~ /^<\?/) {
+ − 630
$reader->move_along(2);
+ − 631
my ($target, $data);
+ − 632
$target = $self->Name($reader) ||
+ − 633
$self->parser_error("PI has no target", $reader);
+ − 634
if ($self->skip_whitespace($reader)) {
+ − 635
$target = '';
+ − 636
while (1) {
+ − 637
my $data = $reader->data;
+ − 638
$self->parser_error("End of data seen while looking for close PI marker", $reader)
+ − 639
unless length($data);
+ − 640
if ($data =~ /^(.*?)\?>/s) {
+ − 641
$target .= $1;
+ − 642
$reader->move_along(length($1) + 2);
+ − 643
last;
+ − 644
}
+ − 645
else {
+ − 646
$target .= $data;
+ − 647
$reader->move_along(length($data));
+ − 648
}
+ − 649
}
+ − 650
}
+ − 651
else {
+ − 652
my $data = $reader->data(2);
+ − 653
$data =~ /^\?>/ or $self->parser_error("PI closing sequence not found", $reader);
+ − 654
$reader->move_along(2);
+ − 655
}
+ − 656
$self->processing_instruction({ Target => $target, Data => $data });
+ − 657
+ − 658
return 1;
+ − 659
}
+ − 660
return 0;
+ − 661
}
+ − 662
+ − 663
sub Name {
+ − 664
my ($self, $reader) = @_;
+ − 665
+ − 666
my $name = '';
+ − 667
while(1) {
+ − 668
my $data = $reader->data;
+ − 669
return unless length($data);
+ − 670
$data =~ /^([^\s>\/&\?;=<\)\(\[\],\%\#\!\*]*)/ or return;
+ − 671
$name .= $1;
+ − 672
my $len = length($1);
+ − 673
$reader->move_along($len);
+ − 674
last if ($len != length($data));
+ − 675
}
+ − 676
+ − 677
return unless length($name);
+ − 678
+ − 679
$name =~ /$NameChar/o or $self->parser_error("Name <$name> does not match NameChar production", $reader);
+ − 680
+ − 681
return $name;
+ − 682
}
+ − 683
+ − 684
sub quote {
+ − 685
my ($self, $reader) = @_;
+ − 686
+ − 687
my $data = $reader->data;
+ − 688
+ − 689
$data =~ /^(['"])/ or $self->parser_error("Invalid quote token", $reader);
+ − 690
$reader->move_along(1);
+ − 691
return $1;
+ − 692
}
+ − 693
+ − 694
1;
+ − 695
__END__
+ − 696
+ − 697
=head1 NAME
+ − 698
+ − 699
XML::SAX::PurePerl - Pure Perl XML Parser with SAX2 interface
+ − 700
+ − 701
=head1 SYNOPSIS
+ − 702
+ − 703
use XML::Handler::Foo;
+ − 704
use XML::SAX::PurePerl;
+ − 705
my $handler = XML::Handler::Foo->new();
+ − 706
my $parser = XML::SAX::PurePerl->new(Handler => $handler);
+ − 707
$parser->parse_uri("myfile.xml");
+ − 708
+ − 709
=head1 DESCRIPTION
+ − 710
+ − 711
This module implements an XML parser in pure perl. It is written around the
+ − 712
upcoming perl 5.8's unicode support and support for multiple document
+ − 713
encodings (using the PerlIO layer), however it has been ported to work with
+ − 714
ASCII/UTF8 documents under lower perl versions.
+ − 715
+ − 716
The SAX2 API is described in detail at http://sourceforge.net/projects/perl-xml/, in
+ − 717
the CVS archive, under libxml-perl/docs. Hopefully those documents will be in a
+ − 718
better location soon.
+ − 719
+ − 720
Please refer to the SAX2 documentation for how to use this module - it is merely a
+ − 721
front end to SAX2, and implements nothing that is not in that spec (or at least tries
+ − 722
not to - please email me if you find errors in this implementation).
+ − 723
+ − 724
=head1 BUGS
+ − 725
+ − 726
XML::SAX::PurePerl is B<slow>. Very slow. I suggest you use something else
+ − 727
in fact. However it is great as a fallback parser for XML::SAX, where the
+ − 728
user might not be able to install an XS based parser or C library.
+ − 729
+ − 730
Currently lots, probably. At the moment the weakest area is parsing DOCTYPE declarations,
+ − 731
though the code is in place to start doing this. Also parsing parameter entity
+ − 732
references is causing me much confusion, since it's not exactly what I would call
+ − 733
trivial, or well documented in the XML grammar. XML documents with internal subsets
+ − 734
are likely to fail.
+ − 735
+ − 736
I am however trying to work towards full conformance using the Oasis test suite.
+ − 737
+ − 738
=head1 AUTHOR
+ − 739
+ − 740
Matt Sergeant, matt@sergeant.org. Copyright 2001.
+ − 741
+ − 742
Please report all bugs to the Perl-XML mailing list at perl-xml@listserv.activestate.com.
+ − 743
+ − 744
=head1 LICENSE
+ − 745
+ − 746
This is free software. You may use it or redistribute it under the same terms as
+ − 747
Perl 5.7.2 itself.
+ − 748
+ − 749
=cut
+ − 750