uh_parser/XML/SAX/PurePerl/XMLDecl.pm
author Simon Howkins <simonh@symbian.org>
Thu, 13 May 2010 16:27:37 +0100
changeset 244 2251fde91223
parent 176 6d3c3db11e72
permissions -rw-r--r--
Changed script to use CSV formatted input, rather than TSV. This means that the script can directly process the CSV downloaded from Bugzilla, without any need to use Excel to convert it.

# $Id: XMLDecl.pm,v 1.3 2003/07/30 13:39:22 matt Exp $

package XML::SAX::PurePerl;

use strict;
use XML::SAX::PurePerl::Productions qw($S $VersionNum $EncNameStart $EncNameEnd);

sub XMLDecl {
    my ($self, $reader) = @_;
    
    my $data = $reader->data(5);
    # warn("Looking for xmldecl in: $data");
    if ($data =~ /^<\?xml$S/o) {
        $reader->move_along(5);
        $self->skip_whitespace($reader);
        
        # get version attribute
        $self->VersionInfo($reader) || 
            $self->parser_error("XML Declaration lacks required version attribute, or version attribute does not match XML specification", $reader);
        
        if (!$self->skip_whitespace($reader)) {
            my $data = $reader->data(2);
            $data =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
            $reader->move_along(2);
            return;
        }
        
        if ($self->EncodingDecl($reader)) {
            if (!$self->skip_whitespace($reader)) {
                my $data = $reader->data(2);
                $data =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
                $reader->move_along(2);
                return;
            }
        }
        
        $self->SDDecl($reader);
        
        $self->skip_whitespace($reader);
        
        my $data = $reader->data(2);
        $data =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
        $reader->move_along(2);
    }
    else {
        # warn("first 5 bytes: ", join(',', unpack("CCCCC", $data)), "\n");
        # no xml decl
        if (!$reader->get_encoding) {
            $reader->set_encoding("UTF-8");
        }
    }
}

sub VersionInfo {
    my ($self, $reader) = @_;
    
    my $data = $reader->data(11);
    
    # warn("Looking for version in $data");
    
    $data =~ /^(version$S*=$S*(["'])($VersionNum)\2)/o or return 0;
    $reader->move_along(length($1));
    my $vernum = $3;
    
    if ($vernum ne "1.0") {
        $self->parser_error("Only XML version 1.0 supported. Saw: '$vernum'", $reader);
    }

    return 1;
}

sub SDDecl {
    my ($self, $reader) = @_;
    
    my $data = $reader->data(15);
    
    $data =~ /^(standalone$S*=$S*(["'])(yes|no)\2)/o or return 0;
    $reader->move_along(length($1));
    my $yesno = $3;
    
    if ($yesno eq 'yes') {
        $self->{standalone} = 1;
    }
    else {
        $self->{standalone} = 0;
    }
    
    return 1;
}

sub EncodingDecl {
    my ($self, $reader) = @_;
    
    my $data = $reader->data(12);
    
    $data =~ /^(encoding$S*=$S*(["'])($EncNameStart$EncNameEnd*)\2)/o or return 0;
    $reader->move_along(length($1));
    my $encoding = $3;
    
    $reader->set_encoding($encoding);
    
    return 1;
}

sub TextDecl {
    my ($self, $reader) = @_;
    
    my $data = $reader->data(6);
    $data =~ /^<\?xml$S+/ or return;
    $reader->move_along(5);
    $self->skip_whitespace($reader);
    
    if ($self->VersionInfo($reader)) {
        $self->skip_whitespace($reader) ||
                $self->parser_error("Lack of whitespace after version attribute in text declaration", $reader);
    }
    
    $self->EncodingDecl($reader) ||
        $self->parser_error("Encoding declaration missing from external entity text declaration", $reader);
    
    $self->skip_whitespace($reader);
    
    $data = $reader->data(2);
    $data =~ /^\?>/ or $self->parser_error("Syntax error", $reader);
    
    return 1;
}

1;