uh_parser/preprocess_log.pl
author Simon Howkins <simonh@symbian.org>
Thu, 13 May 2010 16:27:37 +0100
changeset 267 2251fde91223
parent 258 08436a227940
permissions -rw-r--r--
Changed script to use CSV formatted input, rather than TSV. This means that the script can directly process the CSV downloaded from Bugzilla, without any need to use Excel to convert it.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
177
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
     1
#!perl -w
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
     2
# Copyright (c) 2009 Symbian Foundation Ltd
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
     3
# This component and the accompanying materials are made available
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
     4
# under the terms of the License "Eclipse Public License v1.0"
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
     5
# which accompanies this distribution, and is available
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
     6
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
     7
#
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
     8
# Initial Contributors:
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
     9
# Symbian Foundation Ltd - initial contribution.
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    10
#
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    11
# Contributors:
258
08436a227940 Add author information. Reviewed descriptions
Dario Sestito <darios@symbian.org>
parents: 177
diff changeset
    12
# Dario Sestito <darios@symbian.org>
177
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    13
#
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    14
# Description:
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    15
# Preprocess a raptor log, trying to countermeasure a list of known anomalies
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    16
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    17
use strict;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    18
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    19
use Getopt::Long;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    20
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    21
my $help = 0;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    22
GetOptions(
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    23
	'help!' => \$help,
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    24
);
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    25
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    26
if ($help)
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    27
{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    28
	warn <<"EOF";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    29
Preprocess a raptor log, trying to countermeasure a list of known anomalies
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    30
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    31
Usage: perl preprocess_log.pl < INFILE > OUTFILE
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    32
EOF
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    33
	exit(0);
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    34
}
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    35
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    36
while (my $line = <>)
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    37
{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    38
	if ($line =~ m{<[^<^>]+>.*&.*</[^<^>]+>})
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    39
	{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    40
		$line = escape_ampersand($line);
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    41
	}
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    42
	elsif ($line =~ m{<\?xml\s.*encoding=.*\".*\?>})
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    43
	{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    44
		$line = set_encoding_utf8($line);
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    45
	}
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    46
	elsif ($line =~ m{<archive.*?[^/]>})
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    47
	{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    48
		$line = unterminated_archive_tag($line, scalar <>, $.)
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    49
	}
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    50
	elsif ($line =~ m{make.exe: Circular .* <- .* dependency dropped.})
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    51
	{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    52
		$line = escape_left_angle_bracket($line);
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    53
	}
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    54
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    55
	print $line;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    56
}
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    57
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    58
sub escape_ampersand
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    59
{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    60
	my ($line) = @_;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    61
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    62
	warn "escape_ampersand\n";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    63
	warn "in: $line";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    64
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    65
	$line =~ s,&,&amp;,g;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    66
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    67
	warn "out: $line";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    68
	return $line;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    69
}
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    70
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    71
sub set_encoding_utf8
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    72
{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    73
	my ($line) = @_;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    74
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    75
	warn "set_encoding_utf8\n";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    76
	warn "in: $line";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    77
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    78
	$line =~ s,encoding=".*",encoding="utf-8",;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    79
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    80
	warn "out: $line";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    81
	return $line;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    82
}
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    83
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    84
sub unterminated_archive_tag
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    85
{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    86
	my $line = shift;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    87
	my $nextLine = shift;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    88
	my $lineNum = shift;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    89
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    90
	if ($nextLine !~ m{(<member>)|(</archive>)})
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    91
	{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    92
		warn "unterminated_archive_tag\n";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    93
		warn "in: $line";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    94
		$line =~ s{>}{/>};
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    95
		warn "out: $line";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    96
	}
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    97
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    98
	return $line . $nextLine;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
    99
}
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   100
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   101
sub escape_left_angle_bracket
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   102
{
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   103
	my ($line) = @_;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   104
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   105
	warn "escape_left_angle_bracket\n";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   106
	warn "in: $line";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   107
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   108
	$line =~ s,<,&lt;,g;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   109
	
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   110
	warn "out: $line";
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   111
	return $line;
6d3c3db11e72 Add Raptor uh parser
Dario Sestito <darios@symbian.org>
parents:
diff changeset
   112
}