changeset 0 044383f39525
child 311 3b0e5fcfce37
child 621 96fee2635b19
equal deleted inserted replaced
-1:000000000000 0:044383f39525
     1 #
     2 # Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
     3 # All rights reserved.
     4 # This component and the accompanying materials are made available
     5 # under the terms of the License "Eclipse Public License v1.0"
     6 # which accompanies this distribution, and is available
     7 # at the URL "".
     8 #
     9 # Initial Contributors:
    10 # Nokia Corporation - initial contribution.
    11 #
    12 # Contributors:
    13 #
    14 # Description: 
    15 #
    17 package EvalidCompare;
    19 use strict;
    20 our $VERSION = '1.00';
    21 use IO::Handle;
    22 use IO::File;
    23 use Cwd;
    25 use File::Temp qw/ tempfile tempdir /;
    26 use File::Find;
    27 use File::Path;
    28 use File::Basename;
    29 use File::Copy;
    31 #
    32 # Constants.
    33 #
# Maps each file-type name (as returned by IdentifyFileType) to the name of
# the comparison strategy used for that type. A strategy is either the special
# value 'identical' (raw byte-for-byte handling, no filtering) or a key of
# %typeHandler. WriteFilteredData and DoCompareFiles die with
# "Invalid file type" for any type name that is not a key of this table.
my %typeLookup = (
      'ARM PE-COFF executable' => 'ignore',
      'E32 EXE' => 'e32',
      'E32 DLL' => 'e32',
      'Uncompressed E32 EXE' => 'e32',
      'Uncompressed E32 DLL' => 'e32',
      'Compressed E32 EXE' => 'e32',
      'Compressed E32 DLL' => 'e32',
      'Intel DLL' => 'intel_pe',
      'Intel EXE' => 'intel_pe',
      'MSDOS EXE' => 'intel_pe',
      'Intel object' => 'intel',
      'Intel library' => 'intel',
      'ELF library' => 'elf',
      'ARM object' => 'arm',
      'ARM library' => 'arm',
      'unknown format' => 'identical',
      'Java class' => 'identical',
      'ZIP file' => 'zip',
      'Permanent File Store' => 'permanent_file_store',
      'SIS file' => 'identical',
      'MSVC database' => 'ignore',
      'MAP file' => 'map',
      'SGML file' => 'sgml',
      'Preprocessed text' => 'preprocessed_text',
      'ELF file' => 'elf',
      'Unknown COFF object' => 'identical',
      'Unknown library' => 'identical',
      'chm file' => 'chm_file',
	  'Header file' => 'header',
	  'Distribution Policy' => 'distpol'
     );
# Describes how each comparison strategy named in %typeLookup is carried out.
# Keys read by WriteFilteredData and DoCompareFiles:
#   reader         - command line whose output is compared instead of the file
#                    itself; the quoted file name is appended to it
#   filter         - code ref called with a reference to each line so that
#                    insignificant content can be removed or normalised
#   retry          - when 1, a failing reader is run a second time
#   rawretry       - when set, a failing reader or expandor causes a fall back
#                    to a raw binary comparison
#   relative_paths - when set, the file name is passed through RelativePath
#   skipstderr     - when set, reader stderr is sent to NUL rather than being
#                    merged into stdout
#   skipblanks     - when set, lines that are blank after filtering are ignored
#   expandor       - command line that unpacks the file into a directory; the
#                    unpacked files are then processed individually
#
# %TEMPDIR% and %FILE% are magic words for the expandor.
# They are replaced with suitable values when the expandor is used, which also
# allows an order of expandor arguments where the file name is not last.
#
# NOTE(review): the zip reader uses $FindBin::Bin, but no "use FindBin" is
# visible in this file - presumably the calling script loads it; confirm.
my %typeHandler = (
      e32 => {reader => 'elf2e32 --dump --e32input=', filter => \&Elf2E32Filter},
      arm => {reader => 'nm --no-sort', filter => \&NmFilter, retry => 1, relative_paths => 1},
      elf => {reader => 'elfdump -i', filter => \&ElfDumpFilter, rawretry => 1},
      intel => {reader => '%EPOCROOT%epoc32\gcc_mingw\bin\nm --no-sort', filter => \&NmFilter, rawretry => 1, relative_paths => 1, skipstderr => 1},
      intel_pe => {reader => 'pe_dump', filter => \&FilterNone, rawretry => 1},
	  zip => {reader => '"'.$FindBin::Bin.'/unzip" -l -v', filter => \&UnzipFilter, rawretry => 1},
      map => {filter => \&MapFilter, skipblanks => 1},
      sgml => {filter => \&SgmlFilter},
      preprocessed_text => {filter => \&PreprocessedTextFilter},
      permanent_file_store => {reader => 'pfsdump -c -v', filter => \&PermanentFileStoreFilter, rawretry => 1, relative_paths => 1},
      ignore => {filter => \&FilterAll},
      chm_file => {expandor => 'hh -decompile %TEMPDIR% %FILE%', rawretry => 1},
	  header => {filter => \&FilterCVSTags},
	  distpol => {filter => \&DistributionPolicyFilter}
     );
    90 #
    91 # Globals.
    92 #
# File-level state shared by the subs in this file.
my $log;      # handle diagnostics are printed to; set by CompareFiles and GenerateSignature
my $verbose;  # true when detailed diagnostics are wanted; set by CompareFiles and GenerateSignature
my $toRoot;   # cached prefix computed lazily by RelativePath from the current directory
my $dumpDir;  # directory for descriptive dumps; set by GenerateSignature

# Explicitly start with no dump directory (descriptive dumps disabled).
undef $dumpDir;
   102 #
   103 # Public.
   104 #
   106 sub CompareFiles {
   107   my $file1 = shift;
   108   my $file2 = shift;
   109   $verbose = defined($_[0]) ? shift : 0;
   110   $log = defined($_[0]) ? shift : *STDOUT;
   111   # Try binary compare first (to keep semantics the same as evalid)...
   112   if (DoCompareFiles($file1, $file2, 'unknown format')) {
   113     return 1,'identical';
   114   }
   115   my $type = IdentifyFileType($file1);
   116   if ($typeLookup{$type} eq 'identical') {
   117     return 0,$type; # We already know a binary compare is going to return false.
   118   }
   119   return DoCompareFiles($file1, $file2, $type),$type;
   120 }
   122 sub GenerateSignature {
   123   my $file = shift;
   124   $dumpDir = shift;
   125   $verbose = defined($_[0]) ? shift : 0;
   126   $log = defined($_[0]) ? shift : *STDOUT;
   127   my $md5;
   129   if (eval "require Digest::MD5") { # Prefer Digest::MD5, if available.
   130     $md5 = Digest::MD5->new();
   131   } elsif (eval "require MD5") { # Try old version of MD5, if available.
   132     $md5 = new MD5;
   133   } elsif (eval "require Digest::Perl::MD5") { # Try Perl (Slow) version of MD5, if available.
   134     $md5 = Digest::Perl::MD5->new();
   135   } else {
   136     die "Error: Cannot load any MD5 Modules";
   137   }
   139   my $type = IdentifyFileType($file);
   140   WriteFilteredData($file, $type, $md5);
   141   return $md5->hexdigest(), $type;
   142 }
   145 #
   146 # Private.
   147 #
   149 sub IdentifyFileType {
   150   my $file = shift;
   151   open (FILE, $file) or die "Error: Couldn't open \"$file\" for reading: $!\n";
   152   binmode (FILE);
   153   my $typeBuf;
   154   read (FILE, $typeBuf, 512);
   155   close (FILE);
   156   my ($uid1, $uid2, $uid3, $checksum) = unpack "V4", $typeBuf;
   158   # NB. Need to use the s modifier so that '.' will match \x0A
   160   if ($typeBuf =~ /^.\x00\x00\x10.{12}EPOC.{8}(....).{12}(.)..(.)/s) {
   161     # E32 Image file with a 0x100000?? UID1
   162     # $2 is the flag field indicating an EXE or a DLL
   163     # $3 is the flag byte indicating compressable executables
   164     # $1 is the format field indicating compression type
   165     # See e32tools\inc\e32image.h
   166     #
   167     my $typename = "E32 EXE";
   168     if ((ord $2) & 0x1) {
   169       $typename = "E32 DLL";
   170     }
   171     if ((ord $3) >= 0x1) {
   172     if ((ord $1) != 0) {
   173         $typename = "Compressed $typename";
   174     }
   175     else {
   176         $typename = "Uncompressed $typename";
   177     }
   178     }
   179     return $typename;
   180   }
   182   if ($typeBuf =~ /^\x4D\x5A.{38}\x00{20}(....)/s) {
   183     # A standard 64-byte MS-DOS header with e_magic == IMAGE_DOS_SIGNATURE
   184     # $1 is e_lfanew, which we expect to point to a COFF header
   186     my $offset = unpack "V",$1;
   187     if ($offset + 24 <= length $typeBuf) {
   188       $typeBuf = substr $typeBuf, $offset;
   189     }
   190     else {
   191       open FILE, $file or die "Error: Couldn't open \"$file\" for reading: $!\n";
   192       binmode FILE;
   193       seek FILE, $offset, 0;
   194       read FILE, $typeBuf, 512;
   195       close FILE;
   196     }
   198     if ($typeBuf =~ /^PE\0\0\x4c\x01.{16}(..)/s) {
   199       # A PE signature "PE\0\0" followed by a COFF header with
   200       # machine type IMAGE_FILE_MACHINE_I386
   201       # $1 is the characteristics field
   202       #
   203       if ((unpack "v",$1) & 0x2000) {
   204     return "Intel DLL";
   205       }
   206       else {
   207     return "Intel EXE";
   208       }
   209     }
   210   elsif($typeBuf =~ /^PE\0\0\0\x0a/) {
   211   # A PE signature "PE\0\0" followed by ARM COFF file magic value 0xA00
   212     return "ARM PE-COFF executable";
   213   }
   214     else {
   215       return "MSDOS EXE";
   216     }
   217   }
   219   if ($typeBuf =~ /^(\x4c\x01|\x00\x0A).(\x00|\x01).{4}...\x00/s) {
   220     # COFF header with less than 512 sections and a symbol table
   221     # at an offset no greater than 0x00ffffff
   223     if ($1 eq "\x4c\x01") {
   224       return "Intel object";
   225     }
   226     elsif ($1 eq "\x00\x0A") {
   227       return "ARM object";
   228     }
   229     else {
   230       return "Unknown COFF object";
   231     }
   232   }
   234   if ($typeBuf =~ /^!<arch>\x0A(.{48}([0-9 ]{10})\x60\x0A(......))/s) {
   235     # library - could be MARM or WINS
   237     $typeBuf = $1;
   238     my $member_start = 8;
   240     open (FILE, $file) or die "Error: Couldn't open \"$file\" for reading: $!\n";
   241     binmode (FILE);
   243     while ($typeBuf =~ /^.{48}([0-9 ]{10})\x60\x0A(......)/s) {
   244       # $1 is the size of the archive member, $2 is first 6 bytes of the file
   245       # There may be several different sorts of file in the archive, and we
   246       # need to scan through until we find a type we recognize:
   247       # $2 == 0x0A00 would be ARM COFF, 0x014C would be Intel COFF
   248       if ($2 =~ /^\x00\x0A/) {
   249   close FILE;
   250   return "ARM library";
   251       }
   252       if ($2 =~ /^\x4C\x01/) {
   253   close FILE;
   254   return "Intel library";
   255       }
   256 	  my $elfBuf =  $2;
   257       if ($2 =~ /^\x7F\x45\x4C\x46/) {
   258   close FILE;
   259 		my $dataEncodingLib = substr($elfBuf, 5, 6);
   260 		if ( $dataEncodingLib =~ /^\x02/) {	
   261 			# e_ident[EI_DATA] == 2 (Data Encoding ELFDATA2MSB - big endian)
   262 			# this is not supported by Elfdump hence it is treated as 'unknown format'
   263 		return 'unknown library';
   264 		}
   265 		else {
   266 		return "ELF library";
   267 		}
   268 	 }
   270       $member_start += 60 + $1;
   271       if ($member_start & 0x1) {
   272         $member_start += 1;  # align to multiple of 2 bytes
   273       }
   274       seek FILE, $member_start, 0;
   275       read FILE, $typeBuf, 512;
   276     }
   277     close FILE;
   278     return "Unknown library";
   279   }
   281   if ($typeBuf =~ /^\xCA\xFE\xBA\xBE/) {
   282     # Java class file - should have match as a straight binary comparison
   283     return "Java class";
   284   }
   286   if ($typeBuf =~ /^PK\x03\x04/) {
   287     # ZIP file
   288     return "ZIP file";
   289   }
   291   if ($uid1 && $uid1==0x10000050) {
   292     # Permanent File Store
   293     return "Permanent File Store";
   294   }
   296   if ($uid1 && $uid2 && $uid3 && $checksum && $uid3==0x10000419) {
   297     if (($uid1==0x100002c3 && $uid2==0x1000006d && $checksum==0x128ca96f)  # narrow
   298   ||  ($uid1==0x10003b0b && $uid2==0x1000006d && $checksum==0x75e21a1d)  # unicode
   299   ||  ($uid1==0x10009205 && $uid2==0x10003a12 && $checksum==0x986a0c25)) # new format
   300       {
   301       # SIS file
   302       return "SIS file";
   303       }
   304   }
   306   if ($typeBuf =~ /^Microsoft [^\x0A]+ [Dd]atabase/s) {
   307     return "MSVC database";
   308   }
   310   if ($typeBuf =~ /^\S.+ needed due to / || $typeBuf =~ /^Archive member included.*because of file/) {
   311     # GCC MAP file
   312     return "MAP file";
   313   }
   315   if ($typeBuf =~ /Preferred load address is/) {
   316     # Developer Studio MAP file
   317     return "MAP file";
   318   }
   320   if ($typeBuf =~ /^Address\s+Size\s+Name\s+Subname\s+Module/) {
   321     # CodeWarrior MAP file
   322     return "MAP file";
   323   }
   325   if ($typeBuf =~ /^ARM Linker,/) {
   326     # RVCT MAP file
   327     return "MAP file";
   328   }
   330   if ($typeBuf =~ /<!DOCTYPE/i) {
   331     # XML or HTML file - need to ignore javadoc generation dates
   332     return "SGML file";
   333   }
   335   if ($typeBuf =~ /^# 1 ".*"(\x0D|\x0A)/s) {
   336     # Output of CPP
   337     return "Preprocessed text";
   338   }
   340   if ($typeBuf =~ /^\x7F\x45\x4C\x46/) {
   341 	my $dataEncoding = substr($typeBuf, 5, 6);
   342 	if ( $dataEncoding =~ /^\x02/) {	
   343 	  # e_ident[EI_DATA] == 2 (Data Encoding ELFDATA2MSB - big endian)
   344 	  # this is not supported by Elfdump hence it is treated as 'unknown format'
   345 	   return 'unknown format';
   346 	}
   347 	else {
   348 		return "ELF file";;
   349 	}
   350    }
   352   if ($typeBuf =~/^ITSF/) {
   353     # chm file
   354     return "chm file";
   355   }
   357   if ($file =~ m/\.(iby|h|hby|hrh|oby|rsg|cpp)$/i) {
   358     return "Header file";
   359   }
   361   if ($file =~ /distribution\.policy$/i) {
   362 	return "Distribution Policy"
   363   }
   365   return 'unknown format';
   366 }
# Feed the significant content of a file into an MD5 object and, when a dump
# directory has been set ($dumpDir), write a descriptive copy of that content.
#
# Arguments:
#   $file                - path of the file to process
#   $type                - type name as returned by IdentifyFileType
#   $md5                 - digest object; the code calls add() and reset() on it
#   $dumpDirExpandedFile - optional name to use for the dump file instead of
#                          $file (passed for files unpacked by an expandor)
# Dies when $type is not a key of %typeLookup, or when a file or reader
# cannot be opened.
sub WriteFilteredData {
  my $file = shift;
  my $type = shift;
  my $md5 = shift;
  my $dumpDirExpandedFile = shift;

  # Filtered lines collected for the descriptive dump (only filled when $dumpDir is set)
  my (@dumpDirBuffer);

  unless (exists $typeLookup{$type}) {
    die "Invalid file type \"$type\"";
  }
  # From here on $type holds the comparison strategy, not the file type name
  $type = $typeLookup{$type};

  # Check to see if this file type requires expanding first
  if (exists $typeHandler{$type}->{expandor})
  {
    my $expandor = $typeHandler{$type}->{expandor};
    # Create a temporary directory to expand into
    my $tempdir = tempdir ( "EvalidExpand_XXXXXX", DIR => File::Spec->tmpdir, CLEANUP => 1);

    # Build the Expandor commandline
    $expandor =~ s/%TEMPDIR%/$tempdir/g;
    $expandor =~ s/%FILE%/$file/g;

    # Expand files
    my $output = `$expandor 2>&1`;
    print($log "Expanding using $expandor output was:-\n$output") if ($verbose);
    if ($? > 0)
    {
      print ($log "$expandor exited with $?") if ($verbose);
      # set type to be identical for retry if raw
      if ($typeHandler{$type}->{rawretry} == 1)
      {
        $type = 'identical';
      } else {
        print "ERROR: failed to start $expandor (" .($?). ") - reporting failure\n";
      }
    } else {    
      # Process all files in $tempdir
      # NOTE(review): the list is used in the order find() reports it, so the
      # resulting digest presumably depends on directory traversal order - confirm.
      my @FileList;
      find(sub { push @FileList, $File::Find::name if (! -d);}, $tempdir);
      foreach my $expandfile (@FileList)
      {
	  my $dumpDirExpandedFilename = "";

      if ($dumpDir)
      	{
	  	# Name the dump after the original file followed by the path of the
	  	# expanded file inside the temporary directory
	  	$dumpDirExpandedFilename = $expandfile;
		$dumpDirExpandedFilename =~ s/^.*EvalidExpand_\w+//;
	  	$dumpDirExpandedFilename = $file.$dumpDirExpandedFilename;
      	}

      # This $type deliberately shadows the outer one: it is the type of the expanded file
      my $type = IdentifyFileType($expandfile);

      # NOTE(review): every expanded file is added to the same $md5 object,
      # but the 'identical' branch and the retry path below call $md5->reset,
      # which also discards what earlier expanded files contributed - confirm
      # whether that is intended.
      &WriteFilteredData($expandfile, $type, $md5, $dumpDirExpandedFilename);
      }
    }
  }  elsif ($type ne 'identical') {
    unless (exists $typeHandler{$type}) {
      die "Invalid comparison type \"$type\"";
    }
    my $reader = $typeHandler{$type}->{reader};
    my $filter = $typeHandler{$type}->{filter};
    my $retry = $typeHandler{$type}->{retry} || 0;
    my $rawretry = $typeHandler{$type}->{rawretry} || 0;
	my $skipblanks = $typeHandler{$type}->{skipblanks} || 0;
    my $relativePaths = $typeHandler{$type}->{relative_paths} || 0;
    # No handler in the %typeHandler table visible in this file sets dos_paths
    my $dosPaths = $typeHandler{$type}->{dos_paths} || 0;

	# By default reader stderr is merged into the output that is digested
	my $skipstderr = $typeHandler{$type}->{skipstderr} || 0;
	my $redirectstd = "2>&1";

	if ($skipstderr) {
		$redirectstd = "2>NUL";
	}

    if ($relativePaths) {
      $file = RelativePath($file);
    }
    if ($dosPaths) {
      $file =~ s/\//\\/g;       # convert to DOS-style backslash separators
    }

    # Read either the output of the reader command or the file itself
    my $raw;
    if ($reader) {
      $raw = IO::File->new("$reader \"$file\" $redirectstd |") or die "Error: Couldn't run \"$reader $file\": $!\n";
    }
    else {
      $raw = IO::File->new("$file") or die "Error: Couldn't open \"$file\": $!\n";
    }
    while (my $line = <$raw>) {
      # The filter edits the line in place through the reference
      &$filter(\$line);
	  next if $skipblanks and $line =~ /^\s*$/;
      $md5->add($line);
      push @dumpDirBuffer, $line if ($dumpDir);
    }
    Drain($raw);
    # The checks of $? below follow this close
    $raw->close();

    # Retry once if reader failed and reader has retry specified
    if ((($?>>8) != 0) && ($retry == 1))
    {
      print "Warning: $reader failed (" .($?>>8). ") on $file - retrying\n";
      # Reset MD5
      $md5->reset;
      undef @dumpDirBuffer if ($dumpDir);
      $raw = IO::File->new("$reader \"$file\" $redirectstd |") or die "Error: Couldn't run \"$reader $file\": $!\n";
      while (my $line = <$raw>)
      {
        &$filter(\$line);
		next if $skipblanks and $line =~ /^\s*$/;
        $md5->add($line);
        push @dumpDirBuffer, $line if ($dumpDir);
      }
      Drain($raw);
      $raw->close();
      if (($?>>8) != 0)
      {
        print "Error: $reader failed again (" .($?>>8) .") on $file - reporting failure\n";
      }
    }

    # Retry as raw if specified
    if (($?>>8) != 0) {
      if ($rawretry)
      {
          if ($reader =~ /^pfsdump/) { 
              print "Warning: $reader failed (". ($?>>8) .") on file $file - retrying as raw binary\n";
          }
          else {
              print "Info: something wrong to execute $reader (". ($?>>8) .") on file $file - retrying as raw binary\n";
          }
          # Set type to be identical so it will try it as a raw binary stream
          $type = 'identical';
      } else {
        print "Error: $reader failed (". ($?>>8) .") on file $file - not retrying as raw binary\n";
      }
    }
  }
  if ($type eq 'identical') {
    # Reset md5 as it might have been used in reader section
    $md5->reset;
	undef @dumpDirBuffer if ($dumpDir);
    # Treat 'identical' as a special case - no filtering, just write raw binary stream.
    my $raw = IO::File->new($file) or die "Error: Couldn't open \"$file\" for reading: $!\n";
    binmode($raw);
    my $buf;
    while ($raw->read($buf, 4096)) {
      $md5->add($buf);
    }
    $raw->close();
  }

  # With an empty buffer dumpDescriptiveOutput copies the original file instead
  my $dumpDirFilename = $file;
  $dumpDirFilename = $dumpDirExpandedFile if ($dumpDirExpandedFile);
  dumpDescriptiveOutput ($file, $dumpDirFilename, @dumpDirBuffer) if ($dumpDir);

  # Make sure the $? is reset for the next file otherwise it will report errors
  $? = 0;
}
   529 sub DoCompareFiles {
   530   my $file1 = shift;
   531   my $file2 = shift;
   532   my $type = shift;
   533   my $same = 0;
   534   unless (exists $typeLookup{$type}) {
   535     die "Invalid file type \"$type\"";
   536   }
   538   $type = $typeLookup{$type};
   540   # Check to see if this file type requires expanding first
   541   if (exists $typeHandler{$type}->{expandor})
   542   {
   543     $same = &ExpandAndCompareFiles($file1, $file2, $typeHandler{$type}->{expandor});
   544     # Check for Expanding error
   545     if ($same == -1)
   546     {
   547       if ($typeHandler{$type}->{rawretry} == 1)
   548       {
   549         # Set type to be identical if rawrety is set
   550         $type = 'identical';
   551         print($log "Warning: Expandor $typeHandler{$type}->{expandor} failed for $file1 or $file2 : retrying as raw\n") if ($verbose);
   552       } else {
   553         die "Error: Expandor $typeHandler{$type}->{expandor} failed for $file1 or $file2\n";
   554       }
   555     } else {
   556       return $same;
   557     }
   558   }
   560   if ($type ne 'identical')
   561   {
   562     unless (exists $typeHandler{$type}) {
   563       die "Invalid comparison type \"$type\"";
   564     }
   566     my $reader = $typeHandler{$type}->{reader};
   567     my $filter = $typeHandler{$type}->{filter};
   568     my $retry = $typeHandler{$type}->{retry} || 0;
   569 	my $skipblanks= $typeHandler{$type}->{skipblanks} || 0;
   570     my $rawretry = $typeHandler{$type}->{rawretry} || 0;
   571     my $relativePaths = $typeHandler{$type}->{relative_paths} || 0;
   572 	my $skipstderr = $typeHandler{$type}->{skipstderr} || 0;
   573 	my $redirectstd = "2>&1";
   575 	if ($skipstderr) {
   576 		$redirectstd = "2>NUL";
   577 	}
   579     if ($relativePaths) {
   580       $file1 = RelativePath($file1);
   581       $file2 = RelativePath($file2);
   582     }
   583     my $fileHandle1;
   584     my $fileHandle2;
   585     if ($reader) {
   586       $fileHandle1 = IO::File->new("$reader \"$file1\" $redirectstd |") or die "Error: Couldn't run \"$reader $file1\": $!\n";
   587       $fileHandle2 = IO::File->new("$reader \"$file2\" $redirectstd |") or die "Error: Couldn't run \"$reader $file2\": $!\n";
   588     }
   589     else {
   590       $fileHandle1 = IO::File->new("$file1") or die "Error: Couldn't open \"$file1\": $!\n";
   591       $fileHandle2 = IO::File->new("$file2") or die "Error: Couldn't open \"$file2\": $!\n";
   592     }
   593 	$same = CompareTexts($fileHandle1, $fileHandle2, $filter, $file1, $skipblanks);
   594     Drain($fileHandle1, $fileHandle2);
   596     $fileHandle1->close();
   597     my $status1 = $?>>8;
   598     $fileHandle2->close();
   599     my $status2 = $?>>8;
   600     if (($retry) && ($status1 != 0 or $status2 != 0))
   601     {
   602       print ($log "Warning: $reader failed ($status1, $status2) - retrying\n");
   604       # Repeat previous code by hand, rather than calling DoCompareFiles
   605       # again: if it's a systematic failure that would be a never ending loop...
   607       $fileHandle1 = IO::File->new("$reader \"$file1\" $redirectstd |") or die "Error: Couldn't run \"$reader $file1\": $!\n";
   608       $fileHandle2 = IO::File->new("$reader \"$file2\" $redirectstd |") or die "Error: Couldn't run \"$reader $file2\": $!\n";
   609 	  $same = CompareTexts($fileHandle1, $fileHandle2, $filter, $file1, $skipblanks);
   610       Drain($fileHandle1, $fileHandle2);
   611       $fileHandle1->close();
   612       $status1 = $?>>8;
   613       $fileHandle2->close();
   614       $status2 = $?>>8;
   615       if ($status1 != 0 or $status2 != 0)
   616       {
   617         print ($log "Warning: $reader failed again ($status1, $status2) - reporting failure\n");
   618         $same = 0;
   619       }
   620     }
   622     # Retry as raw if specified
   623     if (($rawretry)&& ($status1 != 0 or $status2 != 0))
   624     {
   625       if ($rawretry)
   626       {
   627         print ($log "Warning: $reader failed (" .($?>>8). ") on a file retrying as raw binary\n");
   628         # Set type to be identical so it will try it as a raw binary stream
   629         $type = 'identical';
   630       } else {
   631         print ($log "Error: $reader failed (" .($?>>8). ") on a file not retrying as raw binary\n");
   632       }
   633     }
   635   }
   637   if ($type eq 'identical') {
   638     # Treat 'identical' as a special case - no filtering, just do raw binary stream comparison.
   639     my $fileHandle1 = IO::File->new($file1) or die "Error: Couldn't open \"$file1\" for reading: $!\n";
   640     my $fileHandle2 = IO::File->new($file2) or die "Error: Couldn't open \"$file2\" for reading: $!\n";
   641     binmode($fileHandle1);
   642     binmode($fileHandle2);
   643     $same = CompareStreams($fileHandle1, $fileHandle2, $file1);
   644   }
   646   # Make sure the $? is reset for the next file otherwise it will report errors
   647   $? = 0;
   649   return $same;
   650 }
   652 sub CompareStreams {
   653   my $fileHandle1 = shift;
   654   my $fileHandle2 = shift;
   655   my $filename = shift;
   656   my $same = 1;
   657   my $offset = -4096;
   658   my $buf1;
   659   my $buf2;
   660   while ($same) {
   661     my $len1 = $fileHandle1->read($buf1, 4096);
   662     my $len2 = $fileHandle2->read($buf2, 4096);
   663     if ($len1 == 0 and $len2 == 0) {
   664       return 1;
   665     }
   666     $same = $buf1 eq $buf2;
   667     $offset += 4096;
   668   }
   669   if ($verbose) {
   670     my @bytes1 = unpack "C*", $buf1;
   671     my @bytes2 = unpack "C*", $buf2;
   672     foreach my $thisByte (@bytes1) {
   673       if ($thisByte != $bytes2[0]) {
   674 	printf $log "Binary comparison: %s failed at byte %d: %02x != %02x\n", $filename, $offset, $thisByte, $bytes2[0];
   675 	last;
   676       }
   677       shift @bytes2;
   678       $offset+=1;
   679     }
   680   }
   681   return 0;
   682 }
   684 sub NextSignificantLine {
   685 	my $filehandle = shift;
   686 	my $linenumber = shift;
   687 	my $cleanersub = shift;
   688 	my $skipblanks = shift;
   690 	while (!eof($filehandle)) {
   691 		my $line = <$filehandle>;
   692 		$$linenumber++;
   693 		$cleanersub->(\$line);
   694 		return $line if !$skipblanks or $line !~ /^\s*$/;
   695 	}
   696 	return undef; # on eof
   697 }
   699 sub CompareTexts {
   700 	my $filehandle1 = shift;
   701 	my $filehandle2 = shift;
   702 	my $cleaner = shift;
   703 	my $filename = shift;
   704 	my $skipblanks = shift;
   705 	my $lineNum1 = 0;
   706 	my $lineNum2 = 0;
   708 	while (1) {
   709 		my $line1 = NextSignificantLine($filehandle1, \$lineNum1, $cleaner, $skipblanks);
   710 		my $line2 = NextSignificantLine($filehandle2, \$lineNum2, $cleaner, $skipblanks);
   712 		return 0 if defined($line1) != defined($line2); # eof vs. significant content
   713 		return 1 if !defined($line1) and !defined($line2); # eof on both files
   715 		if ($line1 ne $line2) {
   716 			printf($log "Text comparison: %s failed at lines %d/%d\n< %s> %s\n",
   717 			$filename, $lineNum1, $lineNum2, $line1, $line2) if $verbose;
   718 			return 0;
   719 		}
   720 	}
   721 }
   723 sub Drain {
   724   foreach my $handle (@_) {
   725     while (my $line = <$handle>) {
   726     }
   727   }
   728 }
   730 sub RelativePath {
   731   my $name = shift;
   732   if (($name =~ /^\\[^\\]/) || ($name =~ /^\//)) {  # abs path (unix or windows), not UNC
   733     unless ($toRoot) {
   734       $toRoot = getcwd();
   735       $toRoot =~ s/\//\\/g;
   736       $toRoot =~ s/^[a-zA-Z]:\\(.*)$/$1/;
   737       $toRoot =~ s/[^\\]+/../g;
   738       if ($toRoot =~ /^$/) {
   739   $toRoot = '.';    # because we are starting in the root
   740       }
   741     }
   742     return $toRoot.$name;
   743   }
   744   return $name;
   745 }
   747 # Function to expand compressed formats and recompare expanded files
   748 # This is the file against file implementation
   749 # It returns one identical / non indentical result based on all files in the
   750 # expanded content. i.e one non identical expanded file will cause the non
   751 # expanded file to be reported as non identical.
   752 sub ExpandAndCompareFiles
   753 {
   754   my $file1 = shift;
   755   my $file2 = shift;
   756   my $expandor = shift;
   758   # Create two temporary directories
   759   my $tempdir1 = tempdir ( "EvalidExpand_XXXXXX", DIR => File::Spec->tmpdir, CLEANUP => 1);
   760   my $tempdir2 = tempdir ( "EvalidExpand_XXXXXX", DIR => File::Spec->tmpdir, CLEANUP => 1);
   762   # Build the Expandor commandline
   763   my $cmd1 = $expandor;
   764   $cmd1 =~ s/%TEMPDIR%/$tempdir1/g;
   765   $cmd1 =~ s/%FILE%/$file1/g;
   767   my $cmd2 = $expandor;
   768   $cmd2 =~ s/%TEMPDIR%/$tempdir2/g;
   769   $cmd2 =~ s/%FILE%/$file2/g;
   771   # Expand files
   772   my $output = `$cmd1 2>&1`;
   773   print($log "Expanding using $cmd1 output was:-\n$output") if ($verbose);
   774   if ($? > 0)
   775   {
   776     print ($log "$cmd1 exited with $?") if ($verbose);
   777     return -1;
   778   }
   780   $output = `$cmd2 2>&1`;
   781   print($log "Expanding using $cmd2 output was:-\n$output") if ($verbose);
   782   if ($? > 0)
   783   {
   784     print ($log "$cmd2 exited with $?") if ($verbose);
   785     return -1;
   786   }
   788   # Produce full filelist of expanded files without directory names
   789   my %iFileList1;
   790   $tempdir1 =~ s#\\#/#g; # Make sure the dir seperators are / for consistent and easier matching.
   791   find sub {
   792             if (!-d)
   793             {
   794               my ($fixedpath) = $File::Find::name;
   795               $fixedpath =~ s#\\#/#g;
   796               my ($relpath) = $File::Find::name =~ /$tempdir1(.*)/i;
   797               $iFileList1{$relpath} = "left";
   798             }
   799           }, $tempdir1;
   801   my %iFileList2;
   802   $tempdir2 =~ s#\\#/#g; # Make sure the dir seperators are / for consistent and easier matching.
   803   find sub {
   804             if (!-d)
   805             {
   806               my ($fixedpath) = $File::Find::name;
   807               $fixedpath =~ s#\\#/#g;
   808               my ($relpath) = $File::Find::name =~ /$tempdir2(.*)/i;
   809               $iFileList2{$relpath} = "right";
   810             }
   811           }, $tempdir2;
   813   #Work out the if the two file lists are different
   814   foreach my $file (sort keys %iFileList1)
   815   {
   816     if (! defined $iFileList2{$file})
   817     {
   818       # If the filename does not exist in the second filelist the compressed files cannot be the same.
   819       print ($log "Did not find $file in $file2\n") if ($verbose);
   820       return 0;
   821     } else {
   822       delete $iFileList2{$file}
   823     }
   824   }
   826   # There are extra files in the second compressed file therefore the compressed files cannot be the same.
   827   if (scalar(keys %iFileList2) > 0)
   828   {
   829     print ($log "$file2 contained more files than $file1\n") if ($verbose);
   830     return 0;
   831   }
   833   print($log "Comparing content\n") if ($verbose);
   834   #filelist1 and filelist2 contain all the same filenames, now compare the contents of each file
   835   my $same = -1; # Variable to store collated result of comparison, assume an error
   836   foreach my $file (keys %iFileList1)
   837   {
   838     my $type; 
   839     ($same, $type) = CompareFiles($tempdir1.$file,$tempdir2.$file, $verbose, $log);
   840     print ($log "Comparing $tempdir1.$file against $tempdir2.$file\n") if ($verbose);
   841     last if ($same == 0); # do not bother comparing more files if one of the expanded files is different.
   842   }
   844   #Cleanup the temporary directories
   845   rmtree([$tempdir1,$tempdir2]);
   847   return $same;
   848 }
   850 # Create descriptive versions of input files in response to the -d option to MD5 generation
   851 sub dumpDescriptiveOutput ($$@)
   852 	{
   853 	my ($originalFile, $dumpDirFile, @content) = @_;
   855 	my $currentDir = cwd;
   856 	my $drive = "";
   857 	$dumpDirFile =~ s/^.://;  # Remove drive letter 
   859 	$drive = $1 if ($currentDir =~ /^(\w{1}:)\//);
   861 	my $DUMPFILE = $dumpDir;
   862 	$DUMPFILE = cwd."\\$dumpDir" if ($dumpDir !~ /^(\\|\/|\w{1}:\\)/);
   863 	$DUMPFILE = $drive.$dumpDir if ($dumpDir =~ /^\\/);
   864 	$DUMPFILE .= "\\" if ($DUMPFILE !~ /(\\|\/)$/);
   865 	$DUMPFILE .= $dumpDirFile;
   866 	$DUMPFILE =~ s/\//\\/g;
   868 	# This is most likely to come about due to maintaining path structures in expanded archives e.g. .chm files
   869 	if (length ($DUMPFILE) > 255)
   870 		{
   871 		print ("Warning: Not attempting to create \"$DUMPFILE\" as it exceeds Windows MAX_PATH limit.\n");
   872 		return;
   873 		}
   875 	mkpath (dirname ($DUMPFILE));
   877 	my $success = 0;
   879 	if (@content)
   880 		{
   881 		if (open DUMPFILE, "> $DUMPFILE")
   882 			{
   883 			print DUMPFILE $_ foreach (@content);
   884 			close DUMPFILE;
   885 			$success = 1;
   886 			}
   887 		}
   888 	else
   889 		{
   890 		$success = 1 if (copy ($originalFile, $DUMPFILE));
   891 		}
   893 	print ("Warning: Cannot create \"$DUMPFILE\".\n") if (!$success);
   894 	}
   897 #
   898 # Filters.
   899 #
   901 sub Elf2E32Filter {
   902   my $line = shift;
   903   if ($$line =~ /Time Stamp:|E32ImageFile|Header CRC:/) { # Ignore time stamps, file name and Header CRC which uses the timestamp.
   904     $$line = '';
   905   }
   906   if ($$line =~ /imports from /) {
   907   	$$line = lc $$line;	# DLL names are not case-sensitive in the Symbian platform loader
   908   }
   909 }
   911 sub ElfDumpFilter {
   912   my $line = shift;
   913   $$line  =~ s/^\tProgram header offset.*$/Program header offset/;
   914   $$line  =~ s/^\tSection header offset.*$/Section header offset/;
   915   $$line  =~ s/#<DLL>(\S+\.\S+)#<\\DLL>/#<DLL>\L$1\E#<\\DLL>/; # DLL names are not case-sensitive in the Symbian platform loader
   916   if ($$line =~ /^\.(rel\.)?debug_/) {
   917 	$$line = ''; # additional debug-related information - not considered significant
   918 	}
   919 }
   921 sub NmFilter {
   922   my $line = shift;
   923   $$line =~ s/^.*:$//;                # ignore the filenames
   924   $$line =~ s/\.\.\\[^(]*\\//g;
   925   $$line =~ s/\.\.\/[^(]*\///g;  # ignore pathnames of object files
   926   $$line =~ s/^BFD: (.*)$//;		# ignore the Binary File Descriptor(BFD) warning messages
   927   if ($$line =~ /^(.+ (_head|_))\w+_(EPOC32_\w+(_LIB|_iname))$/i) {
   928     # dlltool uses the "-o" argument string as the basis for a "unique symbol", but
   929     # doesn't turn the name into a canonical form first.
   930     # dh.o:
   931     #          U ________EPOC32_RELEASE_ARM4_UREL_EIKCOCTL_LIB_iname
   932     # 00000000 ? _head_______EPOC32_RELEASE_ARM4_UREL_EIKCOCTL_LIB
   933     $$line = uc "$1_..._$3\n";
   934   }
   935 }
   938 sub MapFilter {
   939   my $line = shift;
   940   $$line =~ s/([d-z])\d*s_?\d+\.o/$1s999.o/;                     # ignore the names of intermediate files in .LIB
   941   $$line =~ s/([d-z])\d*([ht])\.o/$1$2.o/;                       # ignore the names of intermediate files in .LIB
   942   $$line =~ s-/-\\-go;                                           # convert / into \
   943   $$line =~ s/(\.\.\\|.:\\)[^(]*\\//g;                           # ignore pathnames of object files
   944   $$line =~ s/\.stab.*$//;                                       # ignore .stab and .stabstr lines
   945   $$line =~ s/0x.*size before relaxing//;                        # ignore additional comments about .stab and .stabstr
   946   $$line =~ s/(_head|_)\w+_(EPOC32_\w+(_LIB|_iname))/$1_,,,_$3/; # dlltool-generated unique symbols
   947   $$line =~ s/Timestamp is .*$//;                                # ignore timestamps in DevStudio map files
   948   if ($$line =~ /^ARM Linker,/) {      
   949 	$$line = '';
   950   }																 # ignore the message that armlink's license will expire. (in RVCT MAP file)
   951   if ($$line =~ /^Your license/) {								 
   952 	$$line = '';
   953   }
   954   $$line =~ s/\s__T\d{8}\s/ __Tnnnnnnnn /;                       # ignore RVCT generated internal symbols
   955   if ($$line =~ /0x00000000   Number         0 /) {              # ignore filenames in RVCT link maps
   956     $$line = '';
   957   }
   959   # Ignore various case differences:
   961   ## RVCT
   963   # source filenames turning up in mangled symbols e.g.:
   964   #     __sti___13_BALServer_cpp                 0x000087c9   Thumb Code    52  BALServer.o(.text)
   965   $$line =~ s/^(\s+__sti___\d+_)(\w+)(.*\(\.text\))$/$1\L$2\E$3/;
   967   # object filenames e.g.:
   968   #     .text                                    0x0000a01c   Section      164  AssertE.o(.text)
   969   $$line =~ s/^(\s+\.text\s+0x[0-9A-Fa-f]{8}\s+Section\s+\d+\s+)(.+)(\(\.text\))$/$1\L$2\E$3/;
   971   ## WINSCW
   973   # import/static libraries processed listed in the last section e.g.:
   974   #1      EDLL.LIB
   975   #99     EDLL.LIB (not used)
   976   $$line =~ s/^(\d{1,2} {5,6})(\w+\.lib)( \(not used\)|)$/$1\L$2\E$3/i;
   977 }
   979 sub UnzipFilter {
   980   my $line = shift;
   981   $$line =~ s/^Archive:.*$/Archive/;                 # ignore the archive names
   982   # Line format of unzip -l -v
   983   # Length   Method    Size  Ratio   Date   Time   CRC-32    Name, Date can be dd-mm-yy or mm/dd/yy
   984   $$line =~ s/ (\d+).*? ..-..-..\s+..:.. / ($1) 99-99-99 99:99 /;  # ignore (Method Size Ratio Date Time) on contained files
   985   $$line =~ s^ (\d+).*? ..\/..\/..\s+..:.. ^ ($1) 99-99-99 99:99 ^;  # ignore (Method Size Ratio Date Time) on contained files
   986 }
   988 sub SgmlFilter {
   989   my $line = shift;
   990   $$line =~ s/<!--.*-->//;  # ignore comments such as "generated by javadoc"
   991 }
   993 sub PreprocessedTextFilter {
   994   my $line = shift;
   995   $$line =~ s/^# \d+ ".*"( \d)?$//;  # ignore #include history
   996 }
   998 sub FilterCVSTags {
   999   my $line = shift;
  1000   $$line =~ s#//\s+\$(?:Id|Name|Header|Date|DateTime|Change|File|Revision|Author):.*\$$##m;
  1001   # Remove tags like:
  1002   # // $Id: //my/perforce/here $
  1003   # which may be inserted into source code by some licensees
  1004 }
  1006 sub PermanentFileStoreFilter {
  1007   my $line = shift;
  1008   $$line =~ s/^Dumping .*$/Dumping (file)/;  # ignore the source file names
  1009 }
  1011 sub DistributionPolicyFilter {
  1012   my $line = shift;
  1013   $$line =~ s/# DistPolGen.*//;
  1014 }
  1016 sub FilterAll {
  1017   my $line = shift;
  1018   $$line = '';
  1019 }
  1021 sub FilterNone {
  1022 }
  1024 1;
  1026 __END__
  1028 =head1 NAME
EvalidCompare - Utilities for comparing the contents of files.
  1032 =head1 DESCRIPTION
This package has been largely factored out of the C<e32toolp> tool C<evalid>. The main pieces of borrowed functionality are the ability to identify file types by examining their content, and the ability to filter irrelevant data out of files to allow comparisons to be performed. This refactoring was done in order to allow both direct and indirect comparisons of files to be supported. Direct comparisons are done by reading a pair of files (in the same way that C<evalid> does). Indirect comparisons are done by generating MD5 signatures of the files to be compared. The latter method allows comparisons to be performed much more efficiently, because only one file need be present provided the signature of the other is known.
  1036 =head1 INTERFACE
  1038 =head2 CompareFiles
  1040 Expects to be passed a pair of file names. May optionally also be passed a verbosity level (defaults to 0) and a file handle for logging purposes (defaults to *STDIN). Returns 1 if the files match, 0 if not. Firstly does a raw binary compare of the two files. If they match, no further processing is done and 1 is returned. If not, the type of the first file is found and the files are re-compared, this time ignoring data known to be irrelevant for the file type. The result of this compare is then returned.
  1042 =head2 GenerateSignature
Expects to be passed a file name. May optionally also be passed a verbosity level (defaults to 0) and a file handle for logging purposes (defaults to *STDIN). Returns an MD5 signature of the specified file contents, having ignored irrelevant data associated with its type. This signature may subsequently be used to verify that the contents of the file have not been altered in a significant way.
  1046 =head1 KNOWN BUGS
  1048 None.
  1050 =head1 COPYRIGHT
  1052  Copyright (c) 2005-2009 Nokia Corporation and/or its subsidiary(-ies).
  1053  All rights reserved.
  1054  This component and the accompanying materials are made available
  1055  under the terms of the License "Eclipse Public License v1.0"
  1056  which accompanies this distribution, and is available
  1057  at the URL "".
  1059  Initial Contributors:
  1060  Nokia Corporation - initial contribution.
  1062  Contributors:
  1064  Description: 
  1066 =cut
  1068 __END__