libraries/spcre/libpcre/pcre/perltest.pl
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 #! /usr/bin/env perl
       
     2 
       
     3 # Program for testing regular expressions with perl to check that PCRE handles
       
     4 # them the same. This is the version that supports /8 for UTF-8 testing. As it
       
     5 # stands, it requires at least Perl 5.8 for UTF-8 support. However, it needs to
       
     6 # have "use utf8" at the start for running the UTF-8 tests, but *not* for the
       
     7 # other tests. The only way I've found for doing this is to cat this line in
       
     8 # explicitly in the RunPerlTest script.
       
     9 
       
    10 # use locale;  # With this included, \x0b matches \s!
       
    11 
       
    12 # Function for turning a string into a string of printing chars. There are
       
    13 # currently problems with UTF-8 strings; this fudges round them.
       
    14 
       
    15 sub pchars {
       
    16 my($t) = "";
       
    17 
       
    18 if ($utf8)
       
    19   {
       
    20   @p = unpack('U*', $_[0]);
       
    21   foreach $c (@p)
       
    22     {
       
    23     if ($c >= 32 && $c < 127) { $t .= chr $c; }
       
    24       else { $t .= sprintf("\\x{%02x}", $c); }
       
    25     }
       
    26   }
       
    27 
       
    28 else
       
    29   {
       
    30   foreach $c (split(//, $_[0]))
       
    31     {
       
    32     if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
       
    33       else { $t .= sprintf("\\x%02x", ord $c); }
       
    34     }
       
    35   }
       
    36 
       
    37 $t;
       
    38 }
       
    39 
       
    40 
       
    41 # Read lines from named file or stdin and write to named file or stdout; lines
       
    42 # consist of a regular expression, in delimiters and optionally followed by
       
    43 # options, followed by a set of test data, terminated by an empty line.
       
    44 
       
    45 # Sort out the input and output files
       
    46 
       
    47 if (@ARGV > 0)
       
    48   {
       
    49   open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
       
    50   $infile = "INFILE";
       
    51   }
       
    52 else { $infile = "STDIN"; }
       
    53 
       
    54 if (@ARGV > 1)
       
    55   {
       
    56   open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
       
    57   $outfile = "OUTFILE";
       
    58   }
       
    59 else { $outfile = "STDOUT"; }
       
    60 
       
    61 printf($outfile "Perl $] Regular Expressions\n\n");
       
    62 
       
    63 # Main loop
       
    64 
       
    65 NEXT_RE:
       
    66 for (;;)
       
    67   {
       
    68   printf "  re> " if $infile eq "STDIN";
       
    69   last if ! ($_ = <$infile>);
       
    70   printf $outfile "$_" if $infile ne "STDIN";
       
    71   next if ($_ eq "");
       
    72 
       
    73   $pattern = $_;
       
    74 
       
    75   while ($pattern !~ /^\s*(.).*\1/s)
       
    76     {
       
    77     printf "    > " if $infile eq "STDIN";
       
    78     last if ! ($_ = <$infile>);
       
    79     printf $outfile "$_" if $infile ne "STDIN";
       
    80     $pattern .= $_;
       
    81     }
       
    82 
       
    83    chomp($pattern);
       
    84    $pattern =~ s/\s+$//;
       
    85 
       
    86   # The private /+ modifier means "print $' afterwards".
       
    87 
       
    88   $showrest = ($pattern =~ s/\+(?=[a-z]*$)//);
       
    89 
       
    90   # Remove /8 from a UTF-8 pattern.
       
    91 
       
    92   $utf8 = $pattern =~ s/8(?=[a-z]*$)//;
       
    93 
       
    94   # Check that the pattern is valid
       
    95 
       
    96   eval "\$_ =~ ${pattern}";
       
    97   if ($@)
       
    98     {
       
    99     printf $outfile "Error: $@";
       
   100     next NEXT_RE;
       
   101     }
       
   102 
       
   103   # If the /g modifier is present, we want to put a loop round the matching;
       
   104   # otherwise just a single "if".
       
   105 
       
   106   $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
       
   107 
       
   108   # If the pattern is actually the null string, Perl uses the most recently
       
   109   # executed (and successfully compiled) regex is used instead. This is a
       
   110   # nasty trap for the unwary! The PCRE test suite does contain null strings
       
   111   # in places - if they are allowed through here all sorts of weird and
       
   112   # unexpected effects happen. To avoid this, we replace such patterns with
       
   113   # a non-null pattern that has the same effect.
       
   114 
       
   115   $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
       
   116 
       
   117   # Read data lines and test them
       
   118 
       
   119   for (;;)
       
   120     {
       
   121     printf "data> " if $infile eq "STDIN";
       
   122     last NEXT_RE if ! ($_ = <$infile>);
       
   123     chomp;
       
   124     printf $outfile "$_\n" if $infile ne "STDIN";
       
   125 
       
   126     s/\s+$//;
       
   127     s/^\s+//;
       
   128 
       
   129     last if ($_ eq "");
       
   130     $x = eval "\"$_\"";   # To get escapes processed
       
   131 
       
   132     # Empty array for holding results, then do the matching.
       
   133 
       
   134     @subs = ();
       
   135 
       
   136     $pushes = "push \@subs,\$&;" .
       
   137          "push \@subs,\$1;" .
       
   138          "push \@subs,\$2;" .
       
   139          "push \@subs,\$3;" .
       
   140          "push \@subs,\$4;" .
       
   141          "push \@subs,\$5;" .
       
   142          "push \@subs,\$6;" .
       
   143          "push \@subs,\$7;" .
       
   144          "push \@subs,\$8;" .
       
   145          "push \@subs,\$9;" .
       
   146          "push \@subs,\$10;" .
       
   147          "push \@subs,\$11;" .
       
   148          "push \@subs,\$12;" .
       
   149          "push \@subs,\$13;" .
       
   150          "push \@subs,\$14;" .
       
   151          "push \@subs,\$15;" .
       
   152          "push \@subs,\$16;" .
       
   153          "push \@subs,\$'; }";
       
   154 
       
   155     eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
       
   156 
       
   157     if ($@)
       
   158       {
       
   159       printf $outfile "Error: $@\n";
       
   160       next NEXT_RE;
       
   161       }
       
   162     elsif (scalar(@subs) == 0)
       
   163       {
       
   164       printf $outfile "No match\n";
       
   165       }
       
   166     else
       
   167       {
       
   168       while (scalar(@subs) != 0)
       
   169         {
       
   170         printf $outfile (" 0: %s\n", &pchars($subs[0]));
       
   171         printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
       
   172         $last_printed = 0;
       
   173         for ($i = 1; $i <= 16; $i++)
       
   174           {
       
   175           if (defined $subs[$i])
       
   176             {
       
   177             while ($last_printed++ < $i-1)
       
   178               { printf $outfile ("%2d: <unset>\n", $last_printed); }
       
   179             printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
       
   180             $last_printed = $i;
       
   181             }
       
   182           }
       
   183         splice(@subs, 0, 18);
       
   184         }
       
   185       }
       
   186     }
       
   187   }
       
   188 
       
   189 # printf $outfile "\n";
       
   190 
       
   191 # End