libraries/spcre/libpcre/pcre/CleanTxt
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     #! /usr/bin/perl -w

     # Script to take the output of nroff -man and remove all the backspacing and
     # the page footers and the screen commands etc so that it is more usefully
     # readable online. In fact, in the latest nroff, intermediate footers don't
     # seem to be generated any more.
     #
     # Input on STDIN; output to STDOUT.

     use strict;
     use warnings;

     my $blankcount  = 0;    # Blank lines seen but not yet output
     my $lastwascut  = 0;    # True when the previous action was a footer cut
     my $firstheader = 1;    # True until the first page header has been output
     my $lastprinted;        # Last non-blank line output; undef until there is one
     my @pushback;           # Lines read ahead that must be processed again

     # Return the next raw input line: a pushed-back line if there is one,
     # otherwise the next line of STDIN (undef at end of input).

     sub next_line {
         return shift @pushback if @pushback;
         return scalar <STDIN>;
     }

     # Return a copy of a line with the terminal decorations removed.

     sub strip_controls {
         my ($text) = @_;
         $text =~ s/\x1b\[\d+m//g;    # Remove screen controls "ESC [ number m"
         $text =~ s/.\x8//g;          # Remove "char, backspace"
         return $text;
     }

     # Return the leading white space of a line.

     sub indent_of {
         my ($text) = @_;
         my ($indent) = $text =~ /^(\s*)/;
         return $indent;
     }

     # Output the given number of empty lines.

     sub print_blanks {
         my ($count) = @_;
         print "\n" x $count if $count > 0;
         return;
     }

     while (defined(my $raw = next_line())) {
         my $line = strip_controls($raw);

         # Handle header lines. Retain only the first one we encounter, but remove
         # the blank line that follows. Any others (e.g. at end of document) and the
         # following blank line are dropped.

         if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
             if ($firstheader) {
                 $firstheader = 0;
                 print $line;
                 $lastprinted = $line;
                 $lastwascut  = 0;
             }

             # Drop the following line only when it really is blank; anything
             # else is real content and goes back on the queue (still raw, so
             # that it is cleaned exactly once when it is processed).

             my $following = next_line();
             unshift @pushback, $following
                 if defined $following
                 && strip_controls($following) !~ /^\s*$/;
             next;
         }

         # Count runs of empty lines

         if ($line =~ /^\s*$/) {
             $blankcount++;
             $lastwascut = 0;
             next;
         }

         # If a chunk of lines has been cut out (page footer) and the next line
         # has a different indentation, put back one blank line. Lexical helpers
         # are used here rather than $a and $b, which belong to sort().

         if ($lastwascut && $blankcount < 1 && defined $lastprinted) {
             $blankcount++ if indent_of($lastprinted) ne indent_of($line);
         }

         # We get here only when we have a non-blank line in hand. If it was preceded
         # by 3 or more blank lines, read the next 3 lines and see if they are blank.
         # If so, remove all 7 lines, and remember that we have just done a cut.

         if ($blankcount >= 3) {
             my @next;
             for my $i (0 .. 2) {
                 my $ahead = next_line();

                 # At end of input the missing lines count as blank.
                 $next[$i] = defined $ahead ? strip_controls($ahead) : "";
             }

             my $nonblank = grep { $_ !~ /^\s*$/ } @next;

             # Cut out chunks of the form <3 blanks><non-blank><3 blanks>

             if (!$nonblank) {
                 $blankcount -= 3;
                 $lastwascut = 1;
             }

             # Otherwise output the saved blanks, the current, and the next three
             # lines. Remember the last non-blank line that was output, so that a
             # later indentation comparison sees what actually precedes the cut.

             else {
                 print_blanks($blankcount);
                 print $line;
                 $lastprinted = $line;
                 for my $ahead (@next) {
                     print $ahead;
                     $lastprinted = $ahead if $ahead !~ /^\s*$/;
                 }
                 $lastwascut = 0;
                 $blankcount = 0;
             }
         }

         # This non-blank line is not preceded by 3 or more blank lines. Output
         # any blanks there are, and the line. Remember it. Force two blank lines
         # before headings.

         else {
             $blankcount = 2
                 if $line =~ /^\S/
                 && $line !~ /^Last updated/
                 && $line !~ /^Copyright/
                 && defined $lastprinted;
             print_blanks($blankcount);
             print $line;
             $lastprinted = $line;
             $lastwascut  = 0;
             $blankcount  = 0;
         }
     }

     # End