libraries/spcre/libpcre/pcre/132html
changeset 0 7f656887cf89
equal deleted inserted replaced
-1:000000000000 0:7f656887cf89
       
     1 #! /usr/bin/perl -w
       
     2 
       
     3 # Script to turn PCRE man pages into HTML
       
     4 
       
     5 
       
     6 # Subroutine to handle font changes and other escapes
       
     7 
       
     8 sub do_line {
       
     9 my($s) = $_[0];
       
    10 
       
    11 $s =~ s/</&#60;/g;                   # Deal with < and >
       
    12 $s =~ s/>/&#62;/g;
       
    13 $s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
       
    14 $s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
       
    15 $s =~ s"\\e"\\"g;
       
    16 $s =~ s/(?<=Copyright )\(c\)/&copy;/g;
       
    17 $s;
       
    18 }
       
    19 
       
    20 # Subroutine to ensure not in a paragraph
       
    21 
       
    22 sub end_para {
       
    23 if ($inpara)
       
    24   {
       
    25   print TEMP "</PRE>\n" if ($inpre);
       
    26   print TEMP "</P>\n";
       
    27   }
       
    28 $inpara = $inpre = 0;
       
    29 $wrotetext = 0;
       
    30 }
       
    31 
       
    32 # Subroutine to start a new paragraph
       
    33 
       
    34 sub new_para {
       
    35 &end_para();
       
    36 print TEMP "<P>\n";
       
    37 $inpara = 1;
       
    38 }
       
    39 
       
    40 
       
    41 # Main program
       
    42 
       
    43 $innf = 0;
       
    44 $inpara = 0;
       
    45 $inpre = 0;
       
    46 $wrotetext = 0;
       
    47 $toc = 0;
       
    48 $ref = 1;
       
    49 
       
    50 while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
       
    51   {
       
    52   $toc = 1 if $ARGV[0] eq "-toc";
       
    53   shift;
       
    54   }
       
    55 
       
    56 # Initial output to STDOUT
       
    57 
       
    58 print <<End ;
       
    59 <html>
       
    60 <head>
       
    61 <title>$ARGV[0] specification</title>
       
    62 </head>
       
    63 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
       
    64 <h1>$ARGV[0] man page</h1>
       
    65 <p>
       
    66 Return to the <a href="index.html">PCRE index page</a>.
       
    67 </p>
       
    68 <p>
       
    69 This page is part of the PCRE HTML documentation. It was generated automatically
       
    70 from the original man page. If there is any nonsense in it, please consult the
       
    71 man page, in case the conversion went wrong.
       
    72 <br>
       
    73 End
       
    74 
       
    75 print "<ul>\n" if ($toc);
       
    76 
       
    77 open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
       
    78 
       
    79 while (<STDIN>)
       
    80   {
       
    81   # Handle lines beginning with a dot
       
    82 
       
    83   if (/^\./)
       
    84     {
       
    85     # Some of the PCRE man pages used to contain instances of .br. However,
       
    86     # they should have all been removed because they cause trouble in some
       
    87     # (other) automated systems that translate man pages to HTML. Complain if
       
    88     # we find .br or .in (another macro that is deprecated).
       
    89 
       
    90     if (/^\.br/ || /^\.in/)
       
    91       {
       
    92       print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
       
    93       print STDERR "*** $_\n";
       
    94       die "*** Processing abandoned\n";
       
    95       }
       
    96 
       
    97     # Instead of .br, relevent "literal" sections are enclosed in .nf/.fi.
       
    98 
       
    99     elsif (/^\.nf/)
       
   100       {
       
   101       $innf = 1;
       
   102       }
       
   103 
       
   104     elsif (/^\.fi/)
       
   105       {
       
   106       $innf = 0;
       
   107       }
       
   108 
       
   109     # Handling .sp is subtle. If it is inside a literal section, do nothing if
       
   110     # the next line is a non literal text line; similarly, if not inside a
       
   111     # literal section, do nothing if a literal follows. The point being that
       
   112     # the <pre> and </pre> that delimit literal sections will do the spacing.
       
   113     # Always skip if no previous output.
       
   114 
       
   115     elsif (/^\.sp/)
       
   116       {
       
   117       if ($wrotetext)
       
   118         {
       
   119         $_ = <STDIN>;
       
   120         if ($inpre)
       
   121           {
       
   122           print TEMP "\n" if (/^[\s.]/);
       
   123           }
       
   124         else
       
   125           {
       
   126           print TEMP "<br>\n<br>\n" if (!/^[\s.]/);
       
   127           }
       
   128         redo;    # Now process the lookahead line we just read
       
   129         }
       
   130       }
       
   131     elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
       
   132       {
       
   133       &new_para();
       
   134       }
       
   135     elsif (/^\.SH\s*("?)(.*)\1/)
       
   136       {
       
   137       # Ignore the NAME section
       
   138       if ($2 =~ /^NAME\b/)
       
   139         {
       
   140         <STDIN>;
       
   141         next;
       
   142         }
       
   143 
       
   144       &end_para();
       
   145       my($title) = &do_line($2);
       
   146       if ($toc)
       
   147         {
       
   148         printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
       
   149           $ref, $ref);
       
   150         printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
       
   151           $ref, $ref);
       
   152         $ref++;
       
   153         }
       
   154       else
       
   155         {
       
   156         print TEMP "<br><b>\n$title\n</b><br>\n";
       
   157         }
       
   158       }
       
   159     elsif (/^\.SS\s*("?)(.*)\1/)
       
   160       {
       
   161       &end_para();
       
   162       my($title) = &do_line($2);
       
   163       print TEMP "<br><b>\n$title\n</b><br>\n";
       
   164       }
       
   165     elsif (/^\.B\s*(.*)/)
       
   166       {
       
   167       &new_para() if (!$inpara);
       
   168       $_ = &do_line($1);
       
   169       s/"(.*?)"/$1/g;
       
   170       print TEMP "<b>$_</b>\n";
       
   171       $wrotetext = 1;
       
   172       }
       
   173     elsif (/^\.I\s*(.*)/)
       
   174       {
       
   175       &new_para() if (!$inpara);
       
   176       $_ = &do_line($1);
       
   177       s/"(.*?)"/$1/g;
       
   178       print TEMP "<i>$_</i>\n";
       
   179       $wrotetext = 1;
       
   180       }
       
   181 
       
   182     # A comment that starts "HREF" takes the next line as a name that
       
   183     # is turned into a hyperlink, using the text given, which might be
       
   184     # in a special font. If it ends in () or (digits) or punctuation, they
       
   185     # aren't part of the link.
       
   186 
       
   187     elsif (/^\.\\"\s*HREF/)
       
   188       {
       
   189       $_=<STDIN>;
       
   190       chomp;
       
   191       $_ = &do_line($_);
       
   192       $_ =~ s/\s+$//;
       
   193       $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
       
   194       print TEMP "<a href=\"$1.html\">$_</a>\n";
       
   195       }
       
   196 
       
   197     # A comment that starts "HTML" inserts literal HTML
       
   198 
       
   199     elsif (/^\.\\"\s*HTML\s*(.*)/)
       
   200       {
       
   201       print TEMP $1;
       
   202       }
       
   203 
       
   204     # A comment that starts < inserts that HTML at the end of the
       
   205     # *next* input line - so as not to get a newline between them.
       
   206 
       
   207     elsif (/^\.\\"\s*(<.*>)/)
       
   208       {
       
   209       my($markup) = $1;
       
   210       $_=<STDIN>;
       
   211       chomp;
       
   212       $_ = &do_line($_);
       
   213       $_ =~ s/\s+$//;
       
   214       print TEMP "$_$markup\n";
       
   215       }
       
   216 
       
   217     # A comment that starts JOIN joins the next two lines together, with one
       
   218     # space between them. Then that line is processed. This is used in some
       
   219     # displays where two lines are needed for the "man" version. JOINSH works
       
   220     # the same, except that it assumes this is a shell command, so removes
       
   221     # continuation backslashes.
       
   222 
       
   223     elsif (/^\.\\"\s*JOIN(SH)?/)
       
   224       {
       
   225       my($one,$two);
       
   226       $one = <STDIN>;
       
   227       $two = <STDIN>;
       
   228       $one =~ s/\s*\\e\s*$// if (defined($1));
       
   229       chomp($one);
       
   230       $two =~ s/^\s+//;
       
   231       $_ = "$one $two";
       
   232       redo;            # Process the joined lines
       
   233       }
       
   234 
       
   235     # Ignore anything not recognized
       
   236 
       
   237     next;
       
   238     }
       
   239 
       
   240   # Line does not begin with a dot. Replace blank lines with new paragraphs
       
   241 
       
   242   if (/^\s*$/)
       
   243     {
       
   244     &end_para() if ($wrotetext);
       
   245     next;
       
   246     }
       
   247 
       
   248   # Convert fonts changes and output an ordinary line. Ensure that indented
       
   249   # lines are marked as literal.
       
   250 
       
   251   $_ = &do_line($_);
       
   252   &new_para() if (!$inpara);
       
   253 
       
   254   if (/^\s/)
       
   255     {
       
   256     if (!$inpre)
       
   257       {
       
   258       print TEMP "<pre>\n";
       
   259       $inpre = 1;
       
   260       }
       
   261     }
       
   262   elsif ($inpre)
       
   263     {
       
   264     print TEMP "</pre>\n";
       
   265     $inpre = 0;
       
   266     }
       
   267 
       
   268   # Add <br> to the end of a non-literal line if we are within .nf/.fi
       
   269 
       
   270   $_ .= "<br>\n" if (!$inpre && $innf);
       
   271 
       
   272   print TEMP;
       
   273   $wrotetext = 1;
       
   274   }
       
   275 
       
   276 # The TOC, if present, will have been written - terminate it
       
   277 
       
   278 print "</ul>\n" if ($toc);
       
   279 
       
   280 # Copy the remainder to the standard output
       
   281 
       
   282 close(TEMP);
       
   283 open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
       
   284 
       
   285 print while (<TEMP>);
       
   286 
       
   287 print <<End ;
       
   288 <p>
       
   289 Return to the <a href="index.html">PCRE index page</a>.
       
   290 </p>
       
   291 End
       
   292 
       
   293 close(TEMP);
       
   294 unlink("/tmp/$$");
       
   295 
       
   296 # End