#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.

use strict;
use warnings;

my $blankcount  = 0;    # Blank lines seen but not yet written out
my $lastwascut  = 0;    # Set when the previous action was cutting a footer
my $firstheader = 1;    # Set until the first header line has been printed
my $lastprinted;        # Last non-blank line written; undef until one is

# Return a copy of the given line with screen controls and overstrikes
# removed. Only a single pass is made for each pattern.

sub clean_line {
    my ($text) = @_;
    $text =~ s/\x1b\[\d+m//g;    # Remove screen controls "ESC [ number m"
    $text =~ s/.\x8//g;          # Remove "char, backspace"
    return $text;
}

# Return the leading white space of the given line (possibly empty).

sub leading_space {
    my ($text) = @_;
    my ($indent) = $text =~ /^(\s*)/;
    return $indent;
}

# Input on STDIN; output to STDOUT.

while (defined(my $line = <STDIN>)) {
    $line = clean_line($line);

    # Handle header lines. Retain only the first one we encounter, but remove
    # the blank line that follows. Any others (e.g. at end of document) and the
    # following blank line are dropped.

    if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
        if ($firstheader) {
            $firstheader = 0;
            print $line;
            $lastprinted = $line;
            $lastwascut  = 0;
        }

        # The line after a header is discarded without being inspected; it is
        # expected to be blank.
        my $discarded = <STDIN>;
        next;
    }

    # Count runs of empty lines. They are not printed here; the code below
    # decides how many of them to emit once a non-blank line turns up.

    if ($line =~ /^\s*$/) {
        $blankcount++;
        $lastwascut = 0;
        next;
    }

    # If a chunk of lines has been cut out (page footer) and the next line
    # has a different indentation, put back one blank line.

    if ($lastwascut && $blankcount < 1 && defined($lastprinted)) {
        $blankcount++
            if leading_space($lastprinted) ne leading_space($line);
    }

    # We get here only when we have a non-blank line in hand. If it was preceded
    # by 3 or more blank lines, read the next 3 lines and see if they are blank.
    # If so, remove all 7 lines, and remember that we have just done a cut.

    if ($blankcount >= 3) {
        my @next;
        for my $i (0 .. 2) {
            my $ahead = <STDIN>;
            $ahead = "" if !defined $ahead;    # At end of input, act as blank
            $next[$i] = clean_line($ahead);
        }

        # Cut out chunks of the form <3 blanks><non-blank><3 blanks>

        if (   $next[0] =~ /^\s*$/
            && $next[1] =~ /^\s*$/
            && $next[2] =~ /^\s*$/)
        {
            $blankcount -= 3;
            $lastwascut = 1;
        }

        # Otherwise output the saved blanks, the current, and the next three
        # lines. Remember the last printed line.

        else {
            print "\n" x $blankcount;
            print $line;
            $lastprinted = $line;
            for my $ahead (@next) {

                # NOTE(review): read-ahead lines get a second overstrike pass
                # here that the current line does not get. It is kept so that
                # output for input with consecutive backspaces is unchanged.
                $ahead =~ s/.\x8//g;
                print $ahead;

                # Remember the line actually printed (previously the current
                # line was stored on every iteration). Blank read-ahead lines
                # are skipped so that the value stays a non-blank line, as it
                # is everywhere else in this script.
                $lastprinted = $ahead if $ahead =~ /\S/;
            }
            $lastwascut = 0;
            $blankcount = 0;
        }
    }

    # This non-blank line is not preceded by 3 or more blank lines. Output
    # any blanks there are, and the line. Remember it. Force two blank lines
    # before headings.

    else {
        $blankcount = 2
            if $line =~ /^\S/
            && $line !~ /^Last updated/
            && $line !~ /^Copyright/
            && defined($lastprinted);
        print "\n" x $blankcount;
        print $line;
        $lastprinted = $line;
        $lastwascut  = 0;
        $blankcount  = 0;
    }
}

# End