|
1 /************************************************* |
|
2 * pcregrep program * |
|
3 *************************************************/ |
|
4 |
|
5 /* This is a grep program that uses the PCRE regular expression library to do |
|
6 its pattern matching. On a Unix or Win32 system it can recurse into |
|
7 directories. |
|
8 |
|
9 Copyright (c) 1997-2008 University of Cambridge |
|
10 |
|
11 ----------------------------------------------------------------------------- |
|
12 Redistribution and use in source and binary forms, with or without |
|
13 modification, are permitted provided that the following conditions are met: |
|
14 |
|
15 * Redistributions of source code must retain the above copyright notice, |
|
16 this list of conditions and the following disclaimer. |
|
17 |
|
18 * Redistributions in binary form must reproduce the above copyright |
|
19 notice, this list of conditions and the following disclaimer in the |
|
20 documentation and/or other materials provided with the distribution. |
|
21 |
|
22 * Neither the name of the University of Cambridge nor the names of its |
|
23 contributors may be used to endorse or promote products derived from |
|
24 this software without specific prior written permission. |
|
25 |
|
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
|
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
36 POSSIBILITY OF SUCH DAMAGE. |
|
37 ----------------------------------------------------------------------------- |
|
38 */ |
|
39 |
|
40 #ifdef HAVE_CONFIG_H |
|
41 #include "config.h" |
|
42 #endif |
|
43 |
|
44 #include <ctype.h> |
|
45 #include <locale.h> |
|
46 #include <stdio.h> |
|
47 #include <string.h> |
|
48 #include <stdlib.h> |
|
49 #include <errno.h> |
|
50 |
|
51 #include <sys/types.h> |
|
52 #include <sys/stat.h> |
|
53 |
|
54 #ifdef HAVE_UNISTD_H |
|
55 #include <unistd.h> |
|
56 #endif |
|
57 |
|
58 #ifdef SUPPORT_LIBZ |
|
59 #include <zlib.h> |
|
60 #endif |
|
61 |
|
62 #ifdef SUPPORT_LIBBZ2 |
|
63 #include <bzlib.h> |
|
64 #endif |
|
65 |
|
66 #include "pcre.h" |
|
67 |
|
/* Boolean support: plain int with the usual two constants. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* Upper limit on the number of patterns. NOTE(review): the code that enforces
this limit is outside the part of the file visible here - confirm usage. */

#define MAX_PATTERN_COUNT 100

/* One third of the input buffer. pcregrep() declares its buffer as
3*MBUFTHIRD bytes; the value is never smaller than 8192 and follows BUFSIZ
when the C library's BUFSIZ is larger. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif
|
80 |
|
/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits). These are OR-ed
together in process_options and the result indexes the prefix[] and suffix[]
tables. */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types; the current one is held in endlinetype. */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
|
105 |
|
106 |
|
107 |
|
108 /************************************************* |
|
109 * Global variables * |
|
110 *************************************************/ |
|
111 |
|
112 /* Jeffrey Friedl has some debugging requirements that are not part of the |
|
113 regular code. */ |
|
114 |
|
115 #ifdef JFRIEDL_DEBUG |
|
116 static int S_arg = -1; |
|
117 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */ |
|
118 static unsigned int jfriedl_XT = 0; /* replicate text this many times */ |
|
119 static const char *jfriedl_prefix = ""; |
|
120 static const char *jfriedl_postfix = ""; |
|
121 #endif |
|
122 |
|
123 static int endlinetype; |
|
124 |
|
125 static char *colour_string = (char *)"1;31"; |
|
126 static char *colour_option = NULL; |
|
127 static char *dee_option = NULL; |
|
128 static char *DEE_option = NULL; |
|
129 static char *newline = NULL; |
|
130 static char *pattern_filename = NULL; |
|
131 static char *stdin_name = (char *)"(standard input)"; |
|
132 static char *locale = NULL; |
|
133 |
|
134 static const unsigned char *pcretables = NULL; |
|
135 |
|
136 static int pattern_count = 0; |
|
137 static pcre **pattern_list = NULL; |
|
138 static pcre_extra **hints_list = NULL; |
|
139 |
|
140 static char *include_pattern = NULL; |
|
141 static char *exclude_pattern = NULL; |
|
142 static char *include_dir_pattern = NULL; |
|
143 static char *exclude_dir_pattern = NULL; |
|
144 |
|
145 static pcre *include_compiled = NULL; |
|
146 static pcre *exclude_compiled = NULL; |
|
147 static pcre *include_dir_compiled = NULL; |
|
148 static pcre *exclude_dir_compiled = NULL; |
|
149 |
|
150 static int after_context = 0; |
|
151 static int before_context = 0; |
|
152 static int both_context = 0; |
|
153 static int dee_action = dee_READ; |
|
154 static int DEE_action = DEE_READ; |
|
155 static int error_count = 0; |
|
156 static int filenames = FN_DEFAULT; |
|
157 static int process_options = 0; |
|
158 |
|
159 static BOOL count_only = FALSE; |
|
160 static BOOL do_colour = FALSE; |
|
161 static BOOL file_offsets = FALSE; |
|
162 static BOOL hyphenpending = FALSE; |
|
163 static BOOL invert = FALSE; |
|
164 static BOOL line_offsets = FALSE; |
|
165 static BOOL multiline = FALSE; |
|
166 static BOOL number = FALSE; |
|
167 static BOOL only_matching = FALSE; |
|
168 static BOOL quiet = FALSE; |
|
169 static BOOL silent = FALSE; |
|
170 static BOOL utf8 = FALSE; |
|
171 |
|
/* Structure for options and list of them */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
       OP_PATLIST };

typedef struct option_item {
  int type;               /* one of the OP_* values above */
  int one_char;           /* single-letter form, or a negative N_* identifier */
  void *dataptr;          /* variable that receives the option's data, or NULL */
  const char *long_name;  /* long form; may carry an "=xxx" data description */
  const char *help_text;  /* one-line description */
} option_item;
|
184 |
|
/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
|
199 |
|
200 static option_item optionlist[] = { |
|
201 { OP_NODATA, N_NULL, NULL, "", " terminate options" }, |
|
202 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" }, |
|
203 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" }, |
|
204 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, |
|
205 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" }, |
|
206 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, |
|
207 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" }, |
|
208 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" }, |
|
209 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" }, |
|
210 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" }, |
|
211 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" }, |
|
212 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" }, |
|
213 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" }, |
|
214 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" }, |
|
215 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" }, |
|
216 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, |
|
217 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, |
|
218 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, |
|
219 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, |
|
220 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, |
|
221 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, |
|
222 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, |
|
223 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, |
|
224 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, |
|
225 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, |
|
226 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" }, |
|
227 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, |
|
228 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, |
|
229 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" }, |
|
230 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" }, |
|
231 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" }, |
|
232 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" }, |
|
233 #ifdef JFRIEDL_DEBUG |
|
234 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, |
|
235 #endif |
|
236 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, |
|
237 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" }, |
|
238 { OP_NODATA, 'V', NULL, "version", "print version information and exit" }, |
|
239 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" }, |
|
240 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" }, |
|
241 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, |
|
242 { OP_NODATA, 0, NULL, NULL, NULL } |
|
243 }; |
|
244 |
|
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Each table therefore has eight entries, indexed by
the value of process_options. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
|
255 |
|
/* UTF-8 tables - used in UTF-8 mode when the newline setting is "any" or
"anycrlf", where characters have to be decoded to recognize line endings.

utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask for the data bits held in its first byte.

utf8_table4 is indexed by the low 6 bits of a first byte whose value is 0xc0
or greater, and gives the number of additional bytes that follow it. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
265 |
|
266 |
|
267 |
|
268 /************************************************* |
|
269 * OS-specific functions * |
|
270 *************************************************/ |
|
271 |
|
272 /* These functions are defined so that they can be made system specific, |
|
273 although at present the only ones are for Unix, Win32, and for "no support". */ |
|
274 |
|
275 |
|
276 /************* Directory scanning in Unix ***********/ |
|
277 |
|
278 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H |
|
279 #include <sys/types.h> |
|
280 #include <sys/stat.h> |
|
281 #include <dirent.h> |
|
282 |
|
/* In the Unix build a directory handle is the C library's DIR stream. */
typedef DIR directory_type;
|
284 |
|
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if the path is a directory;
            0 if it is not, or if stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat statbuf;
  if (stat(filename, &statbuf) < 0)
    return 0;   /* In the expectation that opening as a file will fail */
  return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
|
293 |
|
294 static directory_type * |
|
295 opendirectory(char *filename) |
|
296 { |
|
297 return opendir(filename); |
|
298 } |
|
299 |
|
300 static char * |
|
301 readdirectory(directory_type *dir) |
|
302 { |
|
303 for (;;) |
|
304 { |
|
305 struct dirent *dent = readdir(dir); |
|
306 if (dent == NULL) return NULL; |
|
307 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) |
|
308 return dent->d_name; |
|
309 } |
|
310 /* Control never reaches here */ |
|
311 } |
|
312 |
|
313 static void |
|
314 closedirectory(directory_type *dir) |
|
315 { |
|
316 closedir(dir); |
|
317 } |
|
318 |
|
319 |
|
320 /************* Test for regular file in Unix **********/ |
|
321 |
|
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if the path is a regular file, or if stat() fails;
            0 otherwise
*/

static int
isregfile(char *filename)
{
  struct stat statbuf;
  if (stat(filename, &statbuf) < 0)
    return 1;   /* In the expectation that opening as a file will fail */
  return S_ISREG(statbuf.st_mode);
}
|
330 |
|
331 |
|
332 /************* Test stdout for being a terminal in Unix **********/ |
|
333 |
|
334 static BOOL |
|
335 is_stdout_tty(void) |
|
336 { |
|
337 return isatty(fileno(stdout)); |
|
338 } |
|
339 |
|
340 |
|
341 /************* Directory scanning in Win32 ***********/ |
|
342 |
|
343 /* I (Philip Hazel) have no means of testing this code. It was contributed by |
|
344 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES |
|
345 when it did not exist. David Byron added a patch that moved the #include of |
|
346 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. |
|
347 */ |
|
348 |
|
349 #elif HAVE_WINDOWS_H |
|
350 |
|
351 #ifndef STRICT |
|
352 # define STRICT |
|
353 #endif |
|
354 #ifndef WIN32_LEAN_AND_MEAN |
|
355 # define WIN32_LEAN_AND_MEAN |
|
356 #endif |
|
357 |
|
358 #include <windows.h> |
|
359 |
|
360 #ifndef INVALID_FILE_ATTRIBUTES |
|
361 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF |
|
362 #endif |
|
363 |
|
364 typedef struct directory_type |
|
365 { |
|
366 HANDLE handle; |
|
367 BOOL first; |
|
368 WIN32_FIND_DATA data; |
|
369 } directory_type; |
|
370 |
|
371 int |
|
372 isdirectory(char *filename) |
|
373 { |
|
374 DWORD attr = GetFileAttributes(filename); |
|
375 if (attr == INVALID_FILE_ATTRIBUTES) |
|
376 return 0; |
|
377 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0; |
|
378 } |
|
379 |
|
380 directory_type * |
|
381 opendirectory(char *filename) |
|
382 { |
|
383 size_t len; |
|
384 char *pattern; |
|
385 directory_type *dir; |
|
386 DWORD err; |
|
387 len = strlen(filename); |
|
388 pattern = (char *) malloc(len + 3); |
|
389 dir = (directory_type *) malloc(sizeof(*dir)); |
|
390 if ((pattern == NULL) || (dir == NULL)) |
|
391 { |
|
392 fprintf(stderr, "pcregrep: malloc failed\n"); |
|
393 exit(2); |
|
394 } |
|
395 memcpy(pattern, filename, len); |
|
396 memcpy(&(pattern[len]), "\\*", 3); |
|
397 dir->handle = FindFirstFile(pattern, &(dir->data)); |
|
398 if (dir->handle != INVALID_HANDLE_VALUE) |
|
399 { |
|
400 free(pattern); |
|
401 dir->first = TRUE; |
|
402 return dir; |
|
403 } |
|
404 err = GetLastError(); |
|
405 free(pattern); |
|
406 free(dir); |
|
407 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT; |
|
408 return NULL; |
|
409 } |
|
410 |
|
411 char * |
|
412 readdirectory(directory_type *dir) |
|
413 { |
|
414 for (;;) |
|
415 { |
|
416 if (!dir->first) |
|
417 { |
|
418 if (!FindNextFile(dir->handle, &(dir->data))) |
|
419 return NULL; |
|
420 } |
|
421 else |
|
422 { |
|
423 dir->first = FALSE; |
|
424 } |
|
425 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0) |
|
426 return dir->data.cFileName; |
|
427 } |
|
428 #ifndef _MSC_VER |
|
429 return NULL; /* Keep compiler happy; never executed */ |
|
430 #endif |
|
431 } |
|
432 |
|
433 void |
|
434 closedirectory(directory_type *dir) |
|
435 { |
|
436 FindClose(dir->handle); |
|
437 free(dir); |
|
438 } |
|
439 |
|
440 |
|
441 /************* Test for regular file in Win32 **********/ |
|
442 |
|
/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories. The result is 1 when isdirectory()
returns 0, and 0 otherwise. */

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
|
450 |
|
451 |
|
452 /************* Test stdout for being a terminal in Win32 **********/ |
|
453 |
|
454 /* I don't know how to do this; assume never */ |
|
455 |
|
456 static BOOL |
|
457 is_stdout_tty(void) |
|
458 { |
|
459 return FALSE; |
|
460 } |
|
461 |
|
462 |
|
463 /************* Directory scanning when we can't do it ***********/ |
|
464 |
|
465 /* The type is void, and apart from isdirectory(), the functions do nothing. */ |
|
466 |
|
467 #else |
|
468 |
|
/* Stand-ins for systems where directories cannot be scanned: nothing is ever
reported as a directory, opening always fails, reading yields no entries, and
closing does nothing. */

typedef void directory_type;

int isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

directory_type * opendirectory(char *filename)
{
  (void)filename;
  return (directory_type*)0;
}

char *readdirectory(directory_type *dir)
{
  (void)dir;
  return (char*)0;
}

void closedirectory(directory_type *dir)
{
  (void)dir;
}
|
475 |
|
476 |
|
477 /************* Test for regular when we can't do it **********/ |
|
478 |
|
/* Assume all files are regular: the result is always 1. */

int isregfile(char *filename)
{
  (void)filename;
  return 1;
}
|
482 |
|
483 |
|
484 /************* Test stdout for being a terminal when we can't do it **********/ |
|
485 |
|
486 static BOOL |
|
487 is_stdout_tty(void) |
|
488 { |
|
489 return FALSE; |
|
490 } |
|
491 |
|
492 |
|
493 #endif |
|
494 |
|
495 |
|
496 |
|
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function.

Argument:   n   an error number
Returns:    the corresponding entry in sys_errlist[], or a fixed text when n
            is negative or not less than sys_nerr
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n < 0 || n >= sys_nerr) return (char *)"unknown error number";
  return sys_errlist[n];
}
#endif /* HAVE_STRERROR */
|
516 |
|
517 |
|
518 |
|
519 /************************************************* |
|
520 * Find end of line * |
|
521 *************************************************/ |
|
522 |
|
523 /* The length of the endline sequence that is found is set via lenptr. This may |
|
524 be zero at the very end of the file if there is no line-ending sequence there. |
|
525 |
|
526 Arguments: |
|
527 p current position in line |
|
528 endptr end of available data |
|
529 lenptr where to put the length of the eol sequence |
|
530 |
|
531 Returns: pointer to the last byte of the line |
|
532 */ |
|
533 |
|
534 static char * |
|
535 end_of_line(char *p, char *endptr, int *lenptr) |
|
536 { |
|
537 switch(endlinetype) |
|
538 { |
|
539 default: /* Just in case */ |
|
540 case EL_LF: |
|
541 while (p < endptr && *p != '\n') p++; |
|
542 if (p < endptr) |
|
543 { |
|
544 *lenptr = 1; |
|
545 return p + 1; |
|
546 } |
|
547 *lenptr = 0; |
|
548 return endptr; |
|
549 |
|
550 case EL_CR: |
|
551 while (p < endptr && *p != '\r') p++; |
|
552 if (p < endptr) |
|
553 { |
|
554 *lenptr = 1; |
|
555 return p + 1; |
|
556 } |
|
557 *lenptr = 0; |
|
558 return endptr; |
|
559 |
|
560 case EL_CRLF: |
|
561 for (;;) |
|
562 { |
|
563 while (p < endptr && *p != '\r') p++; |
|
564 if (++p >= endptr) |
|
565 { |
|
566 *lenptr = 0; |
|
567 return endptr; |
|
568 } |
|
569 if (*p == '\n') |
|
570 { |
|
571 *lenptr = 2; |
|
572 return p + 1; |
|
573 } |
|
574 } |
|
575 break; |
|
576 |
|
577 case EL_ANYCRLF: |
|
578 while (p < endptr) |
|
579 { |
|
580 int extra = 0; |
|
581 register int c = *((unsigned char *)p); |
|
582 |
|
583 if (utf8 && c >= 0xc0) |
|
584 { |
|
585 int gcii, gcss; |
|
586 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
|
587 gcss = 6*extra; |
|
588 c = (c & utf8_table3[extra]) << gcss; |
|
589 for (gcii = 1; gcii <= extra; gcii++) |
|
590 { |
|
591 gcss -= 6; |
|
592 c |= (p[gcii] & 0x3f) << gcss; |
|
593 } |
|
594 } |
|
595 |
|
596 p += 1 + extra; |
|
597 |
|
598 switch (c) |
|
599 { |
|
600 case 0x0a: /* LF */ |
|
601 *lenptr = 1; |
|
602 return p; |
|
603 |
|
604 case 0x0d: /* CR */ |
|
605 if (p < endptr && *p == 0x0a) |
|
606 { |
|
607 *lenptr = 2; |
|
608 p++; |
|
609 } |
|
610 else *lenptr = 1; |
|
611 return p; |
|
612 |
|
613 default: |
|
614 break; |
|
615 } |
|
616 } /* End of loop for ANYCRLF case */ |
|
617 |
|
618 *lenptr = 0; /* Must have hit the end */ |
|
619 return endptr; |
|
620 |
|
621 case EL_ANY: |
|
622 while (p < endptr) |
|
623 { |
|
624 int extra = 0; |
|
625 register int c = *((unsigned char *)p); |
|
626 |
|
627 if (utf8 && c >= 0xc0) |
|
628 { |
|
629 int gcii, gcss; |
|
630 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
|
631 gcss = 6*extra; |
|
632 c = (c & utf8_table3[extra]) << gcss; |
|
633 for (gcii = 1; gcii <= extra; gcii++) |
|
634 { |
|
635 gcss -= 6; |
|
636 c |= (p[gcii] & 0x3f) << gcss; |
|
637 } |
|
638 } |
|
639 |
|
640 p += 1 + extra; |
|
641 |
|
642 switch (c) |
|
643 { |
|
644 case 0x0a: /* LF */ |
|
645 case 0x0b: /* VT */ |
|
646 case 0x0c: /* FF */ |
|
647 *lenptr = 1; |
|
648 return p; |
|
649 |
|
650 case 0x0d: /* CR */ |
|
651 if (p < endptr && *p == 0x0a) |
|
652 { |
|
653 *lenptr = 2; |
|
654 p++; |
|
655 } |
|
656 else *lenptr = 1; |
|
657 return p; |
|
658 |
|
659 case 0x85: /* NEL */ |
|
660 *lenptr = utf8? 2 : 1; |
|
661 return p; |
|
662 |
|
663 case 0x2028: /* LS */ |
|
664 case 0x2029: /* PS */ |
|
665 *lenptr = 3; |
|
666 return p; |
|
667 |
|
668 default: |
|
669 break; |
|
670 } |
|
671 } /* End of loop for ANY case */ |
|
672 |
|
673 *lenptr = 0; /* Must have hit the end */ |
|
674 return endptr; |
|
675 } /* End of overall switch */ |
|
676 } |
|
677 |
|
678 |
|
679 |
|
680 /************************************************* |
|
681 * Find start of previous line * |
|
682 *************************************************/ |
|
683 |
|
684 /* This is called when looking back for before lines to print. |
|
685 |
|
686 Arguments: |
|
687 p start of the subsequent line |
|
688 startptr start of available data |
|
689 |
|
690 Returns: pointer to the start of the previous line |
|
691 */ |
|
692 |
|
693 static char * |
|
694 previous_line(char *p, char *startptr) |
|
695 { |
|
696 switch(endlinetype) |
|
697 { |
|
698 default: /* Just in case */ |
|
699 case EL_LF: |
|
700 p--; |
|
701 while (p > startptr && p[-1] != '\n') p--; |
|
702 return p; |
|
703 |
|
704 case EL_CR: |
|
705 p--; |
|
706 while (p > startptr && p[-1] != '\n') p--; |
|
707 return p; |
|
708 |
|
709 case EL_CRLF: |
|
710 for (;;) |
|
711 { |
|
712 p -= 2; |
|
713 while (p > startptr && p[-1] != '\n') p--; |
|
714 if (p <= startptr + 1 || p[-2] == '\r') return p; |
|
715 } |
|
716 return p; /* But control should never get here */ |
|
717 |
|
718 case EL_ANY: |
|
719 case EL_ANYCRLF: |
|
720 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; |
|
721 if (utf8) while ((*p & 0xc0) == 0x80) p--; |
|
722 |
|
723 while (p > startptr) |
|
724 { |
|
725 register int c; |
|
726 char *pp = p - 1; |
|
727 |
|
728 if (utf8) |
|
729 { |
|
730 int extra = 0; |
|
731 while ((*pp & 0xc0) == 0x80) pp--; |
|
732 c = *((unsigned char *)pp); |
|
733 if (c >= 0xc0) |
|
734 { |
|
735 int gcii, gcss; |
|
736 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
|
737 gcss = 6*extra; |
|
738 c = (c & utf8_table3[extra]) << gcss; |
|
739 for (gcii = 1; gcii <= extra; gcii++) |
|
740 { |
|
741 gcss -= 6; |
|
742 c |= (pp[gcii] & 0x3f) << gcss; |
|
743 } |
|
744 } |
|
745 } |
|
746 else c = *((unsigned char *)pp); |
|
747 |
|
748 if (endlinetype == EL_ANYCRLF) switch (c) |
|
749 { |
|
750 case 0x0a: /* LF */ |
|
751 case 0x0d: /* CR */ |
|
752 return p; |
|
753 |
|
754 default: |
|
755 break; |
|
756 } |
|
757 |
|
758 else switch (c) |
|
759 { |
|
760 case 0x0a: /* LF */ |
|
761 case 0x0b: /* VT */ |
|
762 case 0x0c: /* FF */ |
|
763 case 0x0d: /* CR */ |
|
764 case 0x85: /* NEL */ |
|
765 case 0x2028: /* LS */ |
|
766 case 0x2029: /* PS */ |
|
767 return p; |
|
768 |
|
769 default: |
|
770 break; |
|
771 } |
|
772 |
|
773 p = pp; /* Back one character */ |
|
774 } /* End of loop for ANY case */ |
|
775 |
|
776 return startptr; /* Hit start of data */ |
|
777 } /* End of overall switch */ |
|
778 } |
|
779 |
|
780 |
|
781 |
|
782 |
|
783 |
|
784 /************************************************* |
|
785 * Print the previous "after" lines * |
|
786 *************************************************/ |
|
787 |
|
788 /* This is called if we are about to lose said lines because of buffer filling, |
|
789 and at the end of the file. The data in the line is written using fwrite() so |
|
790 that a binary zero does not terminate it. |
|
791 |
|
792 Arguments: |
|
793 lastmatchnumber the number of the last matching line, plus one |
|
794 lastmatchrestart where we restarted after the last match |
|
795 endptr end of available data |
|
796 printname filename for printing |
|
797 |
|
798 Returns: nothing |
|
799 */ |
|
800 |
|
801 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart, |
|
802 char *endptr, char *printname) |
|
803 { |
|
804 if (after_context > 0 && lastmatchnumber > 0) |
|
805 { |
|
806 int count = 0; |
|
807 while (lastmatchrestart < endptr && count++ < after_context) |
|
808 { |
|
809 int ellength; |
|
810 char *pp = lastmatchrestart; |
|
811 if (printname != NULL) fprintf(stdout, "%s-", printname); |
|
812 if (number) fprintf(stdout, "%d-", lastmatchnumber++); |
|
813 pp = end_of_line(pp, endptr, &ellength); |
|
814 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout); |
|
815 lastmatchrestart = pp; |
|
816 } |
|
817 hyphenpending = TRUE; |
|
818 } |
|
819 } |
|
820 |
|
821 |
|
822 |
|
823 /************************************************* |
|
824 * Grep an individual file * |
|
825 *************************************************/ |
|
826 |
|
827 /* This is called from grep_or_recurse() below. It uses a buffer that is three |
|
828 times the value of MBUFTHIRD. The matching point is never allowed to stray into |
|
829 the top third of the buffer, thus keeping more of the file available for |
|
830 context printing or for multiline scanning. For large files, the pointer will |
|
831 be in the middle third most of the time, so the bottom third is available for |
|
832 "before" context printing. |
|
833 |
|
834 Arguments: |
|
835 handle the fopened FILE stream for a normal file |
|
836 the gzFile pointer when reading is via libz |
|
837 the BZFILE pointer when reading is via libbz2 |
|
838 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2 |
|
839 printname the file name if it is to be printed for each match |
|
840 or NULL if the file name is not to be printed |
|
841 it cannot be NULL if filenames[_nomatch]_only is set |
|
842 |
|
843 Returns: 0 if there was at least one match |
|
844 1 otherwise (no matches) |
|
845 2 if there is a read error on a .bz2 file |
|
846 */ |
|
847 |
|
848 static int |
|
849 pcregrep(void *handle, int frtype, char *printname) |
|
850 { |
|
851 int rc = 1; |
|
852 int linenumber = 1; |
|
853 int lastmatchnumber = 0; |
|
854 int count = 0; |
|
855 int filepos = 0; |
|
856 int offsets[99]; |
|
857 char *lastmatchrestart = NULL; |
|
858 char buffer[3*MBUFTHIRD]; |
|
859 char *ptr = buffer; |
|
860 char *endptr; |
|
861 size_t bufflength; |
|
862 BOOL endhyphenpending = FALSE; |
|
863 FILE *in = NULL; /* Ensure initialized */ |
|
864 |
|
865 #ifdef SUPPORT_LIBZ |
|
866 gzFile ingz = NULL; |
|
867 #endif |
|
868 |
|
869 #ifdef SUPPORT_LIBBZ2 |
|
870 BZFILE *inbz2 = NULL; |
|
871 #endif |
|
872 |
|
873 |
|
874 /* Do the first read into the start of the buffer and set up the pointer to end |
|
875 of what we have. In the case of libz, a non-zipped .gz file will be read as a |
|
876 plain file. However, if a .bz2 file isn't actually bzipped, the first read will |
|
877 fail. */ |
|
878 |
|
879 #ifdef SUPPORT_LIBZ |
|
880 if (frtype == FR_LIBZ) |
|
881 { |
|
882 ingz = (gzFile)handle; |
|
883 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD); |
|
884 } |
|
885 else |
|
886 #endif |
|
887 |
|
888 #ifdef SUPPORT_LIBBZ2 |
|
889 if (frtype == FR_LIBBZ2) |
|
890 { |
|
891 inbz2 = (BZFILE *)handle; |
|
892 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD); |
|
893 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ |
|
894 } /* without the cast it is unsigned. */ |
|
895 else |
|
896 #endif |
|
897 |
|
898 { |
|
899 in = (FILE *)handle; |
|
900 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in); |
|
901 } |
|
902 |
|
903 endptr = buffer + bufflength; |
|
904 |
|
905 /* Loop while the current pointer is not at the end of the file. For large |
|
906 files, endptr will be at the end of the buffer when we are in the middle of the |
|
907 file, but ptr will never get there, because as soon as it gets over 2/3 of the |
|
908 way, the buffer is shifted left and re-filled. */ |
|
909 |
|
910 while (ptr < endptr) |
|
911 { |
|
912 int i, endlinelength; |
|
913 int mrc = 0; |
|
914 BOOL match = FALSE; |
|
915 char *matchptr = ptr; |
|
916 char *t = ptr; |
|
917 size_t length, linelength; |
|
918 |
|
919 /* At this point, ptr is at the start of a line. We need to find the length |
|
920 of the subject string to pass to pcre_exec(). In multiline mode, it is the |
|
921 length remainder of the data in the buffer. Otherwise, it is the length of |
|
922 the next line. After matching, we always advance by the length of the next |
|
923 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so |
|
924 that any match is constrained to be in the first line. */ |
|
925 |
|
926 t = end_of_line(t, endptr, &endlinelength); |
|
927 linelength = t - ptr - endlinelength; |
|
928 length = multiline? (size_t)(endptr - ptr) : linelength; |
|
929 |
|
930 /* Extra processing for Jeffrey Friedl's debugging. */ |
|
931 |
|
932 #ifdef JFRIEDL_DEBUG |
|
933 if (jfriedl_XT || jfriedl_XR) |
|
934 { |
|
935 #include <sys/time.h> |
|
936 #include <time.h> |
|
937 struct timeval start_time, end_time; |
|
938 struct timezone dummy; |
|
939 |
|
940 if (jfriedl_XT) |
|
941 { |
|
942 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix); |
|
943 const char *orig = ptr; |
|
944 ptr = malloc(newlen + 1); |
|
945 if (!ptr) { |
|
946 printf("out of memory"); |
|
947 exit(2); |
|
948 } |
|
949 endptr = ptr; |
|
950 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix); |
|
951 for (i = 0; i < jfriedl_XT; i++) { |
|
952 strncpy(endptr, orig, length); |
|
953 endptr += length; |
|
954 } |
|
955 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix); |
|
956 length = newlen; |
|
957 } |
|
958 |
|
959 if (gettimeofday(&start_time, &dummy) != 0) |
|
960 perror("bad gettimeofday"); |
|
961 |
|
962 |
|
963 for (i = 0; i < jfriedl_XR; i++) |
|
964 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0); |
|
965 |
|
966 if (gettimeofday(&end_time, &dummy) != 0) |
|
967 perror("bad gettimeofday"); |
|
968 |
|
969 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0)) |
|
970 - |
|
971 (start_time.tv_sec + (start_time.tv_usec / 1000000.0))); |
|
972 |
|
973 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta); |
|
974 return 0; |
|
975 } |
|
976 #endif |
|
977 |
|
978 /* We come back here after a match when the -o option (only_matching) is set, |
|
979 in order to find any further matches in the same line. */ |
|
980 |
|
981 ONLY_MATCHING_RESTART: |
|
982 |
|
983 /* Run through all the patterns until one matches. Note that we don't include |
|
984 the final newline in the subject string. */ |
|
985 |
|
986 for (i = 0; i < pattern_count; i++) |
|
987 { |
|
988 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0, |
|
989 offsets, 99); |
|
990 if (mrc >= 0) { match = TRUE; break; } |
|
991 if (mrc != PCRE_ERROR_NOMATCH) |
|
992 { |
|
993 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc); |
|
994 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1); |
|
995 fprintf(stderr, "this line:\n"); |
|
996 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */ |
|
997 fprintf(stderr, "\n"); |
|
998 if (error_count == 0 && |
|
999 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT)) |
|
1000 { |
|
1001 fprintf(stderr, "pcregrep: error %d means that a resource limit " |
|
1002 "was exceeded\n", mrc); |
|
1003 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n"); |
|
1004 } |
|
1005 if (error_count++ > 20) |
|
1006 { |
|
1007 fprintf(stderr, "pcregrep: too many errors - abandoned\n"); |
|
1008 exit(2); |
|
1009 } |
|
1010 match = invert; /* No more matching; don't show the line again */ |
|
1011 break; |
|
1012 } |
|
1013 } |
|
1014 |
|
1015 /* If it's a match or a not-match (as required), do what's wanted. */ |
|
1016 |
|
1017 if (match != invert) |
|
1018 { |
|
1019 BOOL hyphenprinted = FALSE; |
|
1020 |
|
1021 /* We've failed if we want a file that doesn't have any matches. */ |
|
1022 |
|
1023 if (filenames == FN_NOMATCH_ONLY) return 1; |
|
1024 |
|
1025 /* Just count if just counting is wanted. */ |
|
1026 |
|
1027 if (count_only) count++; |
|
1028 |
|
1029 /* If all we want is a file name, there is no need to scan any more lines |
|
1030 in the file. */ |
|
1031 |
|
1032 else if (filenames == FN_ONLY) |
|
1033 { |
|
1034 fprintf(stdout, "%s\n", printname); |
|
1035 return 0; |
|
1036 } |
|
1037 |
|
1038 /* Likewise, if all we want is a yes/no answer. */ |
|
1039 |
|
1040 else if (quiet) return 0; |
|
1041 |
|
1042 /* The --only-matching option prints just the substring that matched, and |
|
1043 the --file-offsets and --line-offsets options output offsets for the |
|
1044 matching substring (they both force --only-matching). None of these options |
|
1045 prints any context. Afterwards, adjust the start and length, and then jump |
|
1046 back to look for further matches in the same line. If we are in invert |
|
1047 mode, however, nothing is printed - this could be still useful because the |
|
1048 return code is set. */ |
|
1049 |
|
1050 else if (only_matching) |
|
1051 { |
|
1052 if (!invert) |
|
1053 { |
|
1054 if (printname != NULL) fprintf(stdout, "%s:", printname); |
|
1055 if (number) fprintf(stdout, "%d:", linenumber); |
|
1056 if (line_offsets) |
|
1057 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr), |
|
1058 offsets[1] - offsets[0]); |
|
1059 else if (file_offsets) |
|
1060 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr), |
|
1061 offsets[1] - offsets[0]); |
|
1062 else |
|
1063 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
|
1064 fprintf(stdout, "\n"); |
|
1065 matchptr += offsets[1]; |
|
1066 length -= offsets[1]; |
|
1067 match = FALSE; |
|
1068 goto ONLY_MATCHING_RESTART; |
|
1069 } |
|
1070 } |
|
1071 |
|
1072 /* This is the default case when none of the above options is set. We print |
|
1073 the matching lines(s), possibly preceded and/or followed by other lines of |
|
1074 context. */ |
|
1075 |
|
1076 else |
|
1077 { |
|
1078 /* See if there is a requirement to print some "after" lines from a |
|
1079 previous match. We never print any overlaps. */ |
|
1080 |
|
1081 if (after_context > 0 && lastmatchnumber > 0) |
|
1082 { |
|
1083 int ellength; |
|
1084 int linecount = 0; |
|
1085 char *p = lastmatchrestart; |
|
1086 |
|
1087 while (p < ptr && linecount < after_context) |
|
1088 { |
|
1089 p = end_of_line(p, ptr, &ellength); |
|
1090 linecount++; |
|
1091 } |
|
1092 |
|
1093 /* It is important to advance lastmatchrestart during this printing so |
|
1094 that it interacts correctly with any "before" printing below. Print |
|
1095 each line's data using fwrite() in case there are binary zeroes. */ |
|
1096 |
|
1097 while (lastmatchrestart < p) |
|
1098 { |
|
1099 char *pp = lastmatchrestart; |
|
1100 if (printname != NULL) fprintf(stdout, "%s-", printname); |
|
1101 if (number) fprintf(stdout, "%d-", lastmatchnumber++); |
|
1102 pp = end_of_line(pp, endptr, &ellength); |
|
1103 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout); |
|
1104 lastmatchrestart = pp; |
|
1105 } |
|
1106 if (lastmatchrestart != ptr) hyphenpending = TRUE; |
|
1107 } |
|
1108 |
|
1109 /* If there were non-contiguous lines printed above, insert hyphens. */ |
|
1110 |
|
1111 if (hyphenpending) |
|
1112 { |
|
1113 fprintf(stdout, "--\n"); |
|
1114 hyphenpending = FALSE; |
|
1115 hyphenprinted = TRUE; |
|
1116 } |
|
1117 |
|
1118 /* See if there is a requirement to print some "before" lines for this |
|
1119 match. Again, don't print overlaps. */ |
|
1120 |
|
1121 if (before_context > 0) |
|
1122 { |
|
1123 int linecount = 0; |
|
1124 char *p = ptr; |
|
1125 |
|
1126 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && |
|
1127 linecount < before_context) |
|
1128 { |
|
1129 linecount++; |
|
1130 p = previous_line(p, buffer); |
|
1131 } |
|
1132 |
|
1133 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) |
|
1134 fprintf(stdout, "--\n"); |
|
1135 |
|
1136 while (p < ptr) |
|
1137 { |
|
1138 int ellength; |
|
1139 char *pp = p; |
|
1140 if (printname != NULL) fprintf(stdout, "%s-", printname); |
|
1141 if (number) fprintf(stdout, "%d-", linenumber - linecount--); |
|
1142 pp = end_of_line(pp, endptr, &ellength); |
|
1143 fwrite(p, 1, pp - p, stdout); |
|
1144 p = pp; |
|
1145 } |
|
1146 } |
|
1147 |
|
1148 /* Now print the matching line(s); ensure we set hyphenpending at the end |
|
1149 of the file if any context lines are being output. */ |
|
1150 |
|
1151 if (after_context > 0 || before_context > 0) |
|
1152 endhyphenpending = TRUE; |
|
1153 |
|
1154 if (printname != NULL) fprintf(stdout, "%s:", printname); |
|
1155 if (number) fprintf(stdout, "%d:", linenumber); |
|
1156 |
|
1157 /* In multiline mode, we want to print to the end of the line in which |
|
1158 the end of the matched string is found, so we adjust linelength and the |
|
1159 line number appropriately, but only when there actually was a match |
|
1160 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of |
|
1161 the match will always be before the first newline sequence. */ |
|
1162 |
|
1163 if (multiline) |
|
1164 { |
|
1165 int ellength; |
|
1166 char *endmatch = ptr; |
|
1167 if (!invert) |
|
1168 { |
|
1169 endmatch += offsets[1]; |
|
1170 t = ptr; |
|
1171 while (t < endmatch) |
|
1172 { |
|
1173 t = end_of_line(t, endptr, &ellength); |
|
1174 if (t <= endmatch) linenumber++; else break; |
|
1175 } |
|
1176 } |
|
1177 endmatch = end_of_line(endmatch, endptr, &ellength); |
|
1178 linelength = endmatch - ptr - ellength; |
|
1179 } |
|
1180 |
|
1181 /*** NOTE: Use only fwrite() to output the data line, so that binary |
|
1182 zeroes are treated as just another data character. */ |
|
1183 |
|
1184 /* This extra option, for Jeffrey Friedl's debugging requirements, |
|
1185 replaces the matched string, or a specific captured string if it exists, |
|
1186 with X. When this happens, colouring is ignored. */ |
|
1187 |
|
1188 #ifdef JFRIEDL_DEBUG |
|
1189 if (S_arg >= 0 && S_arg < mrc) |
|
1190 { |
|
1191 int first = S_arg * 2; |
|
1192 int last = first + 1; |
|
1193 fwrite(ptr, 1, offsets[first], stdout); |
|
1194 fprintf(stdout, "X"); |
|
1195 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout); |
|
1196 } |
|
1197 else |
|
1198 #endif |
|
1199 |
|
1200 /* We have to split the line(s) up if colouring. */ |
|
1201 |
|
1202 if (do_colour) |
|
1203 { |
|
1204 fwrite(ptr, 1, offsets[0], stdout); |
|
1205 fprintf(stdout, "%c[%sm", 0x1b, colour_string); |
|
1206 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
|
1207 fprintf(stdout, "%c[00m", 0x1b); |
|
1208 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1], |
|
1209 stdout); |
|
1210 } |
|
1211 else fwrite(ptr, 1, linelength + endlinelength, stdout); |
|
1212 } |
|
1213 |
|
1214 /* End of doing what has to be done for a match */ |
|
1215 |
|
1216 rc = 0; /* Had some success */ |
|
1217 |
|
1218 /* Remember where the last match happened for after_context. We remember |
|
1219 where we are about to restart, and that line's number. */ |
|
1220 |
|
1221 lastmatchrestart = ptr + linelength + endlinelength; |
|
1222 lastmatchnumber = linenumber + 1; |
|
1223 } |
|
1224 |
|
1225 /* For a match in multiline inverted mode (which of course did not cause |
|
1226 anything to be printed), we have to move on to the end of the match before |
|
1227 proceeding. */ |
|
1228 |
|
1229 if (multiline && invert && match) |
|
1230 { |
|
1231 int ellength; |
|
1232 char *endmatch = ptr + offsets[1]; |
|
1233 t = ptr; |
|
1234 while (t < endmatch) |
|
1235 { |
|
1236 t = end_of_line(t, endptr, &ellength); |
|
1237 if (t <= endmatch) linenumber++; else break; |
|
1238 } |
|
1239 endmatch = end_of_line(endmatch, endptr, &ellength); |
|
1240 linelength = endmatch - ptr - ellength; |
|
1241 } |
|
1242 |
|
1243 /* Advance to after the newline and increment the line number. The file |
|
1244 offset to the current line is maintained in filepos. */ |
|
1245 |
|
1246 ptr += linelength + endlinelength; |
|
1247 filepos += linelength + endlinelength; |
|
1248 linenumber++; |
|
1249 |
|
1250 /* If we haven't yet reached the end of the file (the buffer is full), and |
|
1251 the current point is in the top 1/3 of the buffer, slide the buffer down by |
|
1252 1/3 and refill it. Before we do this, if some unprinted "after" lines are |
|
1253 about to be lost, print them. */ |
|
1254 |
|
1255 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD) |
|
1256 { |
|
1257 if (after_context > 0 && |
|
1258 lastmatchnumber > 0 && |
|
1259 lastmatchrestart < buffer + MBUFTHIRD) |
|
1260 { |
|
1261 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); |
|
1262 lastmatchnumber = 0; |
|
1263 } |
|
1264 |
|
1265 /* Now do the shuffle */ |
|
1266 |
|
1267 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD); |
|
1268 ptr -= MBUFTHIRD; |
|
1269 |
|
1270 #ifdef SUPPORT_LIBZ |
|
1271 if (frtype == FR_LIBZ) |
|
1272 bufflength = 2*MBUFTHIRD + |
|
1273 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD); |
|
1274 else |
|
1275 #endif |
|
1276 |
|
1277 #ifdef SUPPORT_LIBBZ2 |
|
1278 if (frtype == FR_LIBBZ2) |
|
1279 bufflength = 2*MBUFTHIRD + |
|
1280 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD); |
|
1281 else |
|
1282 #endif |
|
1283 |
|
1284 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in); |
|
1285 |
|
1286 endptr = buffer + bufflength; |
|
1287 |
|
1288 /* Adjust any last match point */ |
|
1289 |
|
1290 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD; |
|
1291 } |
|
1292 } /* Loop through the whole file */ |
|
1293 |
|
1294 /* End of file; print final "after" lines if wanted; do_after_lines sets |
|
1295 hyphenpending if it prints something. */ |
|
1296 |
|
1297 if (!only_matching && !count_only) |
|
1298 { |
|
1299 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); |
|
1300 hyphenpending |= endhyphenpending; |
|
1301 } |
|
1302 |
|
1303 /* Print the file name if we are looking for those without matches and there |
|
1304 were none. If we found a match, we won't have got this far. */ |
|
1305 |
|
1306 if (filenames == FN_NOMATCH_ONLY) |
|
1307 { |
|
1308 fprintf(stdout, "%s\n", printname); |
|
1309 return 0; |
|
1310 } |
|
1311 |
|
1312 /* Print the match count if wanted */ |
|
1313 |
|
1314 if (count_only) |
|
1315 { |
|
1316 if (printname != NULL) fprintf(stdout, "%s:", printname); |
|
1317 fprintf(stdout, "%d\n", count); |
|
1318 } |
|
1319 |
|
1320 return rc; |
|
1321 } |
|
1322 |
|
1323 |
|
1324 |
|
1325 /************************************************* |
|
1326 * Grep a file or recurse into a directory * |
|
1327 *************************************************/ |
|
1328 |
|
1329 /* Given a path name, if it's a directory, scan all the files if we are |
|
1330 recursing; if it's a file, grep it. |
|
1331 |
|
1332 Arguments: |
|
1333 pathname the path to investigate |
|
1334 dir_recurse TRUE if recursing is wanted (-r or -drecurse) |
|
1335 only_one_at_top TRUE if the path is the only one at toplevel |
|
1336 |
|
1337 Returns: 0 if there was at least one match |
|
1338 1 if there were no matches |
|
1339 2 there was some kind of error |
|
1340 |
|
1341 However, file opening failures are suppressed if "silent" is set. |
|
1342 */ |
|
1343 |
|
1344 static int |
|
1345 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top) |
|
1346 { |
|
1347 int rc = 1; |
|
1348 int sep; |
|
1349 int frtype; |
|
1350 int pathlen; |
|
1351 void *handle; |
|
1352 FILE *in = NULL; /* Ensure initialized */ |
|
1353 |
|
1354 #ifdef SUPPORT_LIBZ |
|
1355 gzFile ingz = NULL; |
|
1356 #endif |
|
1357 |
|
1358 #ifdef SUPPORT_LIBBZ2 |
|
1359 BZFILE *inbz2 = NULL; |
|
1360 #endif |
|
1361 |
|
1362 /* If the file name is "-" we scan stdin */ |
|
1363 |
|
1364 if (strcmp(pathname, "-") == 0) |
|
1365 { |
|
1366 return pcregrep(stdin, FR_PLAIN, |
|
1367 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? |
|
1368 stdin_name : NULL); |
|
1369 } |
|
1370 |
|
1371 /* If the file is a directory, skip if skipping or if we are recursing, scan |
|
1372 each file and directory within it, subject to any include or exclude patterns |
|
1373 that were set. The scanning code is localized so it can be made |
|
1374 system-specific. */ |
|
1375 |
|
1376 if ((sep = isdirectory(pathname)) != 0) |
|
1377 { |
|
1378 if (dee_action == dee_SKIP) return 1; |
|
1379 if (dee_action == dee_RECURSE) |
|
1380 { |
|
1381 char buffer[1024]; |
|
1382 char *nextfile; |
|
1383 directory_type *dir = opendirectory(pathname); |
|
1384 |
|
1385 if (dir == NULL) |
|
1386 { |
|
1387 if (!silent) |
|
1388 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname, |
|
1389 strerror(errno)); |
|
1390 return 2; |
|
1391 } |
|
1392 |
|
1393 while ((nextfile = readdirectory(dir)) != NULL) |
|
1394 { |
|
1395 int frc, nflen; |
|
1396 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile); |
|
1397 nflen = strlen(nextfile); |
|
1398 |
|
1399 if (isdirectory(buffer)) |
|
1400 { |
|
1401 if (exclude_dir_compiled != NULL && |
|
1402 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0) |
|
1403 continue; |
|
1404 |
|
1405 if (include_dir_compiled != NULL && |
|
1406 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0) |
|
1407 continue; |
|
1408 } |
|
1409 else |
|
1410 { |
|
1411 if (exclude_compiled != NULL && |
|
1412 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0) |
|
1413 continue; |
|
1414 |
|
1415 if (include_compiled != NULL && |
|
1416 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0) |
|
1417 continue; |
|
1418 } |
|
1419 |
|
1420 frc = grep_or_recurse(buffer, dir_recurse, FALSE); |
|
1421 if (frc > 1) rc = frc; |
|
1422 else if (frc == 0 && rc == 1) rc = 0; |
|
1423 } |
|
1424 |
|
1425 closedirectory(dir); |
|
1426 return rc; |
|
1427 } |
|
1428 } |
|
1429 |
|
1430 /* If the file is not a directory and not a regular file, skip it if that's |
|
1431 been requested. */ |
|
1432 |
|
1433 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1; |
|
1434 |
|
1435 /* Control reaches here if we have a regular file, or if we have a directory |
|
1436 and recursion or skipping was not requested, or if we have anything else and |
|
1437 skipping was not requested. The scan proceeds. If this is the first and only |
|
1438 argument at top level, we don't show the file name, unless we are only showing |
|
1439 the file name, or the filename was forced (-H). */ |
|
1440 |
|
1441 pathlen = strlen(pathname); |
|
1442 |
|
1443 /* Open using zlib if it is supported and the file name ends with .gz. */ |
|
1444 |
|
1445 #ifdef SUPPORT_LIBZ |
|
1446 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0) |
|
1447 { |
|
1448 ingz = gzopen(pathname, "rb"); |
|
1449 if (ingz == NULL) |
|
1450 { |
|
1451 if (!silent) |
|
1452 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, |
|
1453 strerror(errno)); |
|
1454 return 2; |
|
1455 } |
|
1456 handle = (void *)ingz; |
|
1457 frtype = FR_LIBZ; |
|
1458 } |
|
1459 else |
|
1460 #endif |
|
1461 |
|
1462 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */ |
|
1463 |
|
1464 #ifdef SUPPORT_LIBBZ2 |
|
1465 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0) |
|
1466 { |
|
1467 inbz2 = BZ2_bzopen(pathname, "rb"); |
|
1468 handle = (void *)inbz2; |
|
1469 frtype = FR_LIBBZ2; |
|
1470 } |
|
1471 else |
|
1472 #endif |
|
1473 |
|
1474 /* Otherwise use plain fopen(). The label is so that we can come back here if |
|
1475 an attempt to read a .bz2 file indicates that it really is a plain file. */ |
|
1476 |
|
1477 #ifdef SUPPORT_LIBBZ2 |
|
1478 PLAIN_FILE: |
|
1479 #endif |
|
1480 { |
|
1481 in = fopen(pathname, "r"); |
|
1482 handle = (void *)in; |
|
1483 frtype = FR_PLAIN; |
|
1484 } |
|
1485 |
|
1486 /* All the opening methods return errno when they fail. */ |
|
1487 |
|
1488 if (handle == NULL) |
|
1489 { |
|
1490 if (!silent) |
|
1491 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, |
|
1492 strerror(errno)); |
|
1493 return 2; |
|
1494 } |
|
1495 |
|
1496 /* Now grep the file */ |
|
1497 |
|
1498 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT || |
|
1499 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); |
|
1500 |
|
1501 /* Close in an appropriate manner. */ |
|
1502 |
|
1503 #ifdef SUPPORT_LIBZ |
|
1504 if (frtype == FR_LIBZ) |
|
1505 gzclose(ingz); |
|
1506 else |
|
1507 #endif |
|
1508 |
|
1509 /* If it is a .bz2 file and the result is 2, it means that the first attempt to |
|
1510 read failed. If the error indicates that the file isn't in fact bzipped, try |
|
1511 again as a normal file. */ |
|
1512 |
|
1513 #ifdef SUPPORT_LIBBZ2 |
|
1514 if (frtype == FR_LIBBZ2) |
|
1515 { |
|
1516 if (rc == 2) |
|
1517 { |
|
1518 int errnum; |
|
1519 const char *err = BZ2_bzerror(inbz2, &errnum); |
|
1520 if (errnum == BZ_DATA_ERROR_MAGIC) |
|
1521 { |
|
1522 BZ2_bzclose(inbz2); |
|
1523 goto PLAIN_FILE; |
|
1524 } |
|
1525 else if (!silent) |
|
1526 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n", |
|
1527 pathname, err); |
|
1528 } |
|
1529 BZ2_bzclose(inbz2); |
|
1530 } |
|
1531 else |
|
1532 #endif |
|
1533 |
|
1534 /* Normal file close */ |
|
1535 |
|
1536 fclose(in); |
|
1537 |
|
1538 /* Pass back the yield from pcregrep(). */ |
|
1539 |
|
1540 return rc; |
|
1541 } |
|
1542 |
|
1543 |
|
1544 |
|
1545 |
|
1546 /************************************************* |
|
1547 * Usage function * |
|
1548 *************************************************/ |
|
1549 |
|
1550 static int |
|
1551 usage(int rc) |
|
1552 { |
|
1553 option_item *op; |
|
1554 fprintf(stderr, "Usage: pcregrep [-"); |
|
1555 for (op = optionlist; op->one_char != 0; op++) |
|
1556 { |
|
1557 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); |
|
1558 } |
|
1559 fprintf(stderr, "] [long options] [pattern] [files]\n"); |
|
1560 fprintf(stderr, "Type `pcregrep --help' for more information and the long " |
|
1561 "options.\n"); |
|
1562 return rc; |
|
1563 } |
|
1564 |
|
1565 |
|
1566 |
|
1567 |
|
1568 /************************************************* |
|
1569 * Help function * |
|
1570 *************************************************/ |
|
1571 |
|
1572 static void |
|
1573 help(void) |
|
1574 { |
|
1575 option_item *op; |
|
1576 |
|
1577 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n"); |
|
1578 printf("Search for PATTERN in each FILE or standard input.\n"); |
|
1579 printf("PATTERN must be present if neither -e nor -f is used.\n"); |
|
1580 printf("\"-\" can be used as a file name to mean STDIN.\n"); |
|
1581 |
|
1582 #ifdef SUPPORT_LIBZ |
|
1583 printf("Files whose names end in .gz are read using zlib.\n"); |
|
1584 #endif |
|
1585 |
|
1586 #ifdef SUPPORT_LIBBZ2 |
|
1587 printf("Files whose names end in .bz2 are read using bzlib2.\n"); |
|
1588 #endif |
|
1589 |
|
1590 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 |
|
1591 printf("Other files and the standard input are read as plain files.\n\n"); |
|
1592 #else |
|
1593 printf("All files are read as plain files, without any interpretation.\n\n"); |
|
1594 #endif |
|
1595 |
|
1596 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); |
|
1597 printf("Options:\n"); |
|
1598 |
|
1599 for (op = optionlist; op->one_char != 0; op++) |
|
1600 { |
|
1601 int n; |
|
1602 char s[4]; |
|
1603 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " "); |
|
1604 n = 30 - printf(" %s --%s", s, op->long_name); |
|
1605 if (n < 1) n = 1; |
|
1606 printf("%.*s%s\n", n, " ", op->help_text); |
|
1607 } |
|
1608 |
|
1609 printf("\nWhen reading patterns from a file instead of using a command line option,\n"); |
|
1610 printf("trailing white space is removed and blank lines are ignored.\n"); |
|
1611 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT); |
|
1612 |
|
1613 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n"); |
|
1614 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); |
|
1615 } |
|
1616 |
|
1617 |
|
1618 |
|
1619 |
|
1620 /************************************************* |
|
1621 * Handle a single-letter, no data option * |
|
1622 *************************************************/ |
|
1623 |
|
1624 static int |
|
1625 handle_option(int letter, int options) |
|
1626 { |
|
1627 switch(letter) |
|
1628 { |
|
1629 case N_FOFFSETS: file_offsets = TRUE; break; |
|
1630 case N_HELP: help(); exit(0); |
|
1631 case N_LOFFSETS: line_offsets = number = TRUE; break; |
|
1632 case 'c': count_only = TRUE; break; |
|
1633 case 'F': process_options |= PO_FIXED_STRINGS; break; |
|
1634 case 'H': filenames = FN_FORCE; break; |
|
1635 case 'h': filenames = FN_NONE; break; |
|
1636 case 'i': options |= PCRE_CASELESS; break; |
|
1637 case 'l': filenames = FN_ONLY; break; |
|
1638 case 'L': filenames = FN_NOMATCH_ONLY; break; |
|
1639 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break; |
|
1640 case 'n': number = TRUE; break; |
|
1641 case 'o': only_matching = TRUE; break; |
|
1642 case 'q': quiet = TRUE; break; |
|
1643 case 'r': dee_action = dee_RECURSE; break; |
|
1644 case 's': silent = TRUE; break; |
|
1645 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break; |
|
1646 case 'v': invert = TRUE; break; |
|
1647 case 'w': process_options |= PO_WORD_MATCH; break; |
|
1648 case 'x': process_options |= PO_LINE_MATCH; break; |
|
1649 |
|
1650 case 'V': |
|
1651 fprintf(stderr, "pcregrep version %s\n", pcre_version()); |
|
1652 exit(0); |
|
1653 break; |
|
1654 |
|
1655 default: |
|
1656 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter); |
|
1657 exit(usage(2)); |
|
1658 } |
|
1659 |
|
1660 return options; |
|
1661 } |
|
1662 |
|
1663 |
|
1664 |
|
1665 |
|
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc. The text is built in a
static buffer, so it is overwritten by the next call.

Argument:   n    the number
Returns:    pointer to the static buffer containing the ordinal text
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of an
int; the extra four are for a minus sign, the two-letter suffix, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int last_two = n % 100;
char *p = buffer;

sprintf(p, "%d", n);
p += strlen(p);

/* Numbers ending in 11, 12, or 13 take "th" even though their final digit is
1, 2, or 3. A negative number gives a non-positive remainder and so always
gets "th". */

if (last_two < 11 || last_two > 13) switch (n%10)
  {
  case 1:  ending = "st"; break;
  case 2:  ending = "nd"; break;
  case 3:  ending = "rd"; break;
  default: break;
  }

strcpy(p, ending);
return buffer;
}
|
1688 |
|
1689 |
|
1690 |
|
1691 /************************************************* |
|
1692 * Compile a single pattern * |
|
1693 *************************************************/ |
|
1694 |
|
1695 /* When the -F option has been used, this is called for each substring. |
|
1696 Otherwise it's called for each supplied pattern. |
|
1697 |
|
1698 Arguments: |
|
1699 pattern the pattern string |
|
1700 options the PCRE options |
|
1701 filename the file name, or NULL for a command-line pattern |
|
1702 count 0 if this is the only command line pattern, or |
|
1703 number of the command line pattern, or |
|
1704 linenumber for a pattern from a file |
|
1705 |
|
1706 Returns: TRUE on success, FALSE after an error |
|
1707 */ |
|
1708 |
|
1709 static BOOL |
|
1710 compile_single_pattern(char *pattern, int options, char *filename, int count) |
|
1711 { |
|
1712 char buffer[MBUFTHIRD + 16]; |
|
1713 const char *error; |
|
1714 int errptr; |
|
1715 |
|
1716 if (pattern_count >= MAX_PATTERN_COUNT) |
|
1717 { |
|
1718 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n", |
|
1719 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT); |
|
1720 return FALSE; |
|
1721 } |
|
1722 |
|
1723 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern, |
|
1724 suffix[process_options]); |
|
1725 pattern_list[pattern_count] = |
|
1726 pcre_compile(buffer, options, &error, &errptr, pcretables); |
|
1727 if (pattern_list[pattern_count] != NULL) |
|
1728 { |
|
1729 pattern_count++; |
|
1730 return TRUE; |
|
1731 } |
|
1732 |
|
1733 /* Handle compile errors */ |
|
1734 |
|
1735 errptr -= (int)strlen(prefix[process_options]); |
|
1736 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern); |
|
1737 |
|
1738 if (filename == NULL) |
|
1739 { |
|
1740 if (count == 0) |
|
1741 fprintf(stderr, "pcregrep: Error in command-line regex " |
|
1742 "at offset %d: %s\n", errptr, error); |
|
1743 else |
|
1744 fprintf(stderr, "pcregrep: Error in %s command-line regex " |
|
1745 "at offset %d: %s\n", ordin(count), errptr, error); |
|
1746 } |
|
1747 else |
|
1748 { |
|
1749 fprintf(stderr, "pcregrep: Error in regex in line %d of %s " |
|
1750 "at offset %d: %s\n", count, filename, errptr, error); |
|
1751 } |
|
1752 |
|
1753 return FALSE; |
|
1754 } |
|
1755 |
|
1756 |
|
1757 |
|
1758 /************************************************* |
|
1759 * Compile one supplied pattern * |
|
1760 *************************************************/ |
|
1761 |
|
1762 /* When the -F option has been used, each string may be a list of strings, |
|
1763 separated by line breaks. They will be matched literally. |
|
1764 |
|
1765 Arguments: |
|
1766 pattern the pattern string |
|
1767 options the PCRE options |
|
1768 filename the file name, or NULL for a command-line pattern |
|
1769 count 0 if this is the only command line pattern, or |
|
1770 number of the command line pattern, or |
|
1771 linenumber for a pattern from a file |
|
1772 |
|
1773 Returns: TRUE on success, FALSE after an error |
|
1774 */ |
|
1775 |
|
1776 static BOOL |
|
1777 compile_pattern(char *pattern, int options, char *filename, int count) |
|
1778 { |
|
1779 if ((process_options & PO_FIXED_STRINGS) != 0) |
|
1780 { |
|
1781 char *eop = pattern + strlen(pattern); |
|
1782 char buffer[MBUFTHIRD]; |
|
1783 for(;;) |
|
1784 { |
|
1785 int ellength; |
|
1786 char *p = end_of_line(pattern, eop, &ellength); |
|
1787 if (ellength == 0) |
|
1788 return compile_single_pattern(pattern, options, filename, count); |
|
1789 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern); |
|
1790 pattern = p; |
|
1791 if (!compile_single_pattern(buffer, options, filename, count)) |
|
1792 return FALSE; |
|
1793 } |
|
1794 } |
|
1795 else return compile_single_pattern(pattern, options, filename, count); |
|
1796 } |
|
1797 |
|
1798 |
|
1799 |
|
1800 /************************************************* |
|
1801 * Main program * |
|
1802 *************************************************/ |
|
1803 |
|
1804 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */ |
|
1805 |
|
1806 int |
|
1807 main(int argc, char **argv) |
|
1808 { |
|
1809 int i, j; |
|
1810 int rc = 1; |
|
1811 int pcre_options = 0; |
|
1812 int cmd_pattern_count = 0; |
|
1813 int hint_count = 0; |
|
1814 int errptr; |
|
1815 BOOL only_one_at_top; |
|
1816 char *patterns[MAX_PATTERN_COUNT]; |
|
1817 const char *locale_from = "--locale"; |
|
1818 const char *error; |
|
1819 |
|
1820 /* Set the default line ending value from the default in the PCRE library; |
|
1821 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf". |
|
1822 */ |
|
1823 |
|
1824 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i); |
|
1825 switch(i) |
|
1826 { |
|
1827 default: newline = (char *)"lf"; break; |
|
1828 case '\r': newline = (char *)"cr"; break; |
|
1829 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break; |
|
1830 case -1: newline = (char *)"any"; break; |
|
1831 case -2: newline = (char *)"anycrlf"; break; |
|
1832 } |
|
1833 |
|
1834 /* Process the options */ |
|
1835 |
|
1836 for (i = 1; i < argc; i++) |
|
1837 { |
|
1838 option_item *op = NULL; |
|
1839 char *option_data = (char *)""; /* default to keep compiler happy */ |
|
1840 BOOL longop; |
|
1841 BOOL longopwasequals = FALSE; |
|
1842 |
|
1843 if (argv[i][0] != '-') break; |
|
1844 |
|
1845 /* If we hit an argument that is just "-", it may be a reference to STDIN, |
|
1846 but only if we have previously had -e or -f to define the patterns. */ |
|
1847 |
|
1848 if (argv[i][1] == 0) |
|
1849 { |
|
1850 if (pattern_filename != NULL || pattern_count > 0) break; |
|
1851 else exit(usage(2)); |
|
1852 } |
|
1853 |
|
1854 /* Handle a long name option, or -- to terminate the options */ |
|
1855 |
|
1856 if (argv[i][1] == '-') |
|
1857 { |
|
1858 char *arg = argv[i] + 2; |
|
1859 char *argequals = strchr(arg, '='); |
|
1860 |
|
1861 if (*arg == 0) /* -- terminates options */ |
|
1862 { |
|
1863 i++; |
|
1864 break; /* out of the options-handling loop */ |
|
1865 } |
|
1866 |
|
1867 longop = TRUE; |
|
1868 |
|
1869 /* Some long options have data that follows after =, for example file=name. |
|
1870 Some options have variations in the long name spelling: specifically, we |
|
1871 allow "regexp" because GNU grep allows it, though I personally go along |
|
1872 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p". |
|
1873 These options are entered in the table as "regex(p)". No option is in both |
|
1874 these categories, fortunately. */ |
|
1875 |
|
1876 for (op = optionlist; op->one_char != 0; op++) |
|
1877 { |
|
1878 char *opbra = strchr(op->long_name, '('); |
|
1879 char *equals = strchr(op->long_name, '='); |
|
1880 if (opbra == NULL) /* Not a (p) case */ |
|
1881 { |
|
1882 if (equals == NULL) /* Not thing=data case */ |
|
1883 { |
|
1884 if (strcmp(arg, op->long_name) == 0) break; |
|
1885 } |
|
1886 else /* Special case xxx=data */ |
|
1887 { |
|
1888 int oplen = equals - op->long_name; |
|
1889 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg; |
|
1890 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0) |
|
1891 { |
|
1892 option_data = arg + arglen; |
|
1893 if (*option_data == '=') |
|
1894 { |
|
1895 option_data++; |
|
1896 longopwasequals = TRUE; |
|
1897 } |
|
1898 break; |
|
1899 } |
|
1900 } |
|
1901 } |
|
1902 else /* Special case xxxx(p) */ |
|
1903 { |
|
1904 char buff1[24]; |
|
1905 char buff2[24]; |
|
1906 int baselen = opbra - op->long_name; |
|
1907 sprintf(buff1, "%.*s", baselen, op->long_name); |
|
1908 sprintf(buff2, "%s%.*s", buff1, |
|
1909 (int)strlen(op->long_name) - baselen - 2, opbra + 1); |
|
1910 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0) |
|
1911 break; |
|
1912 } |
|
1913 } |
|
1914 |
|
1915 if (op->one_char == 0) |
|
1916 { |
|
1917 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]); |
|
1918 exit(usage(2)); |
|
1919 } |
|
1920 } |
|
1921 |
|
1922 |
|
1923 /* Jeffrey Friedl's debugging harness uses these additional options which |
|
1924 are not in the right form for putting in the option table because they use |
|
1925 only one hyphen, yet are more than one character long. By putting them |
|
1926 separately here, they will not get displayed as part of the help() output, |
|
1927 but I don't think Jeffrey will care about that. */ |
|
1928 |
|
1929 #ifdef JFRIEDL_DEBUG |
|
1930 else if (strcmp(argv[i], "-pre") == 0) { |
|
1931 jfriedl_prefix = argv[++i]; |
|
1932 continue; |
|
1933 } else if (strcmp(argv[i], "-post") == 0) { |
|
1934 jfriedl_postfix = argv[++i]; |
|
1935 continue; |
|
1936 } else if (strcmp(argv[i], "-XT") == 0) { |
|
1937 sscanf(argv[++i], "%d", &jfriedl_XT); |
|
1938 continue; |
|
1939 } else if (strcmp(argv[i], "-XR") == 0) { |
|
1940 sscanf(argv[++i], "%d", &jfriedl_XR); |
|
1941 continue; |
|
1942 } |
|
1943 #endif |
|
1944 |
|
1945 |
|
1946 /* One-char options; many that have no data may be in a single argument; we |
|
1947 continue till we hit the last one or one that needs data. */ |
|
1948 |
|
1949 else |
|
1950 { |
|
1951 char *s = argv[i] + 1; |
|
1952 longop = FALSE; |
|
1953 while (*s != 0) |
|
1954 { |
|
1955 for (op = optionlist; op->one_char != 0; op++) |
|
1956 { if (*s == op->one_char) break; } |
|
1957 if (op->one_char == 0) |
|
1958 { |
|
1959 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n", |
|
1960 *s, argv[i]); |
|
1961 exit(usage(2)); |
|
1962 } |
|
1963 if (op->type != OP_NODATA || s[1] == 0) |
|
1964 { |
|
1965 option_data = s+1; |
|
1966 break; |
|
1967 } |
|
1968 pcre_options = handle_option(*s++, pcre_options); |
|
1969 } |
|
1970 } |
|
1971 |
|
1972 /* At this point we should have op pointing to a matched option. If the type |
|
1973 is OP_NODATA, it means that there is no data, and the option might set |
|
1974 something in the PCRE options. */ |
|
1975 |
|
1976 if (op->type == OP_NODATA) |
|
1977 { |
|
1978 pcre_options = handle_option(op->one_char, pcre_options); |
|
1979 continue; |
|
1980 } |
|
1981 |
|
1982 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that |
|
1983 either has a value or defaults to something. It cannot have data in a |
|
1984 separate item. At the moment, the only such options are "colo(u)r" and |
|
1985 Jeffrey Friedl's special -S debugging option. */ |
|
1986 |
|
1987 if (*option_data == 0 && |
|
1988 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER)) |
|
1989 { |
|
1990 switch (op->one_char) |
|
1991 { |
|
1992 case N_COLOUR: |
|
1993 colour_option = (char *)"auto"; |
|
1994 break; |
|
1995 #ifdef JFRIEDL_DEBUG |
|
1996 case 'S': |
|
1997 S_arg = 0; |
|
1998 break; |
|
1999 #endif |
|
2000 } |
|
2001 continue; |
|
2002 } |
|
2003 |
|
2004 /* Otherwise, find the data string for the option. */ |
|
2005 |
|
2006 if (*option_data == 0) |
|
2007 { |
|
2008 if (i >= argc - 1 || longopwasequals) |
|
2009 { |
|
2010 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]); |
|
2011 exit(usage(2)); |
|
2012 } |
|
2013 option_data = argv[++i]; |
|
2014 } |
|
2015 |
|
2016 /* If the option type is OP_PATLIST, it's the -e option, which can be called |
|
2017 multiple times to create a list of patterns. */ |
|
2018 |
|
2019 if (op->type == OP_PATLIST) |
|
2020 { |
|
2021 if (cmd_pattern_count >= MAX_PATTERN_COUNT) |
|
2022 { |
|
2023 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n", |
|
2024 MAX_PATTERN_COUNT); |
|
2025 return 2; |
|
2026 } |
|
2027 patterns[cmd_pattern_count++] = option_data; |
|
2028 } |
|
2029 |
|
2030 /* Otherwise, deal with single string or numeric data values. */ |
|
2031 |
|
2032 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER) |
|
2033 { |
|
2034 *((char **)op->dataptr) = option_data; |
|
2035 } |
|
2036 else |
|
2037 { |
|
2038 char *endptr; |
|
2039 int n = strtoul(option_data, &endptr, 10); |
|
2040 if (*endptr != 0) |
|
2041 { |
|
2042 if (longop) |
|
2043 { |
|
2044 char *equals = strchr(op->long_name, '='); |
|
2045 int nlen = (equals == NULL)? (int)strlen(op->long_name) : |
|
2046 equals - op->long_name; |
|
2047 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n", |
|
2048 option_data, nlen, op->long_name); |
|
2049 } |
|
2050 else |
|
2051 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n", |
|
2052 option_data, op->one_char); |
|
2053 exit(usage(2)); |
|
2054 } |
|
2055 *((int *)op->dataptr) = n; |
|
2056 } |
|
2057 } |
|
2058 |
|
2059 /* Options have been decoded. If -C was used, its value is used as a default |
|
2060 for -A and -B. */ |
|
2061 |
|
2062 if (both_context > 0) |
|
2063 { |
|
2064 if (after_context == 0) after_context = both_context; |
|
2065 if (before_context == 0) before_context = both_context; |
|
2066 } |
|
2067 |
|
2068 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted. |
|
2069 However, the latter two set the only_matching flag. */ |
|
2070 |
|
2071 if ((only_matching && (file_offsets || line_offsets)) || |
|
2072 (file_offsets && line_offsets)) |
|
2073 { |
|
2074 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets " |
|
2075 "and/or --line-offsets\n"); |
|
2076 exit(usage(2)); |
|
2077 } |
|
2078 |
|
2079 if (file_offsets || line_offsets) only_matching = TRUE; |
|
2080 |
|
2081 /* If a locale has not been provided as an option, see if the LC_CTYPE or |
|
2082 LC_ALL environment variable is set, and if so, use it. */ |
|
2083 |
|
2084 if (locale == NULL) |
|
2085 { |
|
2086 locale = getenv("LC_ALL"); |
|
2087 locale_from = "LCC_ALL"; |
|
2088 } |
|
2089 |
|
2090 if (locale == NULL) |
|
2091 { |
|
2092 locale = getenv("LC_CTYPE"); |
|
2093 locale_from = "LC_CTYPE"; |
|
2094 } |
|
2095 |
|
2096 /* If a locale has been provided, set it, and generate the tables the PCRE |
|
2097 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */ |
|
2098 |
|
2099 if (locale != NULL) |
|
2100 { |
|
2101 if (setlocale(LC_CTYPE, locale) == NULL) |
|
2102 { |
|
2103 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n", |
|
2104 locale, locale_from); |
|
2105 return 2; |
|
2106 } |
|
2107 pcretables = pcre_maketables(); |
|
2108 } |
|
2109 |
|
2110 /* Sort out colouring */ |
|
2111 |
|
2112 if (colour_option != NULL && strcmp(colour_option, "never") != 0) |
|
2113 { |
|
2114 if (strcmp(colour_option, "always") == 0) do_colour = TRUE; |
|
2115 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); |
|
2116 else |
|
2117 { |
|
2118 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n", |
|
2119 colour_option); |
|
2120 return 2; |
|
2121 } |
|
2122 if (do_colour) |
|
2123 { |
|
2124 char *cs = getenv("PCREGREP_COLOUR"); |
|
2125 if (cs == NULL) cs = getenv("PCREGREP_COLOR"); |
|
2126 if (cs != NULL) colour_string = cs; |
|
2127 } |
|
2128 } |
|
2129 |
|
2130 /* Interpret the newline type; the default settings are Unix-like. */ |
|
2131 |
|
2132 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0) |
|
2133 { |
|
2134 pcre_options |= PCRE_NEWLINE_CR; |
|
2135 endlinetype = EL_CR; |
|
2136 } |
|
2137 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0) |
|
2138 { |
|
2139 pcre_options |= PCRE_NEWLINE_LF; |
|
2140 endlinetype = EL_LF; |
|
2141 } |
|
2142 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0) |
|
2143 { |
|
2144 pcre_options |= PCRE_NEWLINE_CRLF; |
|
2145 endlinetype = EL_CRLF; |
|
2146 } |
|
2147 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0) |
|
2148 { |
|
2149 pcre_options |= PCRE_NEWLINE_ANY; |
|
2150 endlinetype = EL_ANY; |
|
2151 } |
|
2152 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0) |
|
2153 { |
|
2154 pcre_options |= PCRE_NEWLINE_ANYCRLF; |
|
2155 endlinetype = EL_ANYCRLF; |
|
2156 } |
|
2157 else |
|
2158 { |
|
2159 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline); |
|
2160 return 2; |
|
2161 } |
|
2162 |
|
2163 /* Interpret the text values for -d and -D */ |
|
2164 |
|
2165 if (dee_option != NULL) |
|
2166 { |
|
2167 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ; |
|
2168 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE; |
|
2169 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP; |
|
2170 else |
|
2171 { |
|
2172 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option); |
|
2173 return 2; |
|
2174 } |
|
2175 } |
|
2176 |
|
2177 if (DEE_option != NULL) |
|
2178 { |
|
2179 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ; |
|
2180 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP; |
|
2181 else |
|
2182 { |
|
2183 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option); |
|
2184 return 2; |
|
2185 } |
|
2186 } |
|
2187 |
|
2188 /* Check the values for Jeffrey Friedl's debugging options. */ |
|
2189 |
|
2190 #ifdef JFRIEDL_DEBUG |
|
2191 if (S_arg > 9) |
|
2192 { |
|
2193 fprintf(stderr, "pcregrep: bad value for -S option\n"); |
|
2194 return 2; |
|
2195 } |
|
2196 if (jfriedl_XT != 0 || jfriedl_XR != 0) |
|
2197 { |
|
2198 if (jfriedl_XT == 0) jfriedl_XT = 1; |
|
2199 if (jfriedl_XR == 0) jfriedl_XR = 1; |
|
2200 } |
|
2201 #endif |
|
2202 |
|
2203 /* Get memory to store the pattern and hints lists. */ |
|
2204 |
|
2205 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *)); |
|
2206 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); |
|
2207 |
|
2208 if (pattern_list == NULL || hints_list == NULL) |
|
2209 { |
|
2210 fprintf(stderr, "pcregrep: malloc failed\n"); |
|
2211 goto EXIT2; |
|
2212 } |
|
2213 |
|
2214 /* If no patterns were provided by -e, and there is no file provided by -f, |
|
2215 the first argument is the one and only pattern, and it must exist. */ |
|
2216 |
|
2217 if (cmd_pattern_count == 0 && pattern_filename == NULL) |
|
2218 { |
|
2219 if (i >= argc) return usage(2); |
|
2220 patterns[cmd_pattern_count++] = argv[i++]; |
|
2221 } |
|
2222 |
|
2223 /* Compile the patterns that were provided on the command line, either by |
|
2224 multiple uses of -e or as a single unkeyed pattern. */ |
|
2225 |
|
2226 for (j = 0; j < cmd_pattern_count; j++) |
|
2227 { |
|
2228 if (!compile_pattern(patterns[j], pcre_options, NULL, |
|
2229 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1)) |
|
2230 goto EXIT2; |
|
2231 } |
|
2232 |
|
2233 /* Compile the regular expressions that are provided in a file. */ |
|
2234 |
|
2235 if (pattern_filename != NULL) |
|
2236 { |
|
2237 int linenumber = 0; |
|
2238 FILE *f; |
|
2239 char *filename; |
|
2240 char buffer[MBUFTHIRD]; |
|
2241 |
|
2242 if (strcmp(pattern_filename, "-") == 0) |
|
2243 { |
|
2244 f = stdin; |
|
2245 filename = stdin_name; |
|
2246 } |
|
2247 else |
|
2248 { |
|
2249 f = fopen(pattern_filename, "r"); |
|
2250 if (f == NULL) |
|
2251 { |
|
2252 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, |
|
2253 strerror(errno)); |
|
2254 goto EXIT2; |
|
2255 } |
|
2256 filename = pattern_filename; |
|
2257 } |
|
2258 |
|
2259 while (fgets(buffer, MBUFTHIRD, f) != NULL) |
|
2260 { |
|
2261 char *s = buffer + (int)strlen(buffer); |
|
2262 while (s > buffer && isspace((unsigned char)(s[-1]))) s--; |
|
2263 *s = 0; |
|
2264 linenumber++; |
|
2265 if (buffer[0] == 0) continue; /* Skip blank lines */ |
|
2266 if (!compile_pattern(buffer, pcre_options, filename, linenumber)) |
|
2267 goto EXIT2; |
|
2268 } |
|
2269 |
|
2270 if (f != stdin) fclose(f); |
|
2271 } |
|
2272 |
|
2273 /* Study the regular expressions, as we will be running them many times */ |
|
2274 |
|
2275 for (j = 0; j < pattern_count; j++) |
|
2276 { |
|
2277 hints_list[j] = pcre_study(pattern_list[j], 0, &error); |
|
2278 if (error != NULL) |
|
2279 { |
|
2280 char s[16]; |
|
2281 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j); |
|
2282 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); |
|
2283 goto EXIT2; |
|
2284 } |
|
2285 hint_count++; |
|
2286 } |
|
2287 |
|
2288 /* If there are include or exclude patterns, compile them. */ |
|
2289 |
|
2290 if (exclude_pattern != NULL) |
|
2291 { |
|
2292 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, |
|
2293 pcretables); |
|
2294 if (exclude_compiled == NULL) |
|
2295 { |
|
2296 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n", |
|
2297 errptr, error); |
|
2298 goto EXIT2; |
|
2299 } |
|
2300 } |
|
2301 |
|
2302 if (include_pattern != NULL) |
|
2303 { |
|
2304 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, |
|
2305 pcretables); |
|
2306 if (include_compiled == NULL) |
|
2307 { |
|
2308 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n", |
|
2309 errptr, error); |
|
2310 goto EXIT2; |
|
2311 } |
|
2312 } |
|
2313 |
|
2314 if (exclude_dir_pattern != NULL) |
|
2315 { |
|
2316 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr, |
|
2317 pcretables); |
|
2318 if (exclude_dir_compiled == NULL) |
|
2319 { |
|
2320 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n", |
|
2321 errptr, error); |
|
2322 goto EXIT2; |
|
2323 } |
|
2324 } |
|
2325 |
|
2326 if (include_dir_pattern != NULL) |
|
2327 { |
|
2328 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr, |
|
2329 pcretables); |
|
2330 if (include_dir_compiled == NULL) |
|
2331 { |
|
2332 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n", |
|
2333 errptr, error); |
|
2334 goto EXIT2; |
|
2335 } |
|
2336 } |
|
2337 |
|
2338 /* If there are no further arguments, do the business on stdin and exit. */ |
|
2339 |
|
2340 if (i >= argc) |
|
2341 { |
|
2342 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL); |
|
2343 goto EXIT; |
|
2344 } |
|
2345 |
|
2346 /* Otherwise, work through the remaining arguments as files or directories. |
|
2347 Pass in the fact that there is only one argument at top level - this suppresses |
|
2348 the file name if the argument is not a directory and filenames are not |
|
2349 otherwise forced. */ |
|
2350 |
|
2351 only_one_at_top = i == argc - 1; /* Catch initial value of i */ |
|
2352 |
|
2353 for (; i < argc; i++) |
|
2354 { |
|
2355 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE, |
|
2356 only_one_at_top); |
|
2357 if (frc > 1) rc = frc; |
|
2358 else if (frc == 0 && rc == 1) rc = 0; |
|
2359 } |
|
2360 |
|
2361 EXIT: |
|
2362 if (pattern_list != NULL) |
|
2363 { |
|
2364 for (i = 0; i < pattern_count; i++) free(pattern_list[i]); |
|
2365 free(pattern_list); |
|
2366 } |
|
2367 if (hints_list != NULL) |
|
2368 { |
|
2369 for (i = 0; i < hint_count; i++) free(hints_list[i]); |
|
2370 free(hints_list); |
|
2371 } |
|
2372 return rc; |
|
2373 |
|
2374 EXIT2: |
|
2375 rc = 2; |
|
2376 goto EXIT; |
|
2377 } |
|
2378 |
|
2379 /* End of pcregrep */ |