equal
deleted
inserted
replaced
|
1 #! /usr/bin/env python |
|
2 |
|
3 # 1) Regular Expressions Test |
|
4 # |
|
5 # Read a file of (extended per egrep) regular expressions (one per line), |
|
6 # and apply those to all files whose names are listed on the command line. |
|
7 # Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns |
|
8 # against five /etc/termcap files. Tests using more elaborate patterns |
|
9 # would also be interesting. Your code should not break if given hundreds |
|
10 # of regular expressions or binary files to scan. |
|
11 |
|
12 # This implementation: |
|
13 # - combines all patterns into a single one using ( ... | ... | ... ) |
|
14 # - reads patterns from stdin, scans files given as command line arguments |
|
15 # - produces output in the format <file>:<lineno>:<line> |
|
16 # - is only about 2.5 times as slow as egrep (though I couldn't run |
|
17 # Tom's test -- this system, a vanilla SGI, only has /etc/terminfo) |
|
18 |
|
19 import string |
|
20 import sys |
|
21 import re |
|
22 |
|
def main():
    """Print every line of the named files that matches any pattern on stdin.

    Patterns are read from standard input, one regular expression per line,
    and combined into a single alternation.  Each file named in
    ``sys.argv[1:]`` is then scanned line by line, and every matching line is
    written to standard output as ``<file>:<lineno>:<line>``.  A file that
    cannot be opened is reported on standard output as ``<file>: <error>``
    and skipped.

    Raises:
        re.error: if the combined pattern is not a valid regular expression.
    """
    # One pattern per input line.  chomp() drops only trailing newlines, so a
    # blank line remains an empty (match-everything) pattern, as in egrep.
    pats = [chomp(raw) for raw in sys.stdin.readlines()]

    if pats:
        # A non-capturing wrapper keeps the alternation grouped without
        # inserting an extra group in front of the user's own groups; a
        # capturing wrapper would turn "(a)\1" into a reference to a group
        # that is still open.  Group numbers are still shared across all of
        # the joined patterns.
        bigpat = '(?:' + '|'.join(pats) + ')'
    else:
        # No patterns at all must match nothing (like "egrep -f" on an empty
        # file); an empty alternation would instead match every line.
        bigpat = '(?!)'
    prog = re.compile(bigpat)

    # sys.stdout.write and "except ... as" behave the same on Python 2.6+ and
    # Python 3, unlike the print statement and "except IOError, msg".
    emit = sys.stdout.write

    for path in sys.argv[1:]:
        # Keep the try body to the one call whose failure we want to report.
        try:
            fp = open(path, 'r')
        except IOError as msg:
            emit("%s: %s\n" % (path, msg))
            continue
        # NOTE(review): under Python 3, text-mode decoding may raise
        # UnicodeDecodeError on binary input -- confirm whether the file
        # should be opened with a lenient error handler there.
        with fp:  # close the file even if scanning raises
            for lineno, line in enumerate(fp, 1):
                if prog.search(line):
                    # A final line without a terminator would otherwise run
                    # into the next record on the same output line.
                    if not line.endswith('\n'):
                        line += '\n'
                    emit("%s:%s:%s" % (path, lineno, line))
|
42 |
|
def chomp(s):
    """Return ``s`` with all trailing newline characters removed."""
    end = len(s)
    # Walk backwards over the run of newline characters at the tail, if any.
    while end > 0 and s[end - 1] == '\n':
        end -= 1
    return s[:end]
|
45 |
|
# Script entry point: run the scanner only when this file is executed
# directly, so that importing it as a module does nothing.
if __name__ == "__main__":
    main()