|
1 #! /usr/bin/env python |
|
2 |
|
3 """Show file statistics by extension.""" |
|
4 |
|
5 import os |
|
6 import sys |
|
7 |
|
class Stats:
    """Collect per-extension statistics about files and print a table.

    ``self.stats`` maps a category label to a dict of counters.  The label
    is a (case-normalised) file extension such as ``".py"``, or one of the
    pseudo-labels ``"<none>"`` (no extension), ``"<dir>"``, ``"<lnk>"`` and
    ``"<???>"`` (argument that is neither a file nor a directory).

    The code is written to run unchanged on Python 2.6+ and Python 3.
    """

    def __init__(self):
        # {label: {counter_name: int}}
        self.stats = {}

    def statargs(self, args):
        """Gather statistics for every path in *args*.

        Directories are walked recursively, regular files are examined
        directly, and anything else is reported on stderr and counted
        under ``"<???>"`` / ``"unknown"``.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Recursively gather statistics for the directory *dir*.

        The directory itself is counted under ``"<dir>"``.  If it cannot be
        listed, a message goes to stderr, ``"unlistable"`` is incremented
        and the directory is skipped.  Symbolic links are counted under
        ``"<lnk>"`` and never followed.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:  # os.error is an alias of OSError
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in sorted(names):
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Gather statistics for the single file *filename*.

        Always counts ``"files"``.  If the file cannot be opened, a message
        goes to stderr and ``"unopenable"`` is counted.  Otherwise
        ``"bytes"`` is added; a file containing a NUL byte is counted as
        ``"binary"`` and not analysed further; any other file contributes
        ``"lines"`` and ``"words"`` (and ``"empty"`` when it has no data).
        """
        ext = os.path.splitext(filename)[1]
        base = os.path.split(filename)[1]
        if ext == base:
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            f = open(filename, "rb")
        except IOError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        # The context manager closes the handle even if read() fails.
        with f:
            data = f.read()
        self.addstats(ext, "bytes", len(data))
        # The file is read in binary mode, so compare against a bytes
        # literal; a str literal would raise TypeError on Python 3.
        if b"\0" in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        self.addstats(ext, "lines", len(data.splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of category *ext*, creating both on demand."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics as an aligned table on stdout.

        One row per category (sorted), then a ``TOTAL`` row, framed by a
        header line at the top and at the bottom.  Counters a category
        does not have are left blank.  ``self.stats`` is not modified, so
        the report can be printed repeatedly, and an empty collection
        simply yields a table with no category rows.
        """
        exts = sorted(self.stats)
        # Column keys: the union of every counter name seen in any category.
        cols = sorted(set(key for ext in exts for key in self.stats[ext]))
        minwidth = 6
        # The label column must also fit its own header and the TOTAL row.
        extwidth = max(len(label) for label in exts + ["ext", "TOTAL"])
        colwidth = {}
        totals = {}
        for col in cols:
            values = [self.stats[ext][col]
                      for ext in exts if col in self.stats[ext]]
            total = sum(values)
            widest = max(len("%d" % value) for value in values + [total])
            colwidth[col] = max(minwidth, len(col), widest)
            totals[col] = total

        def formatrow(label, values):
            # Right-align every cell; a missing counter becomes blank padding.
            cells = ["%*s" % (extwidth, label)]
            cells.extend("%*s" % (colwidth[col], values.get(col, ""))
                         for col in cols)
            return " ".join(cells)

        header = formatrow("ext", dict((col, col) for col in cols))
        lines = [header]
        lines.extend(formatrow(ext, self.stats[ext]) for ext in exts)
        lines.append(formatrow("TOTAL", totals))
        lines.append(header)  # Another header at the bottom
        sys.stdout.write("\n".join(lines) + "\n")
|
121 |
|
def main():
    """Scan the paths given on the command line and print the report.

    Every argument after the program name is passed to Stats.statargs();
    when there are none, the current directory (os.curdir) is used.
    """
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()