"""Utilities for comparing files and directories.

Classes:
    dircmp

Functions:
    cmp(f1, f2, shallow=1) -> bool
    cmpfiles(a, b, common, shallow=1) -> ([], [], [])

"""
|
11 |
|
12 import os |
|
13 import stat |
|
14 from itertools import ifilter, ifilterfalse, imap, izip |
|
15 |
|
# Names exported by "from filecmp import *".
__all__ = ["cmp", "dircmp", "cmpfiles"]

# Results of earlier content comparisons made by cmp(), keyed by (f1, f2).
_cache = {}

# Number of bytes read from each file per step when comparing contents.
BUFSIZE = 8 * 1024
|
20 |
|
def cmp(f1, f2, shallow=1):
    """Compare two files.

    Arguments:

    f1 -- First file name

    f2 -- Second file name

    shallow -- If true (the default), two files whose stat signatures
               (file type, size, modification time) are identical are
               taken to be equal without reading their contents.

    Return value:

    True if the files are the same, False otherwise.  If either name is
    not a regular file the result is False.

    Raises os.error if either file cannot be stat'ed.

    Results of content comparisons are cached per (f1, f2) pair together
    with the signatures they were computed for; a cached result is reused
    only while both signatures are unchanged.  The cache is emptied once
    it holds more than 100 entries so that it cannot grow without bound.

    """
    s1 = _sig(os.stat(f1))
    s2 = _sig(os.stat(f2))
    if s1[0] != stat.S_IFREG or s2[0] != stat.S_IFREG:
        return False
    if shallow and s1 == s2:
        return True
    if s1[1] != s2[1]:
        # Different sizes: the contents cannot be equal.
        return False

    result = _cache.get((f1, f2))
    if result and (s1, s2) == result[:2]:
        # Cached outcome is still valid: neither signature has changed.
        return result[2]
    outcome = _do_cmp(f1, f2)
    if len(_cache) > 100:
        # Keep memory use bounded in long-running processes.
        _cache.clear()
    _cache[f1, f2] = s1, s2, outcome
    return outcome
|
57 |
|
58 def _sig(st): |
|
59 return (stat.S_IFMT(st.st_mode), |
|
60 st.st_size, |
|
61 st.st_mtime) |
|
62 |
|
def _do_cmp(f1, f2):
    """Return True if files f1 and f2 have identical contents.

    Both files are opened in binary mode and read BUFSIZE bytes at a
    time; the comparison stops at the first differing chunk.  Both file
    objects are closed before returning, including when a read fails.
    """
    bufsize = BUFSIZE
    fp1 = open(f1, 'rb')
    try:
        fp2 = open(f2, 'rb')
        try:
            while True:
                b1 = fp1.read(bufsize)
                b2 = fp2.read(bufsize)
                if b1 != b2:
                    return False
                if not b1:
                    # Both files hit end-of-file together with no mismatch.
                    return True
        finally:
            fp2.close()
    finally:
        fp1.close()
|
74 |
|
# Directory comparison class.
#
class dircmp:
    """A class that manages the comparison of 2 directories.

    dircmp(a, b, ignore=None, hide=None)
      A and B are directories.
      IGNORE is a list of names to ignore,
        defaults to ['RCS', 'CVS', 'tags'].
      HIDE is a list of names to hide,
        defaults to [os.curdir, os.pardir].

    High level usage:
      x = dircmp(dir1, dir2)
      x.report() -> prints a report on the differences between dir1 and dir2
       or
      x.report_partial_closure() -> prints report on differences between dir1
            and dir2, and reports on common immediate subdirectories.
      x.report_full_closure() -> like report_partial_closure,
            but fully recursive.

    Attributes (each is computed on first access, see methodmap):
     left_list, right_list: The files in dir1 and dir2,
        filtered by hide and ignore.
     common: a list of names in both dir1 and dir2.
     left_only, right_only: names only in dir1, dir2.
     common_dirs: subdirectories in both dir1 and dir2.
     common_files: files in both dir1 and dir2.
     common_funny: names in both dir1 and dir2 where the type differs between
        dir1 and dir2, or the name is not stat-able.
     same_files: list of identical files.
     diff_files: list of filenames which differ.
     funny_files: list of files which could not be compared.
     subdirs: a dictionary of dircmp objects, keyed by names in common_dirs.
    """

    def __init__(self, a, b, ignore=None, hide=None):
        """Remember the two directories and the names to hide/ignore."""
        self.left = a
        self.right = b
        if hide is None:
            self.hide = [os.curdir, os.pardir]  # Names never to be shown
        else:
            self.hide = hide
        if ignore is None:
            self.ignore = ['RCS', 'CVS', 'tags']  # Names ignored in comparison
        else:
            self.ignore = ignore

    def phase0(self):
        """Set left_list and right_list: sorted, filtered listings."""
        # list() lets hide and ignore be any mix of lists and tuples.
        skip = list(self.hide) + list(self.ignore)
        self.left_list = sorted(
            name for name in os.listdir(self.left) if name not in skip)
        self.right_list = sorted(
            name for name in os.listdir(self.right) if name not in skip)

    def phase1(self):
        """Set common, left_only and right_only."""
        # Names are matched on their os.path.normcase() form, but the
        # spelling found in each directory is what gets reported.
        a = dict((os.path.normcase(name), name) for name in self.left_list)
        b = dict((os.path.normcase(name), name) for name in self.right_list)
        self.common = [a[key] for key in a if key in b]
        self.left_only = [a[key] for key in a if key not in b]
        self.right_only = [b[key] for key in b if key not in a]

    def phase2(self):
        """Split common into common_dirs, common_files and common_funny."""
        self.common_dirs = []
        self.common_files = []
        self.common_funny = []

        for x in self.common:
            a_path = os.path.join(self.left, x)
            b_path = os.path.join(self.right, x)

            try:
                a_stat = os.stat(a_path)
                b_stat = os.stat(b_path)
            except os.error:
                # A name that cannot be stat'ed on either side is funny.
                self.common_funny.append(x)
                continue

            a_type = stat.S_IFMT(a_stat.st_mode)
            b_type = stat.S_IFMT(b_stat.st_mode)
            if a_type != b_type:
                self.common_funny.append(x)
            elif stat.S_ISDIR(a_type):
                self.common_dirs.append(x)
            elif stat.S_ISREG(a_type):
                self.common_files.append(x)
            else:
                self.common_funny.append(x)

    def phase3(self):
        """Set same_files, diff_files and funny_files via cmpfiles()."""
        xx = cmpfiles(self.left, self.right, self.common_files)
        self.same_files, self.diff_files, self.funny_files = xx

    def phase4(self):
        """Set subdirs: one dircmp object per common subdirectory."""
        # A new dircmp object is created for each common subdirectory,
        # these are stored in a dictionary indexed by filename.
        # The hide and ignore properties are inherited from the parent
        self.subdirs = {}
        for x in self.common_dirs:
            a_x = os.path.join(self.left, x)
            b_x = os.path.join(self.right, x)
            self.subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide)

    def phase4_closure(self):
        """Recursively call phase4() on subdirectories."""
        self.phase4()
        for sd in self.subdirs.values():
            sd.phase4_closure()

    def report(self):
        """Print a report on the differences between left and right.

        Each non-empty result list is sorted in place before it is printed.
        """
        # Output format is purposely lousy.
        # print() is always given one pre-formatted string so that the
        # output is the same whether print is a statement or a function.
        print('diff %s %s' % (self.left, self.right))
        sections = (
            ('Only in %s :' % (self.left,), 'left_only'),
            ('Only in %s :' % (self.right,), 'right_only'),
            ('Identical files :', 'same_files'),
            ('Differing files :', 'diff_files'),
            ('Trouble with common files :', 'funny_files'),
            ('Common subdirectories :', 'common_dirs'),
            ('Common funny cases :', 'common_funny'),
        )
        for label, attr in sections:
            names = getattr(self, attr)
            if names:
                names.sort()
                print('%s %s' % (label, names))

    def report_partial_closure(self):
        """Print reports on self and on immediate common subdirectories."""
        self.report()
        for sd in self.subdirs.values():
            print('')
            sd.report()

    def report_full_closure(self):
        """Print reports on self and on common subdirectories, recursively."""
        self.report()
        for sd in self.subdirs.values():
            print('')
            sd.report_full_closure()

    # Maps each lazily computed attribute to the phase that sets it.
    methodmap = dict(subdirs=phase4,
                     same_files=phase3, diff_files=phase3, funny_files=phase3,
                     common_dirs=phase2, common_files=phase2,
                     common_funny=phase2,
                     common=phase1, left_only=phase1, right_only=phase1,
                     left_list=phase0, right_list=phase0)

    def __getattr__(self, attr):
        """Compute a missing result attribute by running its phase.

        Only called when normal lookup fails, i.e. before the phase has
        stored the attribute.  Raises AttributeError for unknown names.
        """
        if attr not in self.methodmap:
            raise AttributeError(attr)
        self.methodmap[attr](self)
        return getattr(self, attr)
|
240 |
|
def cmpfiles(a, b, common, shallow=1):
    """Compare common files in two directories.

    a, b -- directory names
    common -- list of file names found in both directories
    shallow -- if true, do comparison based solely on stat() information

    Returns a tuple of three lists:
      files that compare equal
      files that are different
      files that could not be compared (can't stat, etc.)

    """
    same, different, funny = [], [], []
    buckets = (same, different, funny)
    for name in common:
        left_path = os.path.join(a, name)
        right_path = os.path.join(b, name)
        # _cmp() yields 0, 1 or 2, which selects the matching list.
        buckets[_cmp(left_path, right_path, shallow)].append(name)
    return buckets
|
260 |
|
261 |
|
# Compare two files.
# Return:
#       0 for equal
#       1 for different
#       2 for funny cases (can't stat, can't read, etc.)
#
def _cmp(a, b, sh, abs=abs, cmp=cmp):
    """Classify the comparison of files a and b as 0, 1 or 2.

    sh is passed to cmp() as its shallow flag.  The abs and cmp defaults
    bind those functions when this function is defined.  The equal and
    different cases are returned as False and True, which index like 0
    and 1.  IOError is caught alongside os.error so that a file that can
    be stat'ed but not opened or read is reported as funny instead of
    aborting the caller.
    """
    try:
        return not abs(cmp(a, b, sh))
    except (os.error, IOError):
        return 2
|
273 |
|
274 |
|
275 # Return a copy with items that occur in skip removed. |
|
276 # |
|
277 def _filter(flist, skip): |
|
278 return list(ifilterfalse(skip.__contains__, flist)) |
|
279 |
|
280 |
|
# Demonstration and testing.
#
def demo():
    """Compare the two directories named on the command line.

    With the -r option the report covers common subdirectories
    recursively; otherwise only the two directories themselves are
    reported.  Raises getopt.GetoptError unless exactly two positional
    arguments are given.
    """
    import sys
    import getopt
    options, args = getopt.getopt(sys.argv[1:], 'r')
    if len(args) != 2:
        raise getopt.GetoptError('need exactly two args', None)
    left, right = args
    dd = dircmp(left, right)
    recursive = ('-r', '') in options
    if recursive:
        dd.report_full_closure()
    else:
        dd.report()


if __name__ == '__main__':
    demo()