1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 """
21 File manipulation related functionalities:
22 * Filescanner
23 * rmtree (fixed version)
24 * move (fixed version)
25 """
26 import codecs
27 import fnmatch
28 import locale
29 import logging
30 import os
31 import re
32 import sys
33 import shutil
34 import StringIO
35 import hashlib
36 import subprocess
37
38
39 import pathaddition.match
40 import stat
41
42 LOGGER = logging.getLogger('fileutils')
43 LOGGER_LOCK = logging.getLogger('fileutils.lock')
44
45
46
48 """ This class implements all the required infrastructure for filescanning. """
49
51 """ Initialization. """
52 self.includes = []
53 self.excludes = []
54 self.includes_files = []
55 self.excludes_files = []
56 self.selectors = []
57 self.filetypes = []
58
60 """ Adds an include path to the scanner. """
61 if include.endswith('/') or include.endswith('\\'):
62 include = include + '**'
63
64 self.includes.append(include)
65
67 """ Adds an exclude path to the scanner. """
68 if exclude.endswith('/') or exclude.endswith('\\'):
69 exclude = exclude + '**'
70
71 self.excludes.append(exclude)
72
74 """ Adds an exclude file to the scanner. """
75 self.excludes_files.append(exclude)
76
80
82 """ Adds a filetype selection to the scanner. """
83 self.filetypes.append(filetype)
84
86 """ Returns if path is included by the scanner. """
87 LOGGER.debug("is_included: path = " + path)
88 if path.replace('\\', '/') in self.includes_files or path in self.includes_files:
89 return True
90 for inc in self.includes:
91 if self.match(path, inc):
92 LOGGER.debug("Included: " + path + " by " + inc)
93 return True
94 return False
95
97 """ Returns if path is excluded by the scanner. """
98 LOGGER.debug("is_excluded: path = " + path)
99 if path.replace('\\', '/') in self.excludes_files or path in self.excludes_files:
100 return True
101 for ex in self.excludes:
102 if self.match(path, ex):
103 LOGGER.debug("Excluded: " + path + " by " + ex)
104 return True
105 return False
106
108 """ Returns if path is selected by all selectors in the scanner. """
109 LOGGER.debug("is_selected: path = " + path)
110 for selector in self.selectors:
111 if not selector.is_selected(path):
112 return False
113 LOGGER.debug("Selected: " + path)
114 return True
115
117 """ Test if a file matches one filetype. """
118 if len(self.filetypes) == 0:
119 return True
120 LOGGER.debug("is_filetype: path = " + path)
121 for filetype in self.filetypes:
122 if self.match(path, filetype):
123 LOGGER.debug("Filetype: " + path + " by " + filetype)
124 return True
125 return False
126
127 - def match(self, filename, pattern):
130
136
138 """ Returns a string representing this instance. """
139 content = []
140 for inc in self.includes:
141 content.append('include:' + os.path.normpath(inc))
142 for ex in self.excludes:
143 content.append('exclude:' + os.path.normpath(ex))
144 return ';'.join(content)
145
147 """ Returns a string representing this instance. """
148 return self.__str__()
149
151 """ Abstract method which much be overriden to implement the scanning process. """
152 raise Exception("scan method must be overriden")
153
154
156 """Scans the filesystem for files that match the selection paths.
157
158 The scanner is configured with a root directory. Any number of include
159 and exclude paths can be added. The scan() method is a generator that
160 returns matching files one at a time when called as an iterator.
161
162 This is a revisited implementation of the filescanner. It now relies on
163 the module pathaddition.match that implements a Ant-like regular expression matcher.
164
165 Rules:
166 - Includes and excludes should not start with *
167 - Includes and excludes should not have wildcard searches ending with ** (e.g. wildcard**)
168
169 Supported includes and excludes:
170 - filename.txt
171 - filename.*
172 - dir/
173 - dir/*
174 - dir/**
175 """
182
183
184
186 """ Scans the files required to zip"""
187
188
189 excludescopy = self.excludes[:]
190 for f in excludescopy:
191 if os.path.exists(os.path.normpath(os.path.join(self.root_dir, f))):
192 self.excludes_files.append(f)
193 self.excludes.remove(f)
194
195 includescopy = self.includes[:]
196 for f in includescopy:
197 if os.path.exists(os.path.normpath(os.path.join(self.root_dir, f))):
198 self.includes_files.append(f)
199 self.includes.remove(f)
200
201 LOGGER.debug('Scanning sub-root directories')
202 for root_dir in self.find_subroots():
203 for dirpath, subdirs, files in os.walk(unicode(root_dir)):
204 subroot = dirpath[len(self.root_dir):]
205
206 dirs_to_remove = []
207 for subdir in subdirs:
208 if self.is_excluded(os.path.join(subroot, subdir)):
209 dirs_to_remove.append(subdir)
210
211 for dir_remove in dirs_to_remove:
212 subdirs.remove(dir_remove)
213
214 LOGGER.debug('Scanning directory: ' + dirpath)
215 for file_ in files:
216 path = os.path.join(subroot, file_)
217 if self.is_filetype(path) and self.is_included(path) and \
218 self.is_selected(os.path.join(dirpath, file_)) and not self.is_excluded(path):
219 ret_path = os.path.join(dirpath, file_)
220 yield ret_path
221
222 LOGGER.debug('Checking for empty directory: ' + dirpath)
223
224 if self.is_included(subroot) and not self.is_excluded(subroot):
225 if len(files) == 0 and len(subdirs) == 0:
226 yield dirpath
227
228
230 """Finds all the subdirectory roots based on the include paths.
231
232 Often large archive operations define a number of archives from the root
233 of the drive. Walking the tree from the root is very time-consuming, so
234 selecting more specific subdirectory roots improves performance.
235 """
236 def splitpath(path):
237 """ Returns the splitted path"""
238 return path.split(os.sep)
239
240 root_dirs = []
241
242
243 subdirs_not_usable = False
244 for inc in self.includes + self.includes_files:
245 first_path_segment = splitpath(os.path.normpath(inc))[0]
246 if first_path_segment.find('*') != -1:
247 subdirs_not_usable = True
248
249
250 if not subdirs_not_usable:
251 for inc in self.includes + self.includes_files:
252 include = None
253 LOGGER.debug("===> inc %s" % inc)
254 contains_globs = False
255 for pathcomp in splitpath(os.path.normpath(inc)):
256 if pathcomp.find('*') != -1:
257 contains_globs = True
258 break
259 else:
260 if include == None:
261 include = pathcomp
262 else:
263 include = os.path.join(include, pathcomp)
264 if not contains_globs:
265 include = os.path.dirname(include)
266
267 LOGGER.debug("include %s" % include)
268 if include != None:
269 root_dir = os.path.normpath(os.path.join(self.root_dir, include))
270 is_new_root = True
271 for root in root_dirs[:]:
272 if destinsrc(root, root_dir):
273 LOGGER.debug("root contains include, skip it")
274 is_new_root = False
275 break
276 if destinsrc(root_dir, root):
277 LOGGER.debug("include contains root, so remove root")
278 root_dirs.remove(root)
279 if is_new_root:
280 root_dirs.append(root_dir)
281
282 if len(root_dirs) == 0:
283 root_dirs = [os.path.normpath(self.root_dir)]
284 LOGGER.debug('Roots = ' + str(root_dirs))
285 return root_dirs
286
289
292
293
def move(src, dst):
    """Recursively move a file or directory to another location.

    If the destination is on our current filesystem, then simply use
    rename. Otherwise, copy src to the dst and then remove src.
    A lot more could be done here... A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    :param src: path of the file or directory to move.
    :param dst: destination path.
    :raise Exception: if src is a directory and dst lies inside it.
    """
    try:
        os.rename(src, dst)
        return
    except OSError:
        # Fall through to the copy-then-delete strategy below.
        pass

    if os.path.isdir(src):
        if destinsrc(src, dst):
            raise Exception("Cannot move a directory '%s' into itself '%s'." % (src, dst))
        shutil.copytree(src, dst, symlinks=True)
        rmtree(src)
    else:
        shutil.copy2(src, dst)
        os.unlink(src)
314
def rmtree(rootdir):
    """ Catch shutil.rmtree failures on Windows when files are read-only. Thanks Google!

    :param rootdir: directory tree to delete.
    :return: whatever shutil.rmtree returns.
    """
    if sys.platform == 'win32':
        rootdir = os.path.normpath(rootdir)
        if not os.path.isabs(rootdir):
            rootdir = os.path.join(os.path.abspath('.'), rootdir)
        # Paths that do not already start with two backslashes get the
        # "\\?\" prefix (presumably to lift the Windows path-length limit).
        if not rootdir.startswith('\\\\'):
            rootdir = u"\\\\?\\" + rootdir

    # Same value as the octal mode 666 (read/write for user, group and other),
    # spelled with stat constants so it parses on every Python version.
    read_write_all = (stat.S_IRUSR | stat.S_IWUSR |
                      stat.S_IRGRP | stat.S_IWGRP |
                      stat.S_IROTH | stat.S_IWOTH)

    def cb_handle_error(fcn, path, excinfo):
        """ Error handler, removing readonly and deleting the file. """
        os.chmod(path, read_write_all)
        if os.path.isdir(path):
            rmdir(path)
        elif os.path.isfile(path):
            remove(path)
        else:
            # Neither a directory nor a regular file: retry the failed call.
            fcn(path)

    return shutil.rmtree(rootdir, onerror=cb_handle_error)
334
def destinsrc(src, dst):
    """ Fixed version of destinscr, that doesn't match dst with same root name.

    :param src: path of the candidate containing directory.
    :param dst: path tested against src.
    :return: True if dst is src itself or is located under src, else False.
    """
    if sys.platform == "win32":
        # Lowercase both paths so the prefix test ignores case on win32.
        src = src.lower()
        dst = dst.lower()

    src = os.path.abspath(src)
    dst = os.path.abspath(dst)

    # Terminate both paths with a separator so that a sibling sharing the same
    # name prefix (e.g. "/a/bc" against "/a/b") is not reported as contained.
    if not src.endswith(os.path.sep):
        src += os.path.sep
    if not dst.endswith(os.path.sep):
        dst += os.path.sep
    return dst.startswith(src)
347
348
350 """ Search for executable in the PATH."""
351 pathlist = os.environ['PATH'].split(os.pathsep)
352 for folder in pathlist:
353 filename = os.path.join(folder, executable)
354 try:
355 status = os.stat(filename)
356 except os.error:
357 continue
358
359 if stat.S_ISREG(status[stat.ST_MODE]):
360 mode = stat.S_IMODE(status[stat.ST_MODE])
361 if mode & 0111:
362 return os.path.normpath(filename)
363 return None
364
365
367 """ Read the policy number from the policy file.
368 strict allows to activate the new policy scanning.
369 """
370 value = None
371 error = ""
372 try:
373 LOGGER.debug('Opening policy file: ' + filename)
374 policy_data = load_policy_content(filename)
375 match = re.match(r'^((?:\d+)|(?:0842[0-9a-zA-Z]{3}))\s*$', policy_data, re.M|re.DOTALL)
376 if match != None:
377 value = match.group(1)
378 else:
379 error = "Content of '%s' doesn't match r'^\d+|0842[0-9a-zA-Z]{3}\s*$'." % filename
380 except Exception, exc:
381 error = str(exc)
382 if value is not None:
383 return value
384
385 raise Exception(error)
386
def load_policy_content(filename):
    """ Return the content of a policy file decoded as ASCII.

    :param filename: path of the file to read.
    :return: the whole file content as a unicode string.
    :raise Exception: if the file cannot be opened, read or decoded as ASCII.
    """
    try:
        fileh = codecs.open(filename, 'r', 'ascii')
        try:
            return fileh.read()
        finally:
            # Close the handle even when decoding fails.
            fileh.close()
    except Exception:
        # Any failure is reported uniformly; KeyboardInterrupt and SystemExit
        # are deliberately left to propagate.
        raise Exception("Error loading '%s' as an ASCII file." % filename)
396
# Maps a byte-order mark to the name of the codec used to decode data that
# starts with that mark.
# NOTE: codecs.BOM_UTF16 is the same byte string as BOM_UTF16_LE or
# BOM_UTF16_BE (whichever matches the platform's native byte order), so its
# key collides with one of the two entries that follow it and the later
# entry's value is the one kept in the dictionary.
ENCODING_MATRIX = {
    codecs.BOM_UTF8: 'utf_8',
    codecs.BOM_UTF16: 'utf_16',
    codecs.BOM_UTF16_BE: 'utf_16_be',
    codecs.BOM_UTF16_LE: 'utf_16_le',
}
403
405 """Given a byte string, guess the encoding.
406
407 First it tries for UTF8/UTF16 BOM.
408
409 Next it tries the standard 'UTF8', 'ISO-8859-1', and 'cp1252' encodings,
410 Plus several gathered from locale information.
411
412 The calling program *must* first call locale.setlocale(locale.LC_ALL, '')
413
414 If successful it returns (decoded_unicode, successful_encoding)
415 If unsuccessful it raises a ``UnicodeError``.
416
417 This was taken from http://www.voidspace.org.uk/python/articles/guessing_encoding.shtml
418 """
419 for bom, enc in ENCODING_MATRIX.items():
420 if data.startswith(bom):
421 return data.decode(enc), enc
422 encodings = ['ascii', 'UTF-8']
423 successful_encoding = None
424 try:
425 encodings.append(locale.getlocale()[1])
426 except (AttributeError, IndexError):
427 pass
428 try:
429 encodings.append(locale.getdefaultlocale()[1])
430 except (AttributeError, IndexError):
431 pass
432
433 encodings.append('ISO8859-1')
434 encodings.append('cp1252')
435 for enc in encodings:
436 if not enc:
437 continue
438 try:
439 decoded = unicode(data, enc)
440 successful_encoding = enc
441 break
442 except (UnicodeError, LookupError):
443 pass
444 if successful_encoding is None:
445 raise UnicodeError('Unable to decode input data. Tried the'
446 ' following encodings: %s.' %
447 ', '.join([repr(enc) for enc in encodings if enc]))
448 else:
449 if successful_encoding == 'ascii':
450
451 successful_encoding = 'ISO8859-1'
452 return (decoded, successful_encoding)
453
def getmd5(fullpath, chunk_size=2**16):
    """ Return the hexadecimal MD5 digest of a file's content.

    The file is opened in binary mode and read chunk_size bytes at a time,
    so it is never loaded into memory in one piece.

    :param fullpath: path of the file to hash.
    :param chunk_size: number of bytes requested per read (default 2**16).
    :return: the MD5 digest as a hexadecimal string.
    """
    md5 = hashlib.md5()
    file_handle = open(fullpath, "rb")
    try:
        while True:
            chunk = file_handle.read(chunk_size)
            if not chunk:
                break
            md5.update(chunk)
    finally:
        # Release the handle even when a read fails part-way through.
        file_handle.close()
    return md5.hexdigest()
465
467 """ Read the policy category from the policy file. """
468 value = None
469 error = ""
470 try:
471 LOGGER.debug('Opening symbian policy file: ' + filename)
472 try:
473 fileh = codecs.open(filename, 'r', 'ascii')
474 except:
475 raise Exception("Error loading '%s' as an ASCII file." % filename)
476 for line in fileh:
477 match = re.match(r'^Category\s+([A-Z])\s*$', line, re.M|re.DOTALL)
478 if match != None:
479 value = match.group(1)
480 return value
481 if match == None:
482 error = "Content of '%s' doesn't match r'^Category\s+([A-Z])\s*$'." % filename
483 except Exception, exc:
484 error = str(exc)
485 if value is not None:
486 return value
487
488 raise Exception(error)
489
490
493
494 if os.name == 'nt':
495 import win32file
496 import win32con
497 import winerror
498 import time
499 import pywintypes
500 import string
501 import win32api
502 import win32netcon
503 import win32wnet
504
506 """ This object implement file locking for windows. """
507
512
def lock(self, wait=False):
    """ Acquire the lock on the lock file.

    The file named by self._filename is opened on first use and the handle
    is stored in self.fd; win32file.LockFile is then called on it with
    offset 0 and a length of 0xffff.

    :param wait: when False (default) a failed attempt raises
        LockFailedException at once; when True the call retries once per
        second for as long as the failure is ERROR_LOCK_VIOLATION.
    :raise LockFailedException: if wait is False and LockFile fails.
    :raise win32file.error: if wait is True and LockFile fails with any
        error other than ERROR_LOCK_VIOLATION.
    """
    LOGGER_LOCK.debug("lock")

    # Open the lock file lazily and reuse the handle on later calls.
    if self.fd is None:
        self.fd = open(self._filename, "w+")
    wfd = win32file._get_osfhandle(self.fd.fileno())

    if not wait:
        try:
            win32file.LockFile(wfd, 0, 0, 0xffff, 0)
        except win32file.error:
            raise LockFailedException()
        return

    while True:
        try:
            win32file.LockFile(wfd, 0, 0, 0xffff, 0)
            break
        except win32file.error:
            # sys.exc_info() is used so the block parses on every Python
            # version ("except X, exc" and "except X as exc" do not).
            exc = sys.exc_info()[1]
            if exc.args[0] != winerror.ERROR_LOCK_VIOLATION:
                # Not a contention error: re-raise with the original traceback.
                raise
            LOGGER_LOCK.debug("waiting")
            time.sleep(1)
534
536 LOGGER_LOCK.debug("unlock")
537 if self.fd == None:
538 LOGGER_LOCK.debug("already unlocked")
539 return
540 wfd = win32file._get_osfhandle(self.fd.fileno())
541 try:
542
543 win32file.UnlockFile(wfd, 0 , 0, 0xffff, 0)
544 self.fd.close()
545 self.fd = None
546 except win32file.error, exc:
547 if exc[0] != 158:
548 raise
549
550
554
555
557 """ Return the first free drive found else it raise an exception. """
558 DRIVE_LABELS = sorted(list(set(string.ascii_uppercase) - set(win32api.GetLogicalDriveStrings())), reverse=True)
559 if len(DRIVE_LABELS) != 0 :
560 return DRIVE_LABELS[0] + ":"
561 raise Exception("No free drive left.")
562
564 """ Substing path as a drive. """
565 path = os.path.normpath(path)
566 p = subprocess.Popen("subst %s %s" % (drive, path), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
567 errmsg = p.communicate()[0]
568 if p.returncode != 0:
569 raise Exception("Error substing '%s' under '%s': %s" % (path, drive, errmsg))
570
572 """ Unsubsting the drive. """
573 p = subprocess.Popen("subst /D %s" % (drive), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
574 errmsg = p.communicate()[0]
575 if p.returncode != 0:
576 raise Exception("Error unsubsting '%s': %s" % (drive, errmsg))
577
579 """ Catch os.rmdir failures on Windows when path is too long (more than 256 chars)."""
580 path = win32api.GetShortPathName(path)
581 win32file.RemoveDirectory(path)
582
588
def mount(drive, unc, username=None, password=None, persistent=False):
    """ Windows helper function to map a network drive.

    :param drive: local name handed to WNetAddConnection2 (the drive to map).
    :param unc: remote name handed to WNetAddConnection2 (the share to map).
    :param username: optional user name for the connection.
    :param password: optional password for the connection.
    :param persistent: when true, CONNECT_UPDATE_PROFILE is passed as the
        connection flags; otherwise the flags are 0.
    """
    if persistent:
        connection_flags = win32netcon.CONNECT_UPDATE_PROFILE
    else:
        connection_flags = 0
    win32wnet.WNetAddConnection2(win32netcon.RESOURCETYPE_DISK, drive, unc, None,
                                 username, password, connection_flags)
595
596
598 """ Windows helper function to map a network drive. """
599 drive_type = win32file.GetDriveType(drive)
600 if drive_type == win32con.DRIVE_REMOTE:
601 win32wnet.WNetCancelConnection2(drive, win32netcon.CONNECT_UPDATE_PROFILE, 1)
602 else:
603 raise Exception("%s couldn't be umount." % drive)
604
605 else:
608
611
615 - def lock(self, wait=False):
619
def touch(srcdir):
    """
    Recursively touches all the files in the source path mentioned.
    It does not touch the directories.

    :param srcdir: directory whose files get their access and modification
        times set to the current time.
    """
    for name in os.listdir(srcdir):
        srcfname = os.path.join(srcdir, name)
        if os.path.isdir(srcfname):
            # Directories are only descended into, never touched themselves.
            touch(srcfname)
        elif os.path.exists(srcfname):
            # Entries that no longer resolve (e.g. dangling links) are skipped.
            os.utime(srcfname, None)
633