Module fileutils
[hide private]
[frames | no frames]

Source Code for Module fileutils

  1  #============================================================================  
  2  #Name        : fileutils.py  
  3  #Part of     : Helium  
  4   
  5  #Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). 
  6  #All rights reserved. 
  7  #This component and the accompanying materials are made available 
  8  #under the terms of the License "Eclipse Public License v1.0" 
  9  #which accompanies this distribution, and is available 
 10  #at the URL "http://www.eclipse.org/legal/epl-v10.html". 
 11  # 
 12  #Initial Contributors: 
 13  #Nokia Corporation - initial contribution. 
 14  # 
 15  #Contributors: 
 16  # 
 17  #Description: 
 18  #=============================================================================== 
 19   
 20  """ 
 21  File manipulation related functionalities: 
 22   * Filescanner 
 23   * rmtree (fixed version) 
 24   * move (fixed version) 
 25  """ 
 26  import codecs 
 27  import fnmatch 
 28  import locale 
 29  import logging 
 30  import os 
 31  import re 
 32  import sys 
 33  import shutil 
 34  import StringIO 
 35  import hashlib 
 36  import subprocess 
 37   
 38   
 39  import pathaddition.match 
 40  import stat 
 41   
 42  LOGGER = logging.getLogger('fileutils') 
 43  LOGGER_LOCK = logging.getLogger('fileutils.lock') 
 44  #LOGGER.addHandler(logging.FileHandler('default.log')) 
 45  #logging.basicConfig(level=logging.DEBUG) 
 46   
class AbstractScanner(object):
    """ This class implements all the required infrastructure for filescanning.

    A scanner stores include/exclude patterns, literal include/exclude
    paths, selector objects and filetype patterns. Subclasses implement
    scan() and rely on the is_* predicates below to filter candidates.
    """

    def __init__(self):
        """ Initialization. """
        # Pattern lists, matched through self.match().
        self.includes = []
        self.excludes = []
        # Literal paths, compared by plain equality.
        self.includes_files = []
        self.excludes_files = []
        # Objects exposing is_selected(path); all of them must accept a path.
        self.selectors = []
        # Filetype patterns; an empty list accepts every file.
        self.filetypes = []

    def add_include(self, include):
        """ Adds an include path to the scanner.

        A pattern ending with a path separator gets '**' appended.
        """
        if include.endswith(('/', '\\')):
            include = include + '**'
        self.includes.append(include)

    def add_exclude(self, exclude):
        """ Adds an exclude path to the scanner.

        A pattern ending with a path separator gets '**' appended.
        """
        if exclude.endswith(('/', '\\')):
            exclude = exclude + '**'
        self.excludes.append(exclude)

    def add_exclude_file(self, exclude):
        """ Adds an exclude file to the scanner. """
        self.excludes_files.append(exclude)

    def add_selector(self, selector):
        """ Add selector to the scanner. """
        self.selectors.append(selector)

    def add_filetype(self, filetype):
        """ Adds a filetype selection to the scanner. """
        self.filetypes.append(filetype)

    def is_included(self, path):
        """ Returns if path is included by the scanner. """
        # Logger arguments are passed lazily so that no string is built
        # for every scanned path when debug logging is disabled.
        LOGGER.debug("is_included: path = %s", path)
        if path.replace('\\', '/') in self.includes_files or path in self.includes_files:
            return True
        for inc in self.includes:
            if self.match(path, inc):
                LOGGER.debug("Included: %s by %s", path, inc)
                return True
        return False

    def is_excluded(self, path):
        """ Returns if path is excluded by the scanner. """
        LOGGER.debug("is_excluded: path = %s", path)
        if path.replace('\\', '/') in self.excludes_files or path in self.excludes_files:
            return True
        for ex in self.excludes:
            if self.match(path, ex):
                LOGGER.debug("Excluded: %s by %s", path, ex)
                return True
        return False

    def is_selected(self, path):
        """ Returns if path is selected by all selectors in the scanner. """
        LOGGER.debug("is_selected: path = %s", path)
        for selector in self.selectors:
            if not selector.is_selected(path):
                return False
        LOGGER.debug("Selected: %s", path)
        return True

    def is_filetype(self, path):
        """ Test if a file matches one filetype.

        Returns True when no filetype has been registered.
        """
        if not self.filetypes:
            return True
        LOGGER.debug("is_filetype: path = %s", path)
        for filetype in self.filetypes:
            if self.match(path, filetype):
                LOGGER.debug("Filetype: %s by %s", path, filetype)
                return True
        return False

    def match(self, filename, pattern):
        """ Is filename matching pattern?

        Matching is case-insensitive on win32 and case-sensitive elsewhere.
        """
        return pathaddition.match.ant_match(filename, pattern,
                                            casesensitive=(sys.platform != "win32"))

    def test_path(self, root, relpath):
        """ Test if a path matches filetype, include, exclude, and selection process.

        Patterns are tested against relpath; selectors receive relpath
        joined to root.
        """
        return self.is_filetype(relpath) and self.is_included(relpath) \
               and not self.is_excluded(relpath) and \
               self.is_selected(os.path.join(root, relpath))

    def __str__(self):
        """ Returns a string representing this instance. """
        content = ['include:' + os.path.normpath(inc) for inc in self.includes]
        content.extend('exclude:' + os.path.normpath(ex) for ex in self.excludes)
        return ';'.join(content)

    def __repr__(self):
        """ Returns a string representing this instance. """
        return self.__str__()

    def scan(self):
        """ Abstract method which must be overridden to implement the scanning process.

        Raises NotImplementedError (an Exception subclass, so existing
        'except Exception' handlers keep working).
        """
        raise NotImplementedError("scan method must be overriden")
153 154
class FileScanner(AbstractScanner):
    """Filesystem scanner yielding the paths selected by the configured rules.

    The scanner is bound to a root directory and accepts any number of
    include and exclude paths. scan() is a generator returning the matching
    paths one at a time.

    Pattern matching is delegated to pathaddition.match (Ant-like matcher).

    Rules:
      - Includes and excludes should not start with *
      - Includes and excludes should not have wildcard searches ending with ** (e.g. wildcard**)

    Supported includes and excludes:
      - filename.txt
      - filename.*
      - dir/
      - dir/*
      - dir/**
    """

    def __init__(self, root_dir):
        """ Store the normalised root directory, always ending with os.sep. """
        AbstractScanner.__init__(self)
        base = os.path.normpath(root_dir)
        if not base.endswith(os.sep):
            base += os.sep
        self.root_dir = base

    def _promote_existing(self, patterns, literal_paths):
        """ Move entries of patterns that exist under root_dir into literal_paths. """
        for candidate in list(patterns):
            full = os.path.normpath(os.path.join(self.root_dir, candidate))
            if os.path.exists(full):
                literal_paths.append(candidate)
                patterns.remove(candidate)

    def scan(self):
        """ Generator yielding every selected file and every selected empty directory. """
        # Entries naming an existing path are handled as literal paths
        # rather than as patterns.
        self._promote_existing(self.excludes, self.excludes_files)
        self._promote_existing(self.includes, self.includes_files)

        LOGGER.debug('Scanning sub-root directories')
        for start_dir in self.find_subroots():
            for dirpath, subdirs, files in os.walk(unicode(start_dir)):
                subroot = dirpath[len(self.root_dir):]

                # Prune excluded directories in place so os.walk skips them.
                pruned = [name for name in subdirs
                          if self.is_excluded(os.path.join(subroot, name))]
                for name in pruned:
                    subdirs.remove(name)

                LOGGER.debug('Scanning directory: ' + dirpath)
                for file_ in files:
                    relative = os.path.join(subroot, file_)
                    absolute = os.path.join(dirpath, file_)
                    if not self.is_filetype(relative):
                        continue
                    if not self.is_included(relative):
                        continue
                    if not self.is_selected(absolute):
                        continue
                    if self.is_excluded(relative):
                        continue
                    yield absolute

                LOGGER.debug('Checking for empty directory: ' + dirpath)
                # Included directories left without files or sub-directories
                # are reported as well.
                if self.is_included(subroot) and not self.is_excluded(subroot) \
                        and not files and not subdirs:
                    yield dirpath

    def find_subroots(self):
        """Compute the directories from which the walk has to start.

        Walking from the scanner root can be very slow for large trees, so
        the non-wildcard leading part of each include is used as a more
        specific starting point. The scanner root itself is returned when
        no such starting point can be derived.
        """
        def components(path):
            """ Normalised path split on os.sep. """
            return os.path.normpath(path).split(os.sep)

        candidates = self.includes + self.includes_files
        roots = []

        # An include whose first segment holds a wildcard can match anywhere
        # under the root: sub-roots cannot be used at all in that case.
        leading_wildcard = any('*' in components(inc)[0] for inc in candidates)

        if not leading_wildcard:
            for inc in candidates:
                LOGGER.debug("===> inc %s" % inc)
                prefix = None
                globbed = False
                for part in components(inc):
                    if '*' in part:
                        globbed = True
                        break
                    prefix = part if prefix is None else os.path.join(prefix, part)
                if not globbed:
                    # Plain path: start from its parent directory.
                    prefix = os.path.dirname(prefix)

                LOGGER.debug("include %s" % prefix)
                if prefix is None:
                    continue

                candidate_root = os.path.normpath(os.path.join(self.root_dir, prefix))
                keep = True
                for known in roots[:]:
                    if destinsrc(known, candidate_root):
                        LOGGER.debug("root contains include, skip it")
                        keep = False
                        break
                    if destinsrc(candidate_root, known):
                        LOGGER.debug("include contains root, so remove root")
                        roots.remove(known)
                if keep:
                    roots.append(candidate_root)

        if not roots:
            roots = [os.path.normpath(self.root_dir)]
        LOGGER.debug('Roots = ' + str(roots))
        return roots

    def __str__(self):
        """ Root directory followed by the include/exclude description. """
        return os.path.normpath(self.root_dir) + ';' + AbstractScanner.__str__(self)

    def __repr__(self):
        """ Same text as __str__. """
        return self.__str__()
292 293
def move(src, dst):
    """Recursively move a file or directory to another location.

    os.rename() is tried first. When it fails with OSError, src is copied
    to dst and then removed. A directory cannot be moved into itself.
    Many corner cases handled by a real 'mv' are not covered here.
    """
    try:
        os.rename(src, dst)
    except OSError:
        if not os.path.isdir(src):
            # Plain file: copy with metadata, then drop the source.
            shutil.copy2(src, dst)
            os.unlink(src)
            return
        if destinsrc(src, dst):
            raise Exception("Cannot move a directory '%s' into itself '%s'." % (src, dst))
        shutil.copytree(src, dst, symlinks=True)
        rmtree(src)
314
def rmtree(rootdir):
    """ shutil.rmtree wrapper that retries entries which could not be deleted.

    On win32 the path is made absolute and, unless it starts with two
    backslashes, prefixed with \\\\?\\ before the deletion starts.
    """
    target = rootdir
    if sys.platform == 'win32':
        target = os.path.normpath(target)
        if not os.path.isabs(target):
            target = os.path.join(os.path.abspath('.'), target)
        if not target.startswith('\\\\'):
            target = u"\\\\?\\" + target

    def retry_delete(failed_call, path, excinfo):
        """ Error handler: make the entry writable, then delete it again. """
        os.chmod(path, 0o666)
        if os.path.isdir(path):
            rmdir(path)
            return
        if os.path.isfile(path):
            remove(path)
            return
        # Neither a directory nor a regular file: retry the failed call.
        failed_call(path)

    return shutil.rmtree(target, onerror=retry_delete)
def destinsrc(src, dst):
    """ Tell whether dst is src itself or located below src.

    Both paths are made absolute and terminated with a separator before the
    prefix test, so that '/a/bc' is not reported as being inside '/a/b'.
    The comparison ignores case on win32.
    """
    def as_directory(path):
        """ Absolute form of path, ending with a separator. """
        if sys.platform == "win32":
            path = path.lower()
        path = os.path.abspath(path)
        if not path.endswith(os.path.sep):
            path += os.path.sep
        return path

    return as_directory(dst).startswith(as_directory(src))
347 348
def which(executable):
    """ Search for executable in the PATH.

    Returns the normalised path of the first regular file named executable
    that has at least one execute permission bit set, or None when nothing
    matches or when the PATH environment variable is not defined.
    """
    search_path = os.environ.get('PATH')
    if search_path is None:
        # No PATH at all: nothing can be found (previously a KeyError).
        return None
    for folder in search_path.split(os.pathsep):
        filename = os.path.join(folder, executable)
        try:
            status = os.stat(filename)
        except OSError:
            continue
        # Only regular files are candidates; directories are skipped.
        if stat.S_ISREG(status.st_mode):
            # 0o111 = execute bit for user, group or others.
            if stat.S_IMODE(status.st_mode) & 0o111:
                return os.path.normpath(filename)
    return None
364 365
def read_policy_content(filename):
    """ Return the policy value stored in the policy file.

    The start of the file must be either a run of digits or '0842' followed
    by three alphanumeric characters, optionally followed by whitespace up
    to the end of the line. Any failure (loading or matching) is reported
    as an Exception carrying the error text.
    """
    failure = ""
    try:
        LOGGER.debug('Opening policy file: ' + filename)
        content = load_policy_content(filename)
        found = re.match(r'^((?:\d+)|(?:0842[0-9a-zA-Z]{3}))\s*$', content, re.M | re.DOTALL)
        if found is not None:
            return found.group(1)
        failure = "Content of '%s' doesn't match r'^\\d+|0842[0-9a-zA-Z]{3}\\s*$'." % filename
    except Exception as exc:
        failure = str(exc)
    # worse case....
    raise Exception(failure)
386
def load_policy_content(filename):
    """ Return the whole content of filename decoded as ASCII.

    Raises an Exception with a fixed message when the file cannot be opened
    or contains non-ASCII data. The file is closed in every case.
    """
    try:
        # 'with' guarantees the handle is released even when read() fails
        # while decoding.
        with codecs.open(filename, 'r', 'ascii') as fileh:
            return fileh.read()
    except Exception:
        # 'except Exception' lets KeyboardInterrupt/SystemExit propagate.
        raise Exception("Error loading '%s' as an ASCII file." % filename)
# Byte-order marks recognised by guess_encoding().
# codecs.BOM_UTF16 is the native-endian BOM, i.e. the same bytes as either
# BOM_UTF16_BE or BOM_UTF16_LE; the later, more specific entry supplies the
# value kept for that key.
ENCODING_MATRIX = {
    codecs.BOM_UTF8: 'utf_8',
    codecs.BOM_UTF16: 'utf_16',
    codecs.BOM_UTF16_BE: 'utf_16_be',
    codecs.BOM_UTF16_LE: 'utf_16_le',
}


def guess_encoding(data):
    """Given a byte string, guess the encoding.

    First it tries for UTF8/UTF16 BOM.

    Next it tries 'ascii' and 'UTF-8', then the encodings reported by the
    locale module, and finally 'ISO8859-1' and 'cp1252'.

    The calling program *must* first call locale.setlocale(locale.LC_ALL, '')

    If successful it returns (decoded_unicode, successful_encoding); data
    that decodes as plain ASCII is reported as 'ISO8859-1'.
    If unsuccessful it raises a ``UnicodeError``.

    This was taken from http://www.voidspace.org.uk/python/articles/guessing_encoding.shtml
    """
    for bom, enc in ENCODING_MATRIX.items():
        if data.startswith(bom):
            return data.decode(enc), enc

    encodings = ['ascii', 'UTF-8']
    for query_name in ('getlocale', 'getdefaultlocale'):
        try:
            encodings.append(getattr(locale, query_name)()[1])
        except (AttributeError, IndexError, ValueError):
            # ValueError: the locale module rejects locale names it cannot
            # parse; such a failure must not abort the guess.
            pass
    # latin-1
    encodings.append('ISO8859-1')
    encodings.append('cp1252')

    for enc in encodings:
        if not enc:
            # The locale queries may report no encoding at all.
            continue
        try:
            decoded = data.decode(enc)
        except (UnicodeError, LookupError):
            continue
        if enc == 'ascii':
            # our default ascii encoding
            enc = 'ISO8859-1'
        return (decoded, enc)

    raise UnicodeError('Unable to decode input data.  Tried the'
                       ' following encodings: %s.' %
                       ', '.join([repr(enc) for enc in encodings if enc]))
453
def getmd5(fullpath, chunk_size=2**16):
    """ Return the hexadecimal MD5 digest of the file at fullpath.

    The file is read in binary mode, chunk_size bytes at a time, and is
    closed even when reading fails.
    """
    md5 = hashlib.md5()
    with open(fullpath, "rb") as file_handle:
        # read() returns an empty byte string at end of file.
        for chunk in iter(lambda: file_handle.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()
465
def read_symbian_policy_content(filename):
    """ Read the policy category from the policy file.

    Returns the single upper-case letter of the first line of the form
    'Category <letter>'. Raises an Exception carrying the error text when
    the file cannot be read as ASCII or when no line matches (this
    includes an empty file).
    """
    error = ""
    try:
        LOGGER.debug('Opening symbian policy file: ' + filename)
        try:
            fileh = codecs.open(filename, 'r', 'ascii')
        except Exception:
            raise Exception("Error loading '%s' as an ASCII file." % filename)
        try:
            for line in fileh:
                match = re.match(r'^Category\s+([A-Z])\s*$', line, re.M | re.DOTALL)
                if match is not None:
                    return match.group(1)
        finally:
            # Always release the handle, whether a category was found,
            # decoding failed or the file was exhausted.
            fileh.close()
        # Reached only when no line matched.
        error = "Content of '%s' doesn't match r'^Category\\s+([A-Z])\\s*$'." % filename
    except Exception as exc:
        error = str(exc)
    # worse case....
    raise Exception(error)
489 490
class LockFailedException(Exception):
    """ Raised when a file lock cannot be acquired. """
493 494 if os.name == 'nt': 495 import win32file 496 import win32con 497 import winerror 498 import time 499 import pywintypes 500 import string 501 import win32api 502 import win32netcon 503 import win32wnet 504
505 - class Lock:
506 """ This object implement file locking for windows. """ 507
508 - def __init__(self, filename):
509 LOGGER_LOCK.debug("__init__") 510 self._filename = filename 511 self.fd = None
512
513 - def lock(self, wait=False):
514 LOGGER_LOCK.debug("lock") 515 # Open the file 516 if self.fd == None: 517 self.fd = open(self._filename, "w+") 518 wfd = win32file._get_osfhandle(self.fd.fileno()) 519 if not wait: 520 try: 521 win32file.LockFile(wfd, 0, 0, 0xffff, 0) 522 except: 523 raise LockFailedException() 524 else: 525 while True: 526 try: 527 win32file.LockFile(wfd, 0, 0, 0xffff, 0) 528 break 529 except win32file.error, exc: 530 if exc[0] != winerror.ERROR_LOCK_VIOLATION: 531 raise exc 532 LOGGER_LOCK.debug("waiting") 533 time.sleep(1)
534
535 - def unlock(self):
536 LOGGER_LOCK.debug("unlock") 537 if self.fd == None: 538 LOGGER_LOCK.debug("already unlocked") 539 return 540 wfd = win32file._get_osfhandle(self.fd.fileno()) 541 try: 542 # pylint: disable-msg=E1101 543 win32file.UnlockFile(wfd, 0 , 0, 0xffff, 0) 544 self.fd.close() 545 self.fd = None 546 except win32file.error, exc: 547 if exc[0] != 158: 548 raise
549 550
551 - def __del__(self):
552 LOGGER_LOCK.debug("__del__") 553 self.unlock()
554 555
556 - def get_next_free_drive():
557 """ Return the first free drive found else it raise an exception. """ 558 DRIVE_LABELS = sorted(list(set(string.ascii_uppercase) - set(win32api.GetLogicalDriveStrings())), reverse=True) 559 if len(DRIVE_LABELS) != 0 : 560 return DRIVE_LABELS[0] + ":" 561 raise Exception("No free drive left.")
562
563 - def subst(drive, path):
564 """ Substing path as a drive. """ 565 path = os.path.normpath(path) 566 p = subprocess.Popen("subst %s %s" % (drive, path), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 567 errmsg = p.communicate()[0] 568 if p.returncode != 0: 569 raise Exception("Error substing '%s' under '%s': %s" % (path, drive, errmsg))
570
571 - def unsubst(drive):
572 """ Unsubsting the drive. """ 573 p = subprocess.Popen("subst /D %s" % (drive), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 574 errmsg = p.communicate()[0] 575 if p.returncode != 0: 576 raise Exception("Error unsubsting '%s': %s" % (drive, errmsg))
577
578 - def rmdir(path):
579 """ Catch os.rmdir failures on Windows when path is too long (more than 256 chars).""" 580 path = win32api.GetShortPathName(path) 581 win32file.RemoveDirectory(path)
582
583 - def remove(filename):
584 """ Catch os.rmdir failures on Windows when path is too long (more than 256 chars).""" 585 filename = win32api.GetShortPathName(filename) 586 filename = filename.lstrip("\\\\?\\") 587 os.remove(filename)
588
589 - def mount(drive, unc, username=None, password=None, persistent=False):
590 """ Windows helper function to map a network drive. """ 591 flags = 0 592 if persistent: 593 flags = win32netcon.CONNECT_UPDATE_PROFILE 594 win32wnet.WNetAddConnection2(win32netcon.RESOURCETYPE_DISK, drive, unc, None, username, password, flags)
595 596
597 - def umount(drive):
598 """ Windows helper function to map a network drive. """ 599 drive_type = win32file.GetDriveType(drive) 600 if drive_type == win32con.DRIVE_REMOTE: 601 win32wnet.WNetCancelConnection2(drive, win32netcon.CONNECT_UPDATE_PROFILE, 1) 602 else: 603 raise Exception("%s couldn't be umount." % drive)
604 605 else:
606 - def rmdir(path):
607 return os.rmdir(path)
608
609 - def remove(path):
610 return os.remove(path)
611
612 - class Lock:
613 - def __init__(self, filename):
614 pass
615 - def lock(self, wait=False):
616 pass
617 - def unlock(self):
618 pass
619
def touch(srcdir):
    """
    Set the access and modification times of every file found below srcdir
    to the current time. Sub-directories are visited recursively but their
    own timestamps are not updated.
    """
    for entry in os.listdir(srcdir):
        candidate = os.path.join(srcdir, entry)
        if os.path.isdir(candidate):
            touch(candidate)
            continue
        # Entries that no longer resolve to an existing path are skipped.
        if os.path.exists(candidate):
            os.utime(candidate, None)
633