fileutils

+ 1 #============================================================================ + 2 #Name : fileutils.py + 3 #Part of : Helium + 4 + 5 #Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). + 6 #All rights reserved. + 7 #This component and the accompanying materials are made available + 8 #under the terms of the License "Eclipse Public License v1.0" + 9 #which accompanies this distribution, and is available + 10 #at the URL "http://www.eclipse.org/legal/epl-v10.html". + 11 # + 12 #Initial Contributors: + 13 #Nokia Corporation - initial contribution. + 14 # + 15 #Contributors: + 16 # + 17 #Description: + 18 #=============================================================================== + 19 + 20 """ + 21 File manipulation related functionalities: + 22 * Filescanner + 23 * rmtree (fixed version) + 24 * move (fixed version) + 25 """ + 26 import codecs + 27 import fnmatch + 28 import locale + 29 import logging + 30 import os + 31 import re + 32 import sys + 33 import shutil + 34 import StringIO + 35 import hashlib + 36 import subprocess + 37 + 38 + 39 import pathaddition.match + 40 import stat + 41 + 42 LOGGER = logging.getLogger('fileutils') + 43 LOGGER_LOCK = logging.getLogger('fileutils.lock') + 44 #LOGGER.addHandler(logging.FileHandler('default.log')) + 45 #logging.basicConfig(level=logging.DEBUG) + 46 +

class AbstractScanner(object):
    """ Shared bookkeeping for file scanners.

    Holds the include/exclude patterns, the explicit include/exclude file
    lists, the selectors and the filetype patterns, and offers the
    predicates used to test a path against them. Subclasses must
    implement scan().
    """

    def __init__(self):
        """ Start with empty pattern, file, selector and filetype lists. """
        self.includes = []
        self.excludes = []
        self.includes_files = []
        self.excludes_files = []
        self.selectors = []
        self.filetypes = []

    def add_include(self, include):
        """ Register an include pattern; a trailing separator means 'everything below'. """
        if include.endswith(('/', '\\')):
            include += '**'
        self.includes.append(include)

    def add_exclude(self, exclude):
        """ Register an exclude pattern; a trailing separator means 'everything below'. """
        if exclude.endswith(('/', '\\')):
            exclude += '**'
        self.excludes.append(exclude)

    def add_exclude_file(self, exclude):
        """ Register a path that is excluded by exact comparison. """
        self.excludes_files.append(exclude)

    def add_selector(self, selector):
        """ Register a selector object (it must provide is_selected(path)). """
        self.selectors.append(selector)

    def add_filetype(self, filetype):
        """ Register a filetype pattern. """
        self.filetypes.append(filetype)

    def is_included(self, path):
        """ Tell whether path is listed in includes_files or matches an include pattern. """
        LOGGER.debug("is_included: path = " + path)
        known_files = self.includes_files
        # Explicit files are compared both with forward slashes and verbatim.
        if path.replace('\\', '/') in known_files or path in known_files:
            return True
        for pattern in self.includes:
            if not self.match(path, pattern):
                continue
            LOGGER.debug("Included: " + path + " by " + pattern)
            return True
        return False

    def is_excluded(self, path):
        """ Tell whether path is listed in excludes_files or matches an exclude pattern. """
        LOGGER.debug("is_excluded: path = " + path)
        known_files = self.excludes_files
        # Explicit files are compared both with forward slashes and verbatim.
        if path.replace('\\', '/') in known_files or path in known_files:
            return True
        for pattern in self.excludes:
            if not self.match(path, pattern):
                continue
            LOGGER.debug("Excluded: " + path + " by " + pattern)
            return True
        return False

    def is_selected(self, path):
        """ Tell whether every registered selector accepts path. """
        LOGGER.debug("is_selected: path = " + path)
        # all() stops at the first rejecting selector, like an explicit loop.
        if not all(selector.is_selected(path) for selector in self.selectors):
            return False
        LOGGER.debug("Selected: " + path)
        return True

    def is_filetype(self, path):
        """ Tell whether path matches a filetype pattern (always True when none is set). """
        if not self.filetypes:
            return True
        LOGGER.debug("is_filetype: path = " + path)
        for pattern in self.filetypes:
            if not self.match(path, pattern):
                continue
            LOGGER.debug("Filetype: " + path + " by " + pattern)
            return True
        return False

    def match(self, filename, pattern):
        """ Ant-style match of filename against pattern; case-insensitive only on win32. """
        case_matters = (sys.platform != "win32")
        return pathaddition.match.ant_match(filename, pattern,
                                            casesensitive=case_matters)

    def test_path(self, root, relpath):
        """ Run the filetype, include, exclude and selector checks, in that order.

        The pattern checks use relpath; the selectors receive root joined
        with relpath.
        """
        if not self.is_filetype(relpath):
            return False
        if not self.is_included(relpath):
            return False
        if self.is_excluded(relpath):
            return False
        return self.is_selected(os.path.join(root, relpath))

    def __str__(self):
        """ Render the patterns as 'include:...;exclude:...' with normalised paths. """
        parts = ['include:' + os.path.normpath(inc) for inc in self.includes]
        parts.extend('exclude:' + os.path.normpath(ex) for ex in self.excludes)
        return ';'.join(parts)

    def __repr__(self):
        """ Same text as __str__. """
        return self.__str__()

    def scan(self):
        """ Placeholder for the scanning process; always raises here. """
        raise Exception("scan method must be overriden")

153 +154 +

class FileScanner(AbstractScanner):
    """Walk the filesystem and yield the paths accepted by the scanner rules.

    A scanner is created for one root directory; include and exclude paths
    are added afterwards. scan() is a generator, so matches are produced
    one by one while iterating.

    Matching is delegated to pathaddition.match, an Ant-like pattern
    matcher.

    Rules:
      - Includes and excludes should not start with *
      - Includes and excludes should not have wildcard searches ending with ** (e.g. wildcard**)

    Supported includes and excludes:
      - filename.txt
      - filename.*
      - dir/
      - dir/*
      - dir/**
    """

    def __init__(self, root_dir):
        """ Store the normalised root directory, always ending with os.sep. """
        AbstractScanner.__init__(self)
        normalised = os.path.normpath(root_dir)
        if not normalised.endswith(os.sep):
            normalised = normalised + os.sep
        self.root_dir = normalised

    def scan(self):
        """ Generate every matching file, plus matching empty directories. """
        # Patterns naming something that exists under the root are moved to
        # the exact-path lists (excludes first, then includes).
        for patterns, exact_paths in ((self.excludes, self.excludes_files),
                                      (self.includes, self.includes_files)):
            for entry in list(patterns):
                location = os.path.normpath(os.path.join(self.root_dir, entry))
                if os.path.exists(location):
                    exact_paths.append(entry)
                    patterns.remove(entry)

        LOGGER.debug('Scanning sub-root directories')
        prefix_length = len(self.root_dir)
        for start_dir in self.find_subroots():
            for dirpath, subdirs, files in os.walk(unicode(start_dir)):
                subroot = dirpath[prefix_length:]

                # Prune in place so os.walk does not descend into excluded dirs.
                subdirs[:] = [name for name in subdirs
                              if not self.is_excluded(os.path.join(subroot, name))]

                LOGGER.debug('Scanning directory: ' + dirpath)
                for name in files:
                    relative = os.path.join(subroot, name)
                    if not self.is_filetype(relative):
                        continue
                    if not self.is_included(relative):
                        continue
                    if not self.is_selected(os.path.join(dirpath, name)):
                        continue
                    if self.is_excluded(relative):
                        continue
                    yield os.path.join(dirpath, name)

                LOGGER.debug('Checking for empty directory: ' + dirpath)
                # A directory with no files and no remaining sub-directories
                # is reported itself when it is included and not excluded.
                if self.is_included(subroot) and not self.is_excluded(subroot) \
                   and not files and not subdirs:
                    yield dirpath

    def find_subroots(self):
        """Work out the directories to walk from the include paths.

        Often large archive operations define a number of archives from the
        root of the drive. Walking the tree from the root is very
        time-consuming, so starting from more specific sub-directories
        improves performance. Falls back to the root directory when no
        sub-root can be derived.
        """
        all_includes = self.includes + self.includes_files
        roots = []

        # An include whose first path segment holds a wildcard makes
        # sub-roots unusable.
        leading_wildcard = any(
            '*' in os.path.normpath(inc).split(os.sep)[0] for inc in all_includes)

        if not leading_wildcard:
            for inc in all_includes:
                LOGGER.debug("===> inc %s" % inc)
                prefix = None
                for component in os.path.normpath(inc).split(os.sep):
                    if '*' in component:
                        break
                    if prefix is None:
                        prefix = component
                    else:
                        prefix = os.path.join(prefix, component)
                else:
                    # No wildcard at all: the last component is dropped.
                    prefix = os.path.dirname(prefix)

                LOGGER.debug("include %s" % prefix)
                if prefix is None:
                    continue
                candidate = os.path.normpath(os.path.join(self.root_dir, prefix))
                for known in list(roots):
                    if destinsrc(known, candidate):
                        LOGGER.debug("root contains include, skip it")
                        break
                    if destinsrc(candidate, known):
                        LOGGER.debug("include contains root, so remove root")
                        roots.remove(known)
                else:
                    roots.append(candidate)

        if not roots:
            roots = [os.path.normpath(self.root_dir)]
        LOGGER.debug('Roots = ' + str(roots))
        return roots

    def __str__(self):
        """ Normalised root directory followed by the AbstractScanner description. """
        return ';'.join([os.path.normpath(self.root_dir),
                         AbstractScanner.__str__(self)])

    def __repr__(self):
        """ Same text as __str__. """
        return self.__str__()

292 +293 +

def move(src, dst):
    """Move a file or a directory tree from src to dst.

    os.rename is tried first. When it raises OSError the source is copied
    to dst and then deleted: directories through shutil.copytree (symlinks
    kept as symlinks) followed by rmtree, files through shutil.copy2
    followed by os.unlink. This glosses over many of the cases a real
    mv implementation handles.

    Raises Exception when a directory would be copied into itself.
    """
    try:
        os.rename(src, dst)
        return
    except OSError:
        pass

    # Rename failed: fall back to copy-then-delete.
    if not os.path.isdir(src):
        shutil.copy2(src, dst)
        os.unlink(src)
        return

    if destinsrc(src, dst):
        raise Exception("Cannot move a directory '%s' into itself '%s'." % (src, dst))
    shutil.copytree(src, dst, symlinks=True)
    rmtree(src)

314 +

def rmtree(rootdir):
    """ Delete a directory tree, retrying entries that shutil.rmtree fails on.

    On win32 the path is made absolute and, unless it already starts with
    two backslashes, prefixed with the extended-length marker before the
    deletion starts. When shutil.rmtree reports an error for an entry, the
    entry is made read/write for everybody and removed again with the
    module's rmdir()/remove() helpers.

    Returns whatever shutil.rmtree returns.
    """
    if sys.platform == 'win32':
        # The extended-length prefix switches off Windows path parsing, so
        # the path must be fully resolved first: abspath anchors relative
        # and drive-relative paths and collapses any '..' component.
        rootdir = os.path.abspath(rootdir)
        if not rootdir.startswith('\\\\'):
            rootdir = u"\\\\?\\" + rootdir

    # Same permission bits as octal 666, spelled with stat constants.
    read_write_all = (stat.S_IRUSR | stat.S_IWUSR |
                      stat.S_IRGRP | stat.S_IWGRP |
                      stat.S_IROTH | stat.S_IWOTH)

    def cb_handle_error(fcn, path, excinfo):
        """ Error handler: drop the read-only state, then delete the entry. """
        os.chmod(path, read_write_all)
        if os.path.isdir(path):
            rmdir(path)
        elif os.path.isfile(path):
            remove(path)
        else:
            # Neither a directory nor a regular file: retry the failed call.
            fcn(path)

    return shutil.rmtree(rootdir, onerror=cb_handle_error)

def destinsrc(src, dst):
    """ Tell whether dst is src itself or lies underneath it.

    Both paths are made absolute and terminated with a separator before
    the prefix test, so a sibling that merely shares the beginning of the
    name (e.g. 'foo' and 'foobar') does not count. On win32 the paths are
    lower-cased first.
    """
    if sys.platform == "win32":
        src, dst = src.lower(), dst.lower()
    separator = os.path.sep
    container, candidate = [
        path if path.endswith(separator) else path + separator
        for path in (os.path.abspath(src), os.path.abspath(dst))]
    return candidate.startswith(container)

347 +348 +

def which(executable):
    """ Search for executable in the folders listed in the PATH variable.

    A candidate is accepted when it is a regular file with at least one
    execute permission bit set. The first accepted candidate is returned
    as a normalised path. None is returned when nothing matches or when
    PATH is not defined in the environment.
    """
    search_path = os.environ.get('PATH')
    if search_path is None:
        # No PATH at all: nothing to search, which simply means "not found".
        return None

    any_execute_bit = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    for folder in search_path.split(os.pathsep):
        candidate = os.path.join(folder, executable)
        try:
            status = os.stat(candidate)
        except os.error:
            # Missing or unreadable entry; try the next folder.
            continue
        if not stat.S_ISREG(status.st_mode):
            continue
        if stat.S_IMODE(status.st_mode) & any_execute_bit:
            return os.path.normpath(candidate)
    return None

364 +365 +

def read_policy_content(filename):
    """ Return the policy value stored in the policy file.

    The file content is loaded with load_policy_content() and must start
    with either a run of digits or '0842' followed by three alphanumeric
    characters, optionally followed by whitespace. The matched value is
    returned as a string.

    Raises Exception, carrying the text of the underlying problem, when
    the file cannot be loaded or its content does not match.
    """
    try:
        LOGGER.debug('Opening policy file: ' + filename)
        policy_data = load_policy_content(filename)
        found = re.match(r'^((?:\d+)|(?:0842[0-9a-zA-Z]{3}))\s*$',
                         policy_data, re.M | re.DOTALL)
    except Exception:
        # Any failure is re-reported as a plain Exception with the same text.
        raise Exception(str(sys.exc_info()[1]))
    if found is None:
        raise Exception(
            "Content of '%s' doesn't match r'^\\d+|0842[0-9a-zA-Z]{3}\\s*$'." % filename)
    return found.group(1)

386 +

def load_policy_content(filename):
    """ Return the whole content of filename decoded as ASCII.

    Raises Exception("Error loading '<filename>' as an ASCII file.") when
    the file cannot be opened, read or decoded.
    """
    try:
        fileh = codecs.open(filename, 'r', 'ascii')
        try:
            return fileh.read()
        finally:
            # Close the handle even when decoding fails part-way through.
            fileh.close()
    except Exception:
        # 'Exception' rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not turned into a loading error.
        raise Exception("Error loading '%s' as an ASCII file." % filename)

# Byte-order marks recognised by guess_encoding, mapped to the codec used
# to decode data starting with them.
ENCODING_MATRIX = {
    codecs.BOM_UTF8: 'utf_8',
    codecs.BOM_UTF16: 'utf_16',
    codecs.BOM_UTF16_BE: 'utf_16_be',
    codecs.BOM_UTF16_LE: 'utf_16_le',
}


def guess_encoding(data):
    """Guess the encoding of a byte string and decode it.

    Data starting with one of the byte-order marks in ENCODING_MATRIX is
    decoded with the associated codec. Otherwise these encodings are tried
    in order: 'ascii', 'UTF-8', the encodings reported by
    locale.getlocale() and locale.getdefaultlocale(), 'ISO8859-1' and
    'cp1252'. Data that decodes as 'ascii' is reported as 'ISO8859-1'.

    The calling program *must* first call locale.setlocale(locale.LC_ALL, '')

    Returns (decoded_unicode, successful_encoding); raises ``UnicodeError``
    when no encoding fits.

    This was taken from http://www.voidspace.org.uk/python/articles/guessing_encoding.shtml
    """
    for bom, enc in ENCODING_MATRIX.items():
        if data.startswith(bom):
            return data.decode(enc), enc

    candidates = ['ascii', 'UTF-8']
    for lookup_name in ('getlocale', 'getdefaultlocale'):
        try:
            candidates.append(getattr(locale, lookup_name)()[1])
        except (AttributeError, IndexError):
            pass
    # latin-1
    candidates.extend(['ISO8859-1', 'cp1252'])

    for candidate in candidates:
        if not candidate:
            # The locale lookups may have contributed None.
            continue
        try:
            decoded = data.decode(candidate)
        except (UnicodeError, LookupError):
            continue
        if candidate == 'ascii':
            # our default ascii encoding
            candidate = 'ISO8859-1'
        return (decoded, candidate)

    raise UnicodeError('Unable to decode input data. Tried the following encodings: %s.' %
                       ', '.join([repr(name) for name in candidates if name]))

453 +

def getmd5(fullpath, chunk_size=2**16):
    """ Return the hexadecimal MD5 digest of the file at fullpath.

    The file is opened in binary mode and read chunk_size bytes at a time.
    """
    digest = hashlib.md5()
    file_handle = open(fullpath, "rb")
    try:
        while True:
            chunk = file_handle.read(chunk_size)
            if not chunk:
                break
            digest.update(chunk)
    finally:
        # Release the handle even when a read fails.
        file_handle.close()
    return digest.hexdigest()

465 +

def read_symbian_policy_content(filename):
    """ Return the policy category letter stored in the policy file.

    The file is read line by line as ASCII; the first line of the form
    'Category <capital letter>' provides the result.

    Raises Exception when the file cannot be opened or read, or when no
    line matches (which includes an empty file).
    """
    try:
        LOGGER.debug('Opening symbian policy file: ' + filename)
        try:
            fileh = codecs.open(filename, 'r', 'ascii')
        except Exception:
            raise Exception("Error loading '%s' as an ASCII file." % filename)
        try:
            for line in fileh:
                match = re.match(r'^Category\s+([A-Z])\s*$', line, re.M|re.DOTALL)
                if match is not None:
                    return match.group(1)
        finally:
            # Always release the handle, including on the early return.
            fileh.close()
    except Exception:
        # Report any failure as a plain Exception carrying the same text.
        raise Exception(str(sys.exc_info()[1]))
    # No line matched; an empty file ends up here as well.
    raise Exception("Content of '%s' doesn't match r'^Category\\s+([A-Z])\\s*$'." % filename)

489 +490 +

class LockFailedException(Exception):
    """ Raised by Lock.lock() when a non-waiting lock attempt fails. """

if os.name == 'nt':
    import win32file
    import win32con
    import winerror
    import time
    import pywintypes
    import string
    import win32api
    import win32netcon
    import win32wnet

    class Lock:
        """ This object implements file locking for windows. """

        def __init__(self, filename):
            """ Remember the lock file name; the file is opened by lock(). """
            LOGGER_LOCK.debug("__init__")
            self._filename = filename
            self.fd = None

        def lock(self, wait=False):
            """ Lock the first 0xffff bytes of the lock file.

            With wait=False a failed attempt raises LockFailedException.
            With wait=True the attempt is repeated every second for as
            long as the failure is a lock violation; any other error is
            re-raised.
            """
            LOGGER_LOCK.debug("lock")
            # Open the file
            if self.fd is None:
                self.fd = open(self._filename, "w+")
            wfd = win32file._get_osfhandle(self.fd.fileno())
            if not wait:
                try:
                    win32file.LockFile(wfd, 0, 0, 0xffff, 0)
                except Exception:
                    # Not a bare except: Ctrl-C must not look like a busy lock.
                    raise LockFailedException()
            else:
                while True:
                    try:
                        win32file.LockFile(wfd, 0, 0, 0xffff, 0)
                        break
                    except win32file.error:
                        if sys.exc_info()[1].args[0] != winerror.ERROR_LOCK_VIOLATION:
                            raise
                        LOGGER_LOCK.debug("waiting")
                        time.sleep(1)

        def unlock(self):
            """ Release the lock and close the lock file; no-op when not open. """
            LOGGER_LOCK.debug("unlock")
            if self.fd is None:
                LOGGER_LOCK.debug("already unlocked")
                return
            wfd = win32file._get_osfhandle(self.fd.fileno())
            try:
                # pylint: disable-msg=E1101
                win32file.UnlockFile(wfd, 0, 0, 0xffff, 0)
            except win32file.error:
                # 'Region not locked' (158) only means there was nothing to
                # release; anything else is a real failure.
                if sys.exc_info()[1].args[0] != winerror.ERROR_NOT_LOCKED:
                    raise
            # Close the file in the 'not locked' case too, so the handle
            # does not stay open.
            self.fd.close()
            self.fd = None

        def __del__(self):
            """ Release the lock when the object is collected. """
            LOGGER_LOCK.debug("__del__")
            self.unlock()

    def get_next_free_drive():
        """ Return the first free drive found else it raise an exception.

        Candidates are the upper-case letters that do not appear in the
        logical drive strings, highest letter first.
        """
        used = set(win32api.GetLogicalDriveStrings().upper())
        free_labels = sorted(set(string.ascii_uppercase) - used, reverse=True)
        if free_labels:
            return free_labels[0] + ":"
        raise Exception("No free drive left.")

    def subst(drive, path):
        """ Substing path as a drive. """
        path = os.path.normpath(path)
        # Argument list instead of a shell string, so that paths holding
        # spaces or shell metacharacters reach subst unchanged.
        p = subprocess.Popen(["subst", drive, path], stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        errmsg = p.communicate()[0]
        if p.returncode != 0:
            raise Exception("Error substing '%s' under '%s': %s" % (path, drive, errmsg))

    def unsubst(drive):
        """ Unsubsting the drive. """
        p = subprocess.Popen(["subst", "/D", drive], stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        errmsg = p.communicate()[0]
        if p.returncode != 0:
            raise Exception("Error unsubsting '%s': %s" % (drive, errmsg))

    def rmdir(path):
        """ Catch os.rmdir failures on Windows when path is too long (more than 256 chars)."""
        path = win32api.GetShortPathName(path)
        win32file.RemoveDirectory(path)

    def remove(filename):
        """ Catch os.remove failures on Windows when path is too long (more than 256 chars)."""
        filename = win32api.GetShortPathName(filename)
        # Remove the extended-length marker as a prefix. str.lstrip would
        # strip a *set of characters* and eat the leading backslashes of a
        # UNC path as well.
        unc_prefix = "\\\\?\\UNC\\"
        plain_prefix = "\\\\?\\"
        if filename.startswith(unc_prefix):
            filename = "\\\\" + filename[len(unc_prefix):]
        elif filename.startswith(plain_prefix):
            filename = filename[len(plain_prefix):]
        os.remove(filename)

    def mount(drive, unc, username=None, password=None, persistent=False):
        """ Windows helper function to map a network drive. """
        flags = 0
        if persistent:
            flags = win32netcon.CONNECT_UPDATE_PROFILE
        win32wnet.WNetAddConnection2(win32netcon.RESOURCETYPE_DISK, drive, unc, None,
                                     username, password, flags)

    def umount(drive):
        """ Windows helper function to unmap a network drive.

        Raises Exception when drive is not a remote drive.
        """
        drive_type = win32file.GetDriveType(drive)
        if drive_type == win32con.DRIVE_REMOTE:
            win32wnet.WNetCancelConnection2(drive, win32netcon.CONNECT_UPDATE_PROFILE, 1)
        else:
            raise Exception("%s couldn't be umount." % drive)

else:
    def rmdir(path):
        """ Remove the directory with os.rmdir. """
        return os.rmdir(path)

    def remove(path):
        """ Remove the file with os.remove. """
        return os.remove(path)

    class Lock:
        """ Placeholder lock for non-Windows platforms: every method does nothing. """

        def __init__(self, filename):
            pass

        def lock(self, wait=False):
            pass

        def unlock(self):
            pass

619 +

def touch(srcdir):
    """
    Set the access and modification times of every file below srcdir to
    the current time, descending into sub-directories.
    The directories themselves are not touched.
    """
    for entry in os.listdir(srcdir):
        target = os.path.join(srcdir, entry)
        if os.path.isdir(target):
            touch(target)
        elif os.path.exists(target):
            # Entries that do not resolve (e.g. dangling links) are skipped.
            os.utime(target, None)

633 +

Source Code for Module fileutils