python-2.5.2/win32/Lib/distutils/text_file.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 """text_file
       
     2 
       
     3 provides the TextFile class, which gives an interface to text files
       
     4 that (optionally) takes care of stripping comments, ignoring blank
       
     5 lines, and joining lines with backslashes."""
       
     6 
       
     7 __revision__ = "$Id: text_file.py 29687 2002-11-14 02:25:42Z akuchling $"
       
     8 
       
     9 from types import *
       
    10 import sys, os, string
       
    11 
       
    12 
       
    class TextFile:

        """Provides a file-like object that takes care of all the things you
           commonly want to do when processing a text file that has some
           line-by-line syntax: strip comments (as long as "#" is your
           comment character), skip blank lines, join adjacent lines by
           escaping the newline (ie. backslash at end of line), strip
           leading and/or trailing whitespace.  All of these are optional
           and independently controllable.

           Provides a 'warn()' method so you can generate warning messages
           that report physical line number, even if the logical line in
           question spans multiple physical lines.  Also provides
           'unreadline()' for implementing line-at-a-time lookahead.

           Constructor is called as:

               TextFile (filename=None, file=None, **options)

           It bombs (RuntimeError) if both 'filename' and 'file' are None;
           'filename' should be a string, and 'file' a file object (or
           something that provides 'readline()' and 'close()' methods).  It
           is recommended that you supply at least 'filename', so that
           TextFile can include it in warning messages.  If 'file' is not
           supplied, TextFile creates its own using the 'open()' builtin.

           The options are all boolean, and affect the value returned by
           'readline()':
             strip_comments [default: true]
               strip from "#" to end-of-line, as well as any whitespace
               leading up to the "#" -- unless it is escaped by a backslash
             lstrip_ws [default: false]
               strip leading whitespace from each line before returning it
             rstrip_ws [default: true]
               strip trailing whitespace (including line terminator!) from
               each line before returning it
             skip_blanks [default: true]
               skip lines that are empty *after* stripping comments and
               whitespace.  (If both lstrip_ws and rstrip_ws are false,
               then some lines may consist of solely whitespace: these will
               *not* be skipped, even if 'skip_blanks' is true.)
             join_lines [default: false]
               if a backslash is the last non-newline character on a line
               after stripping comments and whitespace, join the following
               line to it to form one "logical line"; if N consecutive
               lines end with a backslash, then N+1 physical lines will be
               joined to form one logical line.
             collapse_join [default: false]
               strip leading whitespace from lines that are joined to their
               predecessor; only matters if (join_lines and not lstrip_ws)

           Note that since 'rstrip_ws' can strip the trailing newline, the
           semantics of 'readline()' must differ from those of the builtin
           file object's 'readline()' method!  In particular, 'readline()'
           returns None for end-of-file: an empty string might just be a
           blank line (or an all-whitespace line), if 'rstrip_ws' is true
           but 'skip_blanks' is not."""

        default_options = { 'strip_comments': 1,
                            'skip_blanks':    1,
                            'lstrip_ws':      0,
                            'rstrip_ws':      1,
                            'join_lines':     0,
                            'collapse_join':  0,
                          }

        def __init__(self, filename=None, file=None, **options):
            """Construct a new TextFile object.  At least one of 'filename'
               (a string) and 'file' (a file-like object) must be supplied.
               The keyword argument options are described above and affect
               the values returned by 'readline()'.

               Raises RuntimeError if neither 'filename' nor 'file' is
               given, and KeyError if an unknown option name is passed."""

            if filename is None and file is None:
                raise RuntimeError(
                      "you must supply either or both of 'filename' and 'file'")

            # set values for all options -- either from client option hash
            # or fallback to default_options
            for opt in self.default_options:
                if opt in options:
                    setattr(self, opt, options[opt])
                else:
                    setattr(self, opt, self.default_options[opt])

            # sanity check client option hash
            for opt in options:
                if opt not in self.default_options:
                    raise KeyError("invalid TextFile option '%s'" % opt)

            if file is None:
                self.open(filename)
            else:
                self.filename = filename
                self.file = file
                self.current_line = 0       # assuming that file is at BOF!

            # 'linebuf' is a stack of lines that will be emptied before we
            # actually read from the file; it's only populated by an
            # 'unreadline()' operation
            self.linebuf = []


        def open(self, filename):
            """Open a new file named 'filename'.  This overrides both the
               'filename' and 'file' arguments to the constructor."""

            self.filename = filename
            # NB. inside a method the name 'open' is the builtin, not this
            # method
            self.file = open(self.filename, 'r')
            self.current_line = 0


        def close(self):
            """Close the current file and forget everything we know about it
               (filename, current line number).  Calling 'close()' again on
               an already-closed TextFile is a no-op."""

            if self.file is not None:
                self.file.close()
            self.file = None
            self.filename = None
            self.current_line = None


        def gen_error(self, msg, line=None):
            """Return 'msg' prefixed with the filename and line number(s).

               'line' may be an integer (one physical line) or a list/tuple
               of two integers (a range of physical lines); if omitted, the
               current line number is used.  The filename prefix is left out
               when no filename is known (TextFile built from a file object
               only), and the line prefix is left out when no line number is
               known (eg. after 'close()')."""

            if line is None:
                line = self.current_line

            outmsg = []
            if self.filename is not None:
                outmsg.append(self.filename + ", ")
            if isinstance(line, (list, tuple)):
                outmsg.append("lines %d-%d: " % tuple(line))
            elif line is not None:
                outmsg.append("line %d: " % line)
            outmsg.append(str(msg))
            return "".join(outmsg)


        def error(self, msg, line=None):
            """Raise ValueError with 'msg' tied to the given (or current)
               line; see 'gen_error()' for the meaning of 'line'."""
            raise ValueError("error: " + self.gen_error(msg, line))

        def warn(self, msg, line=None):
            """Print (to stderr) a warning message tied to the current logical
               line in the current file.  If the current logical line in the
               file spans multiple physical lines, the warning refers to the
               whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
               the current line number; it may be a list or tuple to indicate a
               range of physical lines, or an integer for a single physical
               line."""
            sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")


        def _count_physical_line(self, joining):
            """Record that one more physical line has been consumed.

               If 'joining' is true the line belongs to the logical line being
               accumulated, so 'current_line' becomes (or extends) a
               [first, last] range; otherwise it starts a new logical line
               and 'current_line' becomes a plain integer again."""

            current = self.current_line
            if joining:
                if isinstance(current, list):
                    current[1] = current[1] + 1
                else:
                    self.current_line = [current, current + 1]
            else:
                if isinstance(current, list):
                    self.current_line = current[1] + 1
                else:
                    self.current_line = current + 1


        def readline(self):
            """Read and return a single logical line from the current file (or
               from an internal buffer if lines have previously been "unread"
               with 'unreadline()').  If the 'join_lines' option is true, this
               may involve reading multiple physical lines concatenated into a
               single string.  Updates the current line number, so calling
               'warn()' after 'readline()' emits a warning about the physical
               line(s) just read.  Returns None on end-of-file, since the empty
               string can occur if 'rstrip_ws' is true but 'skip_blanks' is
               not."""

            # If any "unread" lines waiting in 'linebuf', return the top
            # one.  (We don't actually buffer read-ahead data -- lines only
            # get put in 'linebuf' if the client explicitly does an
            # 'unreadline()'.)
            if self.linebuf:
                return self.linebuf.pop()

            buildup_line = ''

            while True:
                # read the line, make it None if EOF
                line = self.file.readline()
                if line == '':
                    line = None

                if self.strip_comments and line:

                    # Look for the first "#" in the line.  If none, never
                    # mind.  If we find one and it's the first character, or
                    # is not preceded by a backslash, then it starts a
                    # comment -- strip the comment and carry on.  Otherwise,
                    # it's just an escaped "#", so unescape it (and any other
                    # escaped "#"'s that might be lurking in there) and
                    # otherwise leave the line alone.  (Only the first "#" is
                    # inspected.)

                    pos = line.find("#")
                    if pos == -1:           # no "#" -- no comments
                        pass

                    # It's definitely a comment -- either "#" is the first
                    # character, or it's elsewhere and unescaped.
                    elif pos == 0 or line[pos-1] != "\\":
                        # Have to preserve the trailing newline, because it's
                        # the job of a later step (rstrip_ws) to remove it --
                        # and if rstrip_ws is false, we'd better preserve it!
                        # (NB. this means that if the final line is all comment
                        # and has no trailing newline, we will think that it's
                        # EOF; I think that's OK.)
                        if line.endswith('\n'):
                            eol = '\n'
                        else:
                            eol = ''
                        line = line[0:pos] + eol

                        # If all that's left is whitespace, then skip line
                        # *now*, before we try to join it to 'buildup_line' --
                        # that way constructs like
                        #   hello \\
                        #   # comment that should be ignored
                        #   there
                        # result in "hello there".  The skipped line is still
                        # a physical line, so it must be counted or every
                        # later line number reported by 'warn()' is too low.
                        if line.strip() == "":
                            self._count_physical_line(
                                self.join_lines and buildup_line)
                            continue

                    else:                   # it's an escaped "#"
                        line = line.replace("\\#", "#")


                # did previous line end with a backslash? then accumulate
                if self.join_lines and buildup_line:
                    # oops: end of file
                    if line is None:
                        self.warn("continuation line immediately precedes "
                                  "end-of-file")
                        return buildup_line

                    if self.collapse_join:
                        line = line.lstrip()
                    line = buildup_line + line
                    self._count_physical_line(True)

                # just an ordinary line, read it as usual
                else:
                    if line is None:        # eof
                        return None
                    self._count_physical_line(False)


                # strip whitespace however the client wants (leading and
                # trailing, or one or the other, or neither)
                if self.lstrip_ws and self.rstrip_ws:
                    line = line.strip()
                elif self.lstrip_ws:
                    line = line.lstrip()
                elif self.rstrip_ws:
                    line = line.rstrip()

                # blank line (whether we rstrip'ed or not)? skip to next line
                # if appropriate
                if (line == '' or line == '\n') and self.skip_blanks:
                    continue

                if self.join_lines:
                    # 'endswith' rather than indexing: 'line' may be the empty
                    # string here when 'skip_blanks' is false
                    if line.endswith('\\'):
                        buildup_line = line[:-1]
                        continue

                    if line.endswith('\\\n'):
                        buildup_line = line[0:-2] + '\n'
                        continue

                # well, I guess there's some actual content there: return it
                return line

        # readline ()


        def readlines(self):
            """Read and return the list of all logical lines remaining in the
               current file."""

            lines = []
            while True:
                line = self.readline()
                if line is None:
                    return lines
                lines.append(line)


        def unreadline(self, line):
            """Push 'line' (a string) onto an internal buffer that will be
               checked by future 'readline()' calls.  Handy for implementing
               a parser with line-at-a-time lookahead."""

            self.linebuf.append(line)
       
   306 
       
   307 
       
   if __name__ == "__main__":
       # Self-test: write a small sample file, read it back through TextFile
       # with several option combinations, and print "ok"/"not ok" for each.
       test_data = ("# test file\n"
                    "\n"
                    "line 3 \\\n"
                    "# intervening comment\n"
                    "  continues on next line\n")

       # result 1: no fancy options
       result1 = [text + "\n" for text in test_data.split("\n")[0:-1]]

       # result 2: just strip comments
       result2 = ["\n",
                  "line 3 \\\n",
                  "  continues on next line\n"]

       # result 3: just strip blank lines
       result3 = ["# test file\n",
                  "line 3 \\\n",
                  "# intervening comment\n",
                  "  continues on next line\n"]

       # result 4: default, strip comments, blank lines, and trailing whitespace
       result4 = ["line 3 \\",
                  "  continues on next line"]

       # result 5: strip comments and blanks, plus join lines (but don't
       # "collapse" joined lines)
       result5 = ["line 3   continues on next line"]

       # result 6: strip comments and blanks, plus join lines (and
       # "collapse" joined lines)
       result6 = ["line 3 continues on next line"]

       def test_input(count, description, file, expected_result):
           """Read every logical line from 'file' and report whether the
              list matches 'expected_result'."""
           result = file.readlines()
           if result == expected_result:
               print("ok %d (%s)" % (count, description))
           else:
               print("not ok %d (%s):" % (count, description))
               print("** expected:")
               print(expected_result)
               print("** received:")
               print(result)

       # (description, TextFile options, expected lines), in test order
       test_cases = [
           ("no processing",
            {'strip_comments': 0, 'skip_blanks': 0,
             'lstrip_ws': 0, 'rstrip_ws': 0},
            result1),
           ("strip comments",
            {'strip_comments': 1, 'skip_blanks': 0,
             'lstrip_ws': 0, 'rstrip_ws': 0},
            result2),
           ("strip blanks",
            {'strip_comments': 0, 'skip_blanks': 1,
             'lstrip_ws': 0, 'rstrip_ws': 0},
            result3),
           ("default processing",
            {},
            result4),
           ("join lines without collapsing",
            {'strip_comments': 1, 'skip_blanks': 1,
             'join_lines': 1, 'rstrip_ws': 1},
            result5),
           ("join lines with collapsing",
            {'strip_comments': 1, 'skip_blanks': 1,
             'join_lines': 1, 'rstrip_ws': 1, 'collapse_join': 1},
            result6),
       ]

       filename = "test.txt"
       out_file = open(filename, "w")
       try:
           out_file.write(test_data)
       finally:
           out_file.close()

       try:
           count = 0
           for description, options, expected in test_cases:
               count = count + 1
               in_file = TextFile(filename, **options)
               try:
                   test_input(count, description, in_file, expected)
               finally:
                   # close explicitly: an open handle prevents the
                   # os.remove() below from succeeding on Windows
                   in_file.close()
       finally:
           # always clean up the scratch file, even if a test blew up
           os.remove(filename)