|
1 """text_file |
|
2 |
|
3 provides the TextFile class, which gives an interface to text files |
|
4 that (optionally) takes care of stripping comments, ignoring blank |
|
5 lines, and joining lines with backslashes.""" |
|
6 |
|
7 __revision__ = "$Id: text_file.py 29687 2002-11-14 02:25:42Z akuchling $" |
|
8 |
|
9 from types import * |
|
10 import sys, os, string |
|
11 |
|
12 |
|
class TextFile:

    """Provides a file-like object that takes care of all the things you
       commonly want to do when processing a text file that has some
       line-by-line syntax: strip comments (as long as "#" is your
       comment character), skip blank lines, join adjacent lines by
       escaping the newline (ie. backslash at end of line), strip
       leading and/or trailing whitespace.  All of these are optional
       and independently controllable.

       Provides a 'warn()' method so you can generate warning messages that
       report physical line number, even if the logical line in question
       spans multiple physical lines.  Also provides 'unreadline()' for
       implementing line-at-a-time lookahead.

       Constructor is called as:

           TextFile (filename=None, file=None, **options)

       It bombs (RuntimeError) if both 'filename' and 'file' are None;
       'filename' should be a string, and 'file' a file object (or
       something that provides 'readline()' and 'close()' methods).  It is
       recommended that you supply at least 'filename', so that TextFile
       can include it in warning messages.  If 'file' is not supplied,
       TextFile creates its own using the 'open()' builtin.

       The options are all boolean, and affect the value returned by
       'readline()':
         strip_comments [default: true]
           strip from "#" to end-of-line -- unless the "#" is escaped by
           a backslash.  Whitespace leading up to the "#" is only removed
           if 'rstrip_ws' is also true.
         lstrip_ws [default: false]
           strip leading whitespace from each line before returning it
         rstrip_ws [default: true]
           strip trailing whitespace (including line terminator!) from
           each line before returning it
         skip_blanks [default: true]
           skip lines that are empty *after* stripping comments and
           whitespace.  (If both lstrip_ws and rstrip_ws are false,
           then some lines may consist of solely whitespace: these will
           *not* be skipped, even if 'skip_blanks' is true.)
         join_lines [default: false]
           if a backslash is the last non-newline character on a line
           after stripping comments and whitespace, join the following line
           to it to form one "logical line"; if N consecutive lines end
           with a backslash, then N+1 physical lines will be joined to
           form one logical line.
         collapse_join [default: false]
           strip leading whitespace from lines that are joined to their
           predecessor; only matters if (join_lines and not lstrip_ws)

       Note that since 'rstrip_ws' can strip the trailing newline, the
       semantics of 'readline()' must differ from those of the builtin file
       object's 'readline()' method!  In particular, 'readline()' returns
       None for end-of-file: an empty string might just be a blank line (or
       an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
       not."""

    # Option name -> default value; also the set of valid option names.
    default_options = { 'strip_comments': 1,
                        'skip_blanks':    1,
                        'lstrip_ws':      0,
                        'rstrip_ws':      1,
                        'join_lines':     0,
                        'collapse_join':  0,
                      }

    def __init__(self, filename=None, file=None, **options):
        """Construct a new TextFile object.  At least one of 'filename'
           (a string) and 'file' (a file-like object) must be supplied.
           The keyword argument options are described above and affect
           the values returned by 'readline()'.

           Raises RuntimeError if neither 'filename' nor 'file' is given,
           and KeyError if an unknown option name is passed."""

        if filename is None and file is None:
            raise RuntimeError(
                "you must supply either or both of 'filename' and 'file'")

        # sanity check client option hash
        for opt in options:
            if opt not in self.default_options:
                raise KeyError("invalid TextFile option '%s'" % opt)

        # set values for all options -- either from client option hash
        # or fallback to default_options
        for opt in self.default_options:
            setattr(self, opt, options.get(opt, self.default_options[opt]))

        if file is None:
            self.open(filename)
        else:
            self.filename = filename
            self.file = file
            self.current_line = 0   # assuming that file is at BOF!

        # 'linebuf' is a stack of lines that will be emptied before we
        # actually read from the file; it's only populated by an
        # 'unreadline()' operation
        self.linebuf = []


    def open(self, filename):
        """Open a new file named 'filename'.  This overrides both the
           'filename' and 'file' arguments to the constructor."""

        self.filename = filename
        # inside a method body 'open' is the builtin, not this method
        self.file = open(self.filename, 'r')
        self.current_line = 0


    def close(self):
        """Close the current file and forget everything we know about it
           (filename, current line number).  Calling it again on an
           already-closed TextFile is a no-op."""

        if self.file is not None:
            self.file.close()
        self.file = None
        self.filename = None
        self.current_line = None


    def gen_error(self, msg, line=None):
        """Return 'msg' prefixed with the filename (when one is known) and
           a line reference.  'line' defaults to the current line number;
           it may be an integer ("line N: ") or a two-element list or
           tuple ("lines M-N: ")."""

        if line is None:
            line = self.current_line

        outmsg = []
        # 'filename' is optional when the caller supplied 'file' directly
        if self.filename is not None:
            outmsg.append(self.filename + ", ")
        if isinstance(line, (list, tuple)):
            outmsg.append("lines %d-%d: " % tuple(line))
        else:
            outmsg.append("line %d: " % line)
        outmsg.append(str(msg))
        return "".join(outmsg)


    def error(self, msg, line=None):
        """Raise ValueError with 'msg' tied to 'line' (default: the
           current line), formatted by 'gen_error()'."""
        raise ValueError("error: " + self.gen_error(msg, line))

    def warn(self, msg, line=None):
        """Print (to stderr) a warning message tied to the current logical
           line in the current file.  If the current logical line in the
           file spans multiple physical lines, the warning refers to the
           whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
           the current line number; it may be a list or tuple to indicate a
           range of physical lines, or an integer for a single physical
           line."""
        sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")


    def _advance_line(self, joining):
        """Account for one more physical line having been consumed.

           'current_line' is an integer for a single-line logical line and
           a two-element list [first, last] once lines have been joined.
           If 'joining' is true the physical line belongs to the logical
           line being built, so the range is extended; otherwise it starts
           a fresh logical line."""

        current = self.current_line
        if joining:
            if isinstance(current, list):
                current[1] = current[1] + 1
            else:
                self.current_line = [current, current + 1]
        else:
            if isinstance(current, list):
                self.current_line = current[1] + 1
            else:
                self.current_line = current + 1


    def readline(self):
        """Read and return a single logical line from the current file (or
           from an internal buffer if lines have previously been "unread"
           with 'unreadline()').  If the 'join_lines' option is true, this
           may involve reading multiple physical lines concatenated into a
           single string.  Updates the current line number, so calling
           'warn()' after 'readline()' emits a warning about the physical
           line(s) just read.  Returns None on end-of-file, since the empty
           string can occur if 'rstrip_ws' is true but 'skip_blanks' is
           not."""

        # If any "unread" lines waiting in 'linebuf', return the top
        # one.  (We don't actually buffer read-ahead data -- lines only
        # get put in 'linebuf' if the client explicitly does an
        # 'unreadline()'.)
        if self.linebuf:
            return self.linebuf.pop()

        buildup_line = ''

        while 1:
            # read the line, make it None if EOF
            line = self.file.readline()
            if line == '':
                line = None

            # did previous line end with a backslash?
            joining = self.join_lines and buildup_line

            if self.strip_comments and line:

                # Look for the first "#" in the line.  If none, never
                # mind.  If we find one and it's the first character, or
                # is not preceded by "\", then it starts a comment --
                # strip the comment and carry on.  Otherwise, it's just
                # an escaped "#", so unescape it (and any other escaped
                # "#"'s that might be lurking in there) and otherwise
                # leave the line alone.

                pos = line.find("#")
                if pos == -1:           # no "#" -- no comments
                    pass

                # It's definitely a comment -- either "#" is the first
                # character, or it's elsewhere and unescaped.
                elif pos == 0 or line[pos-1] != "\\":
                    # Have to preserve the trailing newline, because it's
                    # the job of a later step (rstrip_ws) to remove it --
                    # and if rstrip_ws is false, we'd better preserve it!
                    # (NB. this means that if the final line is all comment
                    # and has no trailing newline, we will think that it's
                    # EOF; I think that's OK.)
                    if line[-1] == '\n':
                        eol = '\n'
                    else:
                        eol = ''
                    line = line[0:pos] + eol

                    # If all that's left is whitespace, then skip line
                    # *now*, before we try to join it to 'buildup_line' --
                    # that way constructs like
                    #   hello \\
                    #   # comment that should be ignored
                    #   there
                    # result in "hello there".
                    if line.strip() == "":
                        # the skipped line is still a physical line: count
                        # it so reported line numbers stay accurate
                        self._advance_line(joining)
                        continue

                else:                   # it's an escaped "#"
                    line = line.replace("\\#", "#")


            # previous line ended with a backslash: accumulate
            if joining:
                # oops: end of file
                if line is None:
                    self.warn("continuation line immediately precedes "
                              "end-of-file")
                    return buildup_line

                if self.collapse_join:
                    line = line.lstrip()
                line = buildup_line + line
                self._advance_line(1)

            # just an ordinary line, read it as usual
            else:
                if line is None:        # eof
                    return None
                self._advance_line(0)


            # strip whitespace however the client wants (leading and
            # trailing, or one or the other, or neither)
            if self.lstrip_ws and self.rstrip_ws:
                line = line.strip()
            elif self.lstrip_ws:
                line = line.lstrip()
            elif self.rstrip_ws:
                line = line.rstrip()

            # blank line (whether we rstrip'ed or not)?  skip to next line
            # if appropriate
            if (line == '' or line == '\n') and self.skip_blanks:
                continue

            if self.join_lines:
                # slices (not indexing) so that an empty line -- possible
                # when 'skip_blanks' is false -- does not raise IndexError
                if line[-1:] == '\\':
                    buildup_line = line[:-1]
                    continue

                if line[-2:] == '\\\n':
                    buildup_line = line[0:-2] + '\n'
                    continue

            # well, I guess there's some actual content there: return it
            return line

    # readline ()


    def readlines(self):
        """Read and return the list of all logical lines remaining in the
           current file."""

        lines = []
        while 1:
            line = self.readline()
            if line is None:
                return lines
            lines.append(line)


    def unreadline(self, line):
        """Push 'line' (a string) onto an internal buffer that will be
           checked by future 'readline()' calls.  Handy for implementing
           a parser with line-at-a-time lookahead."""

        self.linebuf.append(line)
|
306 |
|
307 |
|
if __name__ == "__main__":
    # Self-test: write a small sample file, read it back through TextFile
    # with several option combinations, and print "ok N (...)" or
    # "not ok N (...)" for each.

    # The continuation line is indented by two spaces on purpose, so that
    # joining with and without 'collapse_join' gives different results.
    test_data = """# test file

line 3 \\
# intervening comment
  continues on next line
"""
    # result 1: no fancy options (a real list, so '==' against the list
    # returned by readlines() can succeed)
    result1 = [x + "\n" for x in test_data.split("\n")[0:-1]]

    # result 2: just strip comments
    result2 = ["\n",
               "line 3 \\\n",
               "  continues on next line\n"]

    # result 3: just strip blank lines
    result3 = ["# test file\n",
               "line 3 \\\n",
               "# intervening comment\n",
               "  continues on next line\n"]

    # result 4: default, strip comments, blank lines, and trailing whitespace
    result4 = ["line 3 \\",
               "  continues on next line"]

    # result 5: strip comments and blanks, plus join lines (but don't
    # "collapse" joined lines)
    result5 = ["line 3   continues on next line"]

    # result 6: strip comments and blanks, plus join lines (and
    # "collapse" joined lines)
    result6 = ["line 3 continues on next line"]

    def test_input(count, description, file, expected_result):
        """Read every logical line from 'file' and report whether the
           list matches 'expected_result'."""
        result = file.readlines()
        if result == expected_result:
            print("ok %d (%s)" % (count, description))
        else:
            print("not ok %d (%s):" % (count, description))
            print("** expected:")
            print(expected_result)
            print("** received:")
            print(result)

    # (description, TextFile options, expected lines), in test order
    test_cases = [
        ("no processing",
         {'strip_comments': 0, 'skip_blanks': 0,
          'lstrip_ws': 0, 'rstrip_ws': 0},
         result1),
        ("strip comments",
         {'strip_comments': 1, 'skip_blanks': 0,
          'lstrip_ws': 0, 'rstrip_ws': 0},
         result2),
        ("strip blanks",
         {'strip_comments': 0, 'skip_blanks': 1,
          'lstrip_ws': 0, 'rstrip_ws': 0},
         result3),
        ("default processing",
         {},
         result4),
        ("join lines without collapsing",
         {'strip_comments': 1, 'skip_blanks': 1,
          'join_lines': 1, 'rstrip_ws': 1},
         result5),
        ("join lines with collapsing",
         {'strip_comments': 1, 'skip_blanks': 1,
          'join_lines': 1, 'rstrip_ws': 1, 'collapse_join': 1},
         result6),
    ]

    filename = "test.txt"
    out_file = open(filename, "w")
    try:
        out_file.write(test_data)
    finally:
        out_file.close()

    try:
        count = 0
        for description, options, expected in test_cases:
            count = count + 1
            in_file = TextFile(filename, **options)
            try:
                test_input(count, description, in_file, expected)
            finally:
                # release the handle before the file is removed below
                in_file.close()
    finally:
        # always clean up the scratch file, even if a check blew up
        os.remove(filename)