|
1 """Helper class to quickly write a loop over all standard input files. |
|
2 |
|
3 Typical use is: |
|
4 |
|
5 import fileinput |
|
6 for line in fileinput.input(): |
|
7 process(line) |
|
8 |
|
9 This iterates over the lines of all files listed in sys.argv[1:], |
|
10 defaulting to sys.stdin if the list is empty. If a filename is '-' it |
|
11 is also replaced by sys.stdin. To specify an alternative list of |
|
12 filenames, pass it as the argument to input(). A single file name is |
|
13 also allowed. |
|
14 |
|
15 Functions filename(), lineno() return the filename and cumulative line |
|
16 number of the line that has just been read; filelineno() returns its |
|
17 line number in the current file; isfirstline() returns true iff the |
|
18 line just read is the first line of its file; isstdin() returns true |
|
19 iff the line was read from sys.stdin. Function nextfile() closes the |
|
20 current file so that the next iteration will read the first line from |
|
21 the next file (if any); lines not read from the file will not count |
|
22 towards the cumulative line count; the filename is not changed until |
|
23 after the first line of the next file has been read. Function close() |
|
24 closes the sequence. |
|
25 |
|
26 Before any lines have been read, filename() returns None and both line |
|
27 numbers are zero; nextfile() has no effect. After all lines have been |
|
28 read, filename() and the line number functions return the values |
|
29 pertaining to the last line read; nextfile() has no effect. |
|
30 |
|
All files are opened in text mode by default; you can override this by
setting the mode parameter of input() or FileInput.__init__().
If an I/O error occurs while opening or reading a file, an IOError
exception is raised.
|
35 |
|
36 If sys.stdin is used more than once, the second and further use will |
|
37 return no lines, except perhaps for interactive use, or if it has been |
|
38 explicitly reset (e.g. using sys.stdin.seek(0)). |
|
39 |
|
40 Empty files are opened and immediately closed; the only time their |
|
41 presence in the list of filenames is noticeable at all is when the |
|
42 last file opened is empty. |
|
43 |
|
44 It is possible that the last line of a file doesn't end in a newline |
|
45 character; otherwise lines are returned including the trailing |
|
46 newline. |
|
47 |
|
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), fileno(), isfirstline(), isstdin(), nextfile()
and close() correspond to the functions in the module.  In addition it
has a readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.
|
55 |
|
56 Optional in-place filtering: if the keyword argument inplace=1 is |
|
57 passed to input() or to the FileInput constructor, the file is moved |
|
58 to a backup file and standard output is directed to the input file. |
|
59 This makes it possible to write a filter that rewrites its input file |
|
60 in place. If the keyword argument backup=".<some extension>" is also |
|
61 given, it specifies the extension for the backup file, and the backup |
|
62 file remains around; by default, the extension is ".bak" and it is |
|
63 deleted when the output file is closed. In-place filtering is |
|
64 disabled when standard input is read. XXX The current implementation |
|
65 does not work for MS-DOS 8+3 filesystems. |
|
66 |
|
67 Performance: this module is unfortunately one of the slower ways of |
|
68 processing large numbers of input lines. Nevertheless, a significant |
|
69 speed-up has been obtained by using readlines(bufsize) instead of |
|
70 readline(). A new keyword argument, bufsize=N, is present on the |
|
71 input() function and the FileInput() class to override the default |
|
72 buffer size. |
|
73 |
|
74 XXX Possible additions: |
|
75 |
|
76 - optional getopt argument processing |
|
77 - isatty() |
|
78 - read(), read(size), even readlines() |
|
79 |
|
80 """ |
|
81 |
|
82 import sys, os |
|
83 |
|
# Public API of the module.  fileno() and the two opening hooks are defined
# below and are part of the public interface, so they are exported as well.
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput",
           "hook_compressed", "hook_encoded"]

# The FileInput instance created by the most recent input() call; the
# module-level functions delegate to it.  Reset to None by close().
_state = None

# Size hint passed to readlines() when the caller gives no bufsize (or 0).
DEFAULT_BUFSIZE = 8*1024
|
90 |
|
def input(files=None, inplace=0, backup="", bufsize=0,
          mode="r", openhook=None):
    """input([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Create an instance of the FileInput class. The instance will be used
    as global state for the functions of this module, and is also returned
    to use during iteration. The parameters to this function will be passed
    along to the constructor of the FileInput class.

    Raises RuntimeError if a previous input() is still active, i.e. the
    current global instance still has a file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
    return _state
|
105 |
|
def close():
    """Close the sequence and forget the module-level FileInput instance."""
    global _state
    # Detach the global first so the module is inactive even if the
    # instance's own close() raises.
    active, _state = _state, None
    if active:
        active.close()
|
113 |
|
def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()
|
127 |
|
def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()
|
136 |
|
def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()
|
146 |
|
def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()
|
156 |
|
def fileno():
    """
    Return the file number of the current file. When no file is currently
    opened, returns -1.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.fileno()
|
165 |
|
def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()
|
174 |
|
def isstdin():
    """
    Return true if the last line was read from sys.stdin,
    otherwise return false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()
|
183 |
|
class FileInput:
    """class FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.

    An instance is its own iterator and can also be used as a context
    manager, in which case close() is called when the block is left.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Set up the file queue and validate the options.

        files    -- a single file name, an iterable of names, or None to use
                    sys.argv[1:]; an empty list means standard input ('-').
        inplace  -- if true, each file is moved to a backup and sys.stdout
                    is redirected to a new file with the original name.
        backup   -- extension of the backup file; if empty, os.extsep+"bak"
                    is used and the backup is removed when the file is closed.
        bufsize  -- size hint for readlines(); 0 selects DEFAULT_BUFSIZE.
        mode     -- one of 'r', 'rU', 'U', 'rb'.
        openhook -- callable(filename, mode) returning an open file object;
                    cannot be combined with inplace.

        Raises ValueError for an invalid mode or openhook.
        """
        try:
            string_types = basestring   # Python 2: str and unicode
        except NameError:
            string_types = str
        if isinstance(files, string_types):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        self._mode = mode
        if inplace and openhook:
            raise ValueError("FileInput cannot use an opening hook in inplace mode")
        elif openhook and not callable(openhook):
            raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the current file and drop all files still queued."""
        try:
            self.nextfile()
        finally:
            # Empty the queue even if closing the current file failed, so
            # a later readline() cannot silently resume.
            self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def __getitem__(self, i):
        """Sequence access; i must equal the number of lines read so far.

        Raises RuntimeError for out-of-order access and IndexError at the
        end of input.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file; the next read starts on the next file.

        Restores sys.stdout and closes the output file when in-place
        filtering is active, and removes the backup file unless a backup
        extension was requested.  The steps are chained with try/finally
        so a failing close() does not skip the remaining cleanup.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            current = self._file
            self._file = None
            try:
                # sys.stdin is not ours to close.
                if current and not self._isstdin:
                    current.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next line, or "" when all files are exhausted.

        Implemented as a loop rather than by recursion so that a long run
        of empty files cannot exhaust the interpreter's recursion limit.
        """
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted (or empty): close it and let
                # the loop move on to the next one.
                self.nextfile()

    def _open_next(self):
        """Pop the next name off the queue and open it as the current file."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        elif self._inplace:
            self._backupfilename = (
                self._filename + (self._backup or os.extsep + "bak"))
            try:
                os.unlink(self._backupfilename)
            except OSError:
                pass
            # The next few lines may raise IOError
            os.rename(self._filename, self._backupfilename)
            self._file = open(self._backupfilename, self._mode)
            try:
                perm = os.fstat(self._file.fileno()).st_mode
            except OSError:
                self._output = open(self._filename, "w")
            else:
                # Recreate the output file with the permission bits of
                # the file it replaces.
                fd = os.open(self._filename,
                             os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                             perm)
                self._output = os.fdopen(fd, "w")
                try:
                    if hasattr(os, 'chmod'):
                        os.chmod(self._filename, perm)
                except OSError:
                    pass
            self._savestdout = sys.stdout
            sys.stdout = self._output
        else:
            # This may raise IOError
            if self._openhook:
                self._file = self._openhook(self._filename, self._mode)
            else:
                self._file = open(self._filename, self._mode)

    def filename(self):
        """Return the name of the file of the last line read (None before)."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if none is available."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return true if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
|
376 |
|
377 |
|
def hook_compressed(filename, mode):
    """Opening hook that picks an opener from the file name's extension.

    '.gz' files are opened with gzip.open, '.bz2' files with bz2.BZ2File,
    and everything else with the builtin open().
    """
    suffix = os.path.splitext(filename)[1]
    if suffix == '.gz':
        import gzip
        opener = gzip.open
    elif suffix == '.bz2':
        import bz2
        opener = bz2.BZ2File
    else:
        opener = open
    return opener(filename, mode)
|
388 |
|
389 |
|
def hook_encoded(encoding):
    """Return an opening hook that opens files via codecs.open with *encoding*."""
    from codecs import open as codecs_open

    def openhook(filename, mode):
        # The encoding is captured from the enclosing call.
        return codecs_open(filename, mode, encoding)

    return openhook
|
395 |
|
396 |
|
def _test():
    """Command-line self-test: echo every input line with its counters.

    Options parsed from sys.argv: -i turns on in-place filtering and
    -b EXT sets the backup extension; remaining arguments are file names.
    """
    import getopt
    inplace = 0
    backup = 0
    options, files = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in options:
        if flag == '-i':
            inplace = 1
        elif flag == '-b':
            backup = value
    for line in input(files, inplace=inplace, backup=backup):
        # Drop one trailing newline, then one trailing carriage return.
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        marker = "*" if isfirstline() else ""
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

if __name__ == '__main__':
    _test()