|
"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format,
such as from GIF image format to PPM image format.  Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode).  Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output.  The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together.  It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data.  You can then use the template to do conversions from many
different sources to many different destinations.  The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output.  The command must be valid /bin/sh
syntax.  If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile are the empty string, standard input is read or
standard output is written, respectively.  The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()

For an example, see the function test() at the end of the file.
"""                                     # '
|
60 |
|
61 |
|
62 import re |
|
63 import os |
|
64 import tempfile |
|
65 import string |
|
66 |
|
67 __all__ = ["Template"] |
|
68 |
|
# Conversion step kinds
#
# Each kind is a two-character string: the first character describes the
# input side of the step, the second the output side.

FILEIN_FILEOUT = 'ff'                   # Must read & write real files
STDIN_FILEOUT = '-f'                    # Must write a real file
FILEIN_STDOUT = 'f-'                    # Must read a real file
STDIN_STDOUT = '--'                     # Normal pipeline element
SOURCE = '.-'                           # Must be first, writes stdout
SINK = '-.'                             # Must be last, reads stdin

# Every kind accepted for a conversion step.
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
|
80 |
|
81 |
|
class Template:
    """Class representing a pipeline template.

    The conversion steps are kept in self.steps as a list of (cmd, kind)
    tuples in execution order.  self.debugging holds the flag set by
    debug(); when it is true, generated pipelines are printed and run
    with 'set -x'.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later edits to one template leave the other alone.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def _check_cmd_kind(self, where, cmd, kind):
        """Raise unless cmd is a string and kind is a known step kind.

        where is the public method name used in the error message.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.%s: cmd must be a string' % where)
        if kind not in stepkinds:
            raise ValueError('Template.%s: bad kind %r' % (where, kind))

    def _check_placeholders(self, where, cmd, kind):
        """Raise unless cmd mentions $IN / $OUT where kind needs a file.

        where is the public method name used in the error message.
        """
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.%s: missing $IN in cmd' % where)
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.%s: missing $OUT in cmd' % where)

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is unknown or is SOURCE, if the template already ends with
        a SINK step, or if cmd lacks the $IN / $OUT that kind requires.
        """
        self._check_cmd_kind('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        self._check_placeholders('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is unknown or is SINK, if the template already begins with
        a SOURCE step, or if cmd lacks the $IN / $OUT that kind requires.
        """
        self._check_cmd_kind('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        self._check_placeholders('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        Raises ValueError if rw is neither 'r' nor 'w'.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not %r' % (rw,))

    def open_r(self, file):
        """t.open_r(file) and t.open_w(file) implement
        t.open(file, 'r') and t.open(file, 'w') respectively.

        With no steps the file is opened directly; otherwise the result
        of os.popen() on the generated pipeline is returned.  Raises
        ValueError if the last step is a SINK.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is opened directly; otherwise the result
        of os.popen() on the generated pipeline is returned.  Raises
        ValueError if the first step is a SOURCE.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile with os.system() and returns its result."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        text for this template's steps.

        When debugging is on, the command is printed and prefixed with
        'set -x; '.
        """
        # The bare name resolves to the module-level function, not this method.
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
|
193 |
|
194 |
|
def makepipeline(infile, steps, outfile):
    """Return /bin/sh command text that runs steps from infile to outfile.

    steps is a sequence of (cmd, kind) pairs.  infile and outfile are
    file names; an empty string leaves that end of the pipeline
    unredirected.  Temporary files are created here, with
    tempfile.mkstemp(), wherever two adjacent stages must be connected
    through a file; the returned text removes them at the end and from
    a signal trap.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # Make sure there is at least one step
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input end: a file-reading first stage with no
    # input file gets a 'cat' in front of it.
    first_kind = stages[0][2]
    if first_kind[0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    # Likewise for the output end.
    last_kind = stages[-1][2]
    if last_kind[1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect stages that need files
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp

    # Attach $IN/$OUT assignments and stream redirections to each command
    for stage in stages:
        source, command, kind, target = stage
        reads, writes = kind[0], kind[1]
        if writes == 'f':
            command = 'OUT=' + quote(target) + '; ' + command
        if reads == 'f':
            command = 'IN=' + quote(source) + '; ' + command
        if reads == '-' and source:
            command = command + ' <' + quote(source)
        if writes == '-' and target:
            command = command + ' >' + quote(target)
        stage[1] = command

    # A stage with no input file is piped from the previous one; a stage
    # with an input file starts on a new line.
    pieces = [stages[0][1]]
    for stage in stages[1:]:
        command, kind = stage[1:3]
        if stage[0] == '':
            if 'f' in kind:
                # Group the variable assignment with its command.
                command = '{ ' + command + '; }'
            pieces.append(' |\n' + command)
        else:
            pieces.append('\n' + command)
    script = ''.join(pieces)

    # Remove the temporary files on normal completion and on signals
    if garbage:
        rmcmd = 'rm -f' + ''.join([' ' + quote(name) for name in garbage])
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        script = trapcmd + '\n' + script + '\n' + rmcmd

    return script
|
262 |
|
263 |
|
# Reliably quote a string as a single argument for /bin/sh

# Characters that are left unquoted
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./'
# Characters that are backslash-escaped inside "double quotes"
_funnychars = '"`$\\'


def quote(file):
    """Return file quoted so that /bin/sh reads it as one argument.

    An empty string becomes ''.  A string made only of characters in
    _safechars is returned unchanged.  Otherwise the string is wrapped
    in single quotes, or, if it contains a single quote itself, in
    double quotes with the characters in _funnychars backslash-escaped.
    """
    if file == '':
        # An unquoted empty word would vanish from the command line.
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        # Every character is safe, so no quoting is needed.
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    pieces = []
    for c in file:
        if c in _funnychars:
            c = '\\' + c
        pieces.append(c)
    return '"' + ''.join(pieces) + '"'