|
1 """Conversion pipeline templates. |
|
2 |
|
3 The problem: |
|
4 ------------ |
|
5 |
|
6 Suppose you have some data that you want to convert to another format, |
|
7 such as from GIF image format to PPM image format. Maybe the |
|
8 conversion involves several steps (e.g. piping it through compress or |
|
9 uuencode). Some of the conversion steps may require that their input |
|
10 is a disk file, others may be able to read standard input; similar for |
|
11 their output. The input to the entire conversion may also be read |
|
12 from a disk file or from an open file, and similar for its output. |
|
13 |
|
14 The module lets you construct a pipeline template by sticking one or |
|
15 more conversion steps together. It will take care of creating and |
|
16 removing temporary files if they are necessary to hold intermediate |
|
17 data. You can then use the template to do conversions from many |
|
18 different sources to many different destinations. The temporary |
|
19 file names used are different each time the template is used. |
|
20 |
|
21 The templates are objects so you can create templates for many |
|
22 different conversion steps and store them in a dictionary, for |
|
23 instance. |
|
24 |
|
25 |
|
26 Directions: |
|
27 ----------- |
|
28 |
|
29 To create a template: |
|
30 t = Template() |
|
31 |
|
32 To add a conversion step to a template: |
|
33 t.append(command, kind) |
|
34 where kind is a string of two characters: the first is '-' if the |
|
35 command reads its standard input or 'f' if it requires a file; the |
|
36 second likewise for the output. The command must be valid /bin/sh |
|
37 syntax. If input or output files are required, they are passed as |
|
38 $IN and $OUT; otherwise, it must be possible to use the command in |
|
39 a pipeline. |
|
40 |
|
41 To add a conversion step at the beginning: |
|
42 t.prepend(command, kind) |
|
43 |
|
44 To convert a file to another file using a template: |
|
45 sts = t.copy(infile, outfile) |
|
46 If infile or outfile are the empty string, standard input is read or |
|
47 standard output is written, respectively. The return value is the |
|
48 exit status of the conversion pipeline. |
|
49 |
|
50 To open a file for reading or writing through a conversion pipeline: |
|
51 fp = t.open(file, mode) |
|
52 where mode is 'r' to read the file, or 'w' to write it -- just like |
|
53 for the built-in function open() or for os.popen(). |
|
54 |
|
55 To create a new template object initialized to a given one: |
|
56 t2 = t.clone() |
|
57 |
|
58 For an example, see the function test() at the end of the file. |
|
59 """ # ' |
|
60 |
|
61 |
|
62 import re |
|
63 |
|
64 import os |
|
65 import tempfile |
|
66 import string |
|
67 |
|
68 __all__ = ["Template"] |
|
69 |
|
# Conversion step kinds.
#
# Each kind is a two-character string: the first character describes
# where the step takes its input from, the second where its output goes.

FILEIN_FILEOUT = 'ff'   # Must read & write real files
STDIN_FILEOUT = '-f'    # Must write a real file
FILEIN_STDOUT = 'f-'    # Must read a real file
STDIN_STDOUT = '--'     # Normal pipeline element
SOURCE = '.-'           # Must be first, writes stdout
SINK = '-.'             # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend()
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
|
81 |
|
82 |
|
class Template:
    """Class representing a pipeline template.

    A template holds an ordered list of conversion steps in
    ``self.steps``; each step is a ``(cmd, kind)`` tuple where cmd is a
    /bin/sh command string and kind is one of the module-level step
    kinds.  ``self.debugging`` is a flag: when true, every generated
    pipeline is printed and run under ``set -x``.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state.

        Only the step list is emptied; the debugging flag is untouched.
        """
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with the same
        steps and debugging flag as the current one.

        The step list is copied, so later changes to either template do
        not affect the other.
        """
        t = Template()
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def _check_cmd_and_kind(self, where, cmd, kind):
        """Check the type of cmd and that kind is a known step kind.

        where is the calling method's name, used in the error messages.
        Raises TypeError for a non-string cmd and ValueError for an
        unknown kind.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.%s: cmd must be a string' % where)
        if kind not in stepkinds:
            raise ValueError('Template.%s: bad kind %r' % (where, kind))

    def _check_placeholders(self, where, cmd, kind):
        """Check that cmd mentions $IN / $OUT when kind requires files.

        where is the calling method's name, used in the error messages.
        Raises ValueError when a required placeholder is missing.
        """
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.%s: missing $IN in cmd' % where)
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.%s: missing $OUT in cmd' % where)

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown, if kind is SOURCE, if the template already ends
        with a SINK step, or if cmd lacks a $IN / $OUT that kind needs.
        """
        self._check_cmd_and_kind('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        self._check_placeholders('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string, and ValueError if kind
        is unknown, if kind is SINK, if the template already begins
        with a SOURCE step, or if cmd lacks a $IN / $OUT that kind
        needs.
        """
        self._check_cmd_and_kind('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        self._check_placeholders('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not %r' % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is simply opened for reading.  Otherwise
        the pipeline is started with file as its input and the returned
        pipe reads the pipeline's output.  Raises ValueError if the
        last step is a SINK, since it produces no output to read.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is simply opened for writing.  Otherwise
        the pipeline is started with file as its output and the
        returned pipe feeds the pipeline's input.  Raises ValueError if
        the first step is a SOURCE, since it accepts no input.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile and returns the value of os.system() for it."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command
        text for this template's steps.

        When debugging is on, the command is printed and then prefixed
        with 'set -x; ' so the shell traces it as well.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            # Parenthesised form prints the same text on Python 2 and 3
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
|
194 |
|
195 |
|
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps from infile to outfile.

    infile and outfile are file names, or '' for standard input and
    standard output.  steps is a sequence of (cmd, kind) pairs; kind is
    a two-character string whose first character describes the step's
    input and second its output ('f' means a real file passed via $IN
    or $OUT, '-' means stdin/stdout).

    Temporary files are created here, with tempfile.mkstemp(), wherever
    two neighbouring steps have to be connected through a file.  The
    returned command removes them when it finishes and installs a trap
    that removes them on signals 1 2 3 13 14 15.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # Make sure there is at least one step
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input and output ends: a step that needs a real
    # file but sits next to stdin/stdout gets a 'cat' neighbour, so the
    # temporary-file logic below can connect them.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect stages that need files
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)  # only the name is needed; the shell reopens it
            garbage.append(temp)
            left[-1] = right[0] = temp

    # Decorate each command with its variable settings and redirections
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd

    # Join the stages: a stage without an input file is fed by a pipe
    # from its predecessor, any other stage starts a new command line.
    cmdlist = stages[0][1]
    for inf, cmd, kind, _outf in stages[1:]:
        if inf == '':
            if 'f' in kind:
                # Group the variable assignment with its command so the
                # pipe applies to the pair as a whole
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd

    # Remove the temporary files at the end and on the trapped signals
    if garbage:
        rmcmd = 'rm -f ' + ' '.join(quote(name) for name in garbage)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd

    return cmdlist
|
263 |
|
264 |
|
# Reliably quote a string as a single argument for /bin/sh

# Characters that are left alone when the whole string consists of them
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./'
# Characters that get a backslash when placed inside "double quotes"
_funnychars = '"`$\\'


def quote(file):
    """Return file quoted so that /bin/sh reads it as exactly one argument.

    An empty string becomes '' (two single quotes) so that it does not
    vanish from the command line.  A string made only of safe
    characters is returned unchanged.  A string without a single quote
    is wrapped in single quotes.  Anything else is wrapped in double
    quotes, with a backslash in front of each double quote, backquote,
    dollar sign and backslash.
    """
    if not file:
        # Returned bare, an empty string would not be an argument at all
        return "''"
    if all(c in _safechars for c in file):
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    escaped = ''.join('\\' + c if c in _funnychars else c for c in file)
    return '"' + escaped + '"'
|
284 |
|
285 |
|
286 # Small test program and example |
|
287 |
|
def test():
    """Small test program and example.

    Builds a four-step template, turns debugging on and runs it with
    Template.copy() from a hard-coded input file to '@temp'.  The step
    commands (togif, giftoppm, ppmtogif, fromgif) are run by the shell,
    so they have to be installed for the conversion to succeed.
    """
    # Parenthesised print works the same on Python 2 and Python 3
    print('Testing...')
    t = Template()
    t.append('togif $IN $OUT', 'ff')
    t.append('giftoppm', '--')
    t.append('ppmtogif >$OUT', '-f')
    t.append('fromgif $IN $OUT', 'ff')
    t.debug(1)
    FILE = '/usr/local/images/rgb/rogues/guido.rgb'
    t.copy(FILE, '@temp')
    print('Done.')