|
1 #! /usr/bin/env python |
|
2 |
|
3 """GUI interface to webchecker. |
|
4 |
|
5 This works as a Grail applet too! E.g. |
|
6 |
|
7 <APPLET CODE=wcgui.py NAME=CheckerWindow></APPLET> |
|
8 |
|
9 Checkpoints are not (yet??? ever???) supported. |
|
10 |
|
11 User interface: |
|
12 |
|
13 Enter a root to check in the text entry box. To enter more than one root, |
|
14 enter them one at a time and press <Return> for each one. |
|
15 |
|
16 Command buttons Run, Stop and "Check one" govern the checking process in |
|
17 the obvious way. Run and "Check one" also enter the root from the text |
|
18 entry box if one is present. There's also a check box (enabled by default) |
|
19 to decide whether actually to follow external links (since this can slow |
|
20 the checking down considerably). Finally there's a Quit button. |
|
21 |
|
22 A series of checkbuttons determines whether the corresponding output panel |
|
23 is shown. List panels are also automatically shown or hidden when their |
|
24 status changes between empty and non-empty. There are six panels: |
|
25 |
|
26 Log -- raw output from the checker (-v, -q affect this) |
|
27 To check -- links discovered but not yet checked |
|
28 Checked -- links that have been checked |
|
29 Bad links -- links that failed upon checking |
|
30 Errors -- pages containing at least one bad link |
|
31 Details -- details about one URL; double click on a URL in any of |
|
32 the above list panels (not in Log) will show details |
|
33 for that URL |
|
34 |
|
35 Use your window manager's Close command to quit. |
|
36 |
|
37 Command line options: |
|
38 |
|
39 -m bytes -- skip HTML pages larger than this size (default %(MAXPAGE)d) |
|
40 -q -- quiet operation (also suppresses external links report) |
|
41 -v -- verbose operation; repeating -v will increase verbosity |
|
42 -t root -- specify root dir which should be treated as internal (can repeat) |
|
43 -a -- don't check name anchors |
|
44 |
|
45 Command line arguments: |
|
46 |
|
47 rooturl -- URL to start checking |
|
48 (default %(DEFROOT)s) |
|
49 |
|
50 XXX The command line options (-m, -q, -v) should be GUI accessible. |
|
51 |
|
52 XXX The roots should be visible as a list (?). |
|
53 |
|
54 XXX The multipanel user interface is clumsy. |
|
55 |
|
56 """ |
|
57 |
|
58 # ' Emacs bait |
|
59 |
|
60 |
|
61 import sys |
|
62 import getopt |
|
63 from Tkinter import * |
|
64 import tktools |
|
65 import webchecker |
|
66 import random |
|
67 |
|
# Override some webchecker defaults for a weaker platform: when
# sys.platform is 'mac', use a different default root, a smaller page
# size limit and a fixed verbosity.
if sys.platform == 'mac':
    webchecker.verbose = 4
    webchecker.MAXPAGE = 50000
    webchecker.DEFROOT = "http://grail.cnri.reston.va.us/"
|
73 |
|
def _usage_exit(msg):
    """Write msg and the module usage text to stderr, then exit with status 2."""
    # Write to stderr directly instead of rebinding sys.stdout, so the
    # process-wide stdout is left untouched if SystemExit is intercepted.
    sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__ % vars(webchecker),))
    sys.exit(2)


def main():
    """Command line entry point: parse options, build the GUI, run Tk.

    Options (see the module docstring): -m bytes, -q, -v, -t root, -a.
    All positional arguments except the last are added as roots right
    away; the last one is only suggested in the root entry box.  Every
    -t root is added with add_to_do=0.

    Exits with status 2 after printing the usage text if the options
    cannot be parsed or the -m value is not an integer.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 't:m:qva')
    except getopt.error:
        # sys.exc_info() keeps this parseable by every Python version.
        _usage_exit(sys.exc_info()[1])

    webchecker.verbose = webchecker.VERBOSE
    webchecker.nonames = webchecker.NONAMES
    webchecker.maxpage = webchecker.MAXPAGE
    extra_roots = []
    for o, a in opts:
        if o == '-m':
            try:
                webchecker.maxpage = int(a)
            except ValueError:
                # Previously this escaped as a raw traceback.
                _usage_exit("-m requires an integer byte count, got %r" % (a,))
        elif o == '-q':
            webchecker.verbose = 0
        elif o == '-v':
            webchecker.verbose = webchecker.verbose + 1
        elif o == '-t':
            extra_roots.append(a)
        elif o == '-a':
            webchecker.nonames = not webchecker.nonames

    root = Tk(className='Webchecker')
    root.protocol("WM_DELETE_WINDOW", root.quit)
    c = CheckerWindow(root)
    c.setflags(verbose=webchecker.verbose, maxpage=webchecker.maxpage,
               nonames=webchecker.nonames)
    if args:
        for arg in args[:-1]:
            c.addroot(arg)
        c.suggestroot(args[-1])
    # Usually conditioned on whether external links
    # will be checked, but since that's not a command
    # line option, just toss them in.
    for url_root in extra_roots:
        if not url_root:
            # `-t ''` used to die with IndexError on url_root[-1];
            # an empty root carries no information, so ignore it.
            continue
        # Make sure it's terminated by a slash,
        # so that addroot doesn't discard the last
        # directory component.
        if not url_root.endswith("/"):
            url_root = url_root + "/"
        c.addroot(url_root, add_to_do = 0)
    root.mainloop()
|
117 |
|
118 |
|
class CheckerWindow(webchecker.Checker):
    """Tk front end that drives a webchecker.Checker and shows its progress.

    The window consists of a root-URL entry, the Run / Stop / "Check one" /
    "Start over" buttons, a "Check nonlocal links" check box, two status
    lines and a MultiPanel holding the Log, To check, Checked, Bad links,
    Pages w/ bad links and Details panels.  The Checker methods newlink(),
    markdone(), setbad(), setgood() and seterror() are extended so that the
    list panels are updated whenever the checker's bookkeeping changes.
    """

    # Re-entrancy guard for dosomething().  Kept as a class attribute so it
    # reads as 0 before any instance has assigned it.
    __busy = 0

    def __init__(self, parent, root=webchecker.DEFROOT):
        """Build all widgets inside parent and initialise the checker.

        parent -- Tk widget that receives the controls and panels
        root   -- URL pre-filled in the root entry box (skipped if empty)

        Side effect: sys.stdout is rebound to the Log panel, so everything
        printed afterwards ends up in that panel.
        """
        self.__parent = parent

        self.__topcontrols = Frame(parent)
        self.__topcontrols.pack(side=TOP, fill=X)
        self.__label = Label(self.__topcontrols, text="Root URL:")
        self.__label.pack(side=LEFT)
        self.__rootentry = Entry(self.__topcontrols, width=60)
        self.__rootentry.pack(side=LEFT)
        self.__rootentry.bind('<Return>', self.enterroot)
        self.__rootentry.focus_set()

        self.__controls = Frame(parent)
        self.__controls.pack(side=TOP, fill=X)
        self.__running = 0
        self.__start = Button(self.__controls, text="Run", command=self.start)
        self.__start.pack(side=LEFT)
        self.__stop = Button(self.__controls, text="Stop", command=self.stop,
                             state=DISABLED)
        self.__stop.pack(side=LEFT)
        self.__step = Button(self.__controls, text="Check one",
                             command=self.step)
        self.__step.pack(side=LEFT)
        self.__cv = BooleanVar(parent)
        # Read before Checker.__init__ has run -- presumably checkext is a
        # class-level default on webchecker.Checker; verify if that changes.
        self.__cv.set(self.checkext)
        self.__checkext = Checkbutton(self.__controls, variable=self.__cv,
                                      command=self.update_checkext,
                                      text="Check nonlocal links",)
        self.__checkext.pack(side=LEFT)
        self.__reset = Button(self.__controls, text="Start over",
                              command=self.reset)
        self.__reset.pack(side=LEFT)
        if __name__ == '__main__': # No Quit button under Grail!
            self.__quit = Button(self.__controls, text="Quit",
                                 command=self.__parent.quit)
            self.__quit.pack(side=RIGHT)

        self.__status = Label(parent, text="Status: initial", anchor=W)
        self.__status.pack(side=TOP, fill=X)
        self.__checking = Label(parent, text="Idle", anchor=W)
        self.__checking.pack(side=TOP, fill=X)
        self.__mp = mp = MultiPanel(parent)
        sys.stdout = self.__log = LogPanel(mp, "Log")
        self.__todo = ListPanel(mp, "To check", self, self.showinfo)
        self.__done = ListPanel(mp, "Checked", self, self.showinfo)
        self.__bad = ListPanel(mp, "Bad links", self, self.showinfo)
        self.__errors = ListPanel(mp, "Pages w/ bad links", self, self.showinfo)
        self.__details = LogPanel(mp, "Details")
        # The panels and root_seed are set up before Checker.__init__ --
        # presumably because it ends up calling reset(); confirm in webchecker.
        self.root_seed = None
        webchecker.Checker.__init__(self)
        if root:
            root = str(root).strip()
            if root:
                self.suggestroot(root)
        self.newstatus()

    def reset(self):
        """Reset the checker, empty the list panels, re-suggest the last root."""
        webchecker.Checker.reset(self)
        for p in self.__todo, self.__done, self.__bad, self.__errors:
            p.clear()
        if self.root_seed:
            self.suggestroot(self.root_seed)

    def suggestroot(self, root):
        """Put root (selected) into the entry box and remember it as root_seed."""
        self.__rootentry.delete(0, END)
        self.__rootentry.insert(END, root)
        self.__rootentry.select_range(0, END)
        self.root_seed = root

    def enterroot(self, event=None):
        """Add the URL typed in the entry box as a root, then clear the box.

        Also bound to <Return> in the entry, hence the unused event argument.
        If the new root shows up in the "To check" panel it is selected and
        scrolled into view.
        """
        root = self.__rootentry.get()
        root = root.strip()
        if root:
            self.__checking.config(text="Adding root "+root)
            self.__checking.update_idletasks()
            self.addroot(root)
            self.__checking.config(text="Idle")
            items = self.__todo.items
            # Panel entries are (url, fragment) pairs (showinfo() indexes
            # them, seterror() inserts (url, '')), so the bare string alone
            # can never match.  Assumes addroot() queues the root with an
            # empty fragment -- TODO confirm; the bare string is still tried
            # as a fallback.
            for candidate in ((root, ''), root):
                if candidate in items:
                    i = items.index(candidate)
                    self.__todo.list.select_clear(0, END)
                    self.__todo.list.select_set(i)
                    self.__todo.list.yview(i)
                    break
        self.__rootentry.delete(0, END)

    def start(self):
        """Run button: enter any pending root and start continuous checking."""
        self.__start.config(state=DISABLED, relief=SUNKEN)
        self.__stop.config(state=NORMAL)
        self.__step.config(state=DISABLED)
        self.enterroot()
        self.__running = 1
        self.go()

    def stop(self):
        """Stop button: clear the running flag; go() restores the buttons."""
        self.__stop.config(state=DISABLED, relief=SUNKEN)
        self.__running = 0

    def step(self):
        """"Check one" button: enter any pending root and check a single URL."""
        self.__start.config(state=DISABLED)
        self.__step.config(state=DISABLED, relief=SUNKEN)
        self.enterroot()
        self.__running = 0
        self.dosomething()

    def go(self):
        """Schedule the next check if running, else return the GUI to idle."""
        if self.__running:
            self.__parent.after_idle(self.dosomething)
        else:
            self.__checking.config(text="Idle")
            self.__start.config(state=NORMAL, relief=RAISED)
            self.__stop.config(state=DISABLED, relief=RAISED)
            self.__step.config(state=NORMAL, relief=RAISED)

    def dosomething(self):
        """Check one URL from the "To check" panel, then hand over to go().

        The URL selected in the panel is checked; with no selection the
        first entry is selected and used.  When nothing is left to check
        the run is stopped.  Calls made while a check is in progress
        return immediately.
        """
        if self.__busy:
            return
        self.__busy = 1
        completed = 0
        try:
            if self.todo:
                picked = self.__todo.selectedindices()
                if picked:
                    i = picked[0]
                else:
                    i = 0
                    self.__todo.list.select_set(i)
                self.__todo.list.yview(i)
                url = self.__todo.items[i]
                self.__checking.config(text="Checking "+self.format_url(url))
                self.__parent.update()
                self.dopage(url)
            else:
                self.stop()
            completed = 1
        finally:
            # Always release the guard and restore the buttons.  Without
            # this, an exception escaping from the check left __busy set
            # and every later Run / "Check one" click did nothing.
            self.__busy = 0
            if not completed:
                self.__running = 0
            self.go()

    def showinfo(self, url):
        """Fill the Details panel with what is known about url and show it.

        url is an entry taken from one of the list panels; it is indexed
        as a pair (url[0], url[1]).
        """
        d = self.__details
        d.clear()
        d.put("URL: %s\n" % self.format_url(url))
        if url in self.bad:
            d.put("Error: %s\n" % str(self.bad[url]))
        # NOTE(review): url is a pair here; if self.roots holds plain
        # strings this test never matches -- confirm against webchecker.
        if url in self.roots:
            d.put("Note: This is a root URL\n")
        if url in self.done:
            d.put("Status: checked\n")
            o = self.done[url]
        elif url in self.todo:
            d.put("Status: to check\n")
            o = self.todo[url]
        else:
            d.put("Status: unknown (!)\n")
            o = []
        if (not url[1]) and url[0] in self.errors:
            d.put("Bad links from this page:\n")
            for triple in self.errors[url[0]]:
                link, rawlink, msg = triple
                d.put(" HREF %s" % self.format_url(link))
                if self.format_url(link) != rawlink:
                    d.put(" (%s)" % rawlink)
                d.put("\n")
                d.put(" error %s\n" % str(msg))
        self.__mp.showpanel("Details")
        for source, rawlink in o:
            d.put("Origin: %s" % source)
            if rawlink != self.format_url(url):
                d.put(" (%s)" % rawlink)
            d.put("\n")
        d.text.yview("1.0")

    def setbad(self, url, msg):
        """Checker hook: also list url in the "Bad links" panel."""
        webchecker.Checker.setbad(self, url, msg)
        self.__bad.insert(url)
        self.newstatus()

    def setgood(self, url):
        """Checker hook: also drop url from the "Bad links" panel."""
        webchecker.Checker.setgood(self, url)
        self.__bad.remove(url)
        self.newstatus()

    def newlink(self, url, origin):
        """Checker hook: list url under "Checked" or "To check" as appropriate."""
        webchecker.Checker.newlink(self, url, origin)
        if url in self.done:
            self.__done.insert(url)
        elif url in self.todo:
            self.__todo.insert(url)
        self.newstatus()

    def markdone(self, url):
        """Checker hook: move url from the "To check" to the "Checked" panel."""
        webchecker.Checker.markdone(self, url)
        self.__done.insert(url)
        self.__todo.remove(url)
        self.newstatus()

    def seterror(self, url, triple):
        """Checker hook: list the page url under "Pages w/ bad links"."""
        webchecker.Checker.seterror(self, url, triple)
        self.__errors.insert((url, ''))
        self.newstatus()

    def newstatus(self):
        """Refresh the status line from self.status() and process Tk events."""
        self.__status.config(text="Status: "+self.status())
        self.__parent.update()

    def update_checkext(self):
        """Check box callback: copy its state into self.checkext."""
        self.checkext = self.__cv.get()
|
325 |
|
326 |
|
class ListPanel:
    """One MultiPanel panel showing a list of URLs in a listbox.

    self.items mirrors the listbox row for row: items[i] is the URL object
    whose checker.format_url() text is displayed in row i.
    """

    def __init__(self, mp, name, checker, showinfo=None):
        """Create the panel in mp.

        mp       -- the owning MultiPanel
        name     -- panel title, also the key used for show/hide
        checker  -- object providing format_url(url)
        showinfo -- optional callback(url) invoked on double click
        """
        self.mp = mp
        self.name = name
        self.checker = checker
        self.showinfo = showinfo
        self.panel = mp.addpanel(name)
        listbox, frame = tktools.make_list_box(self.panel, width=60, height=5)
        self.list = listbox
        self.frame = frame
        listbox.config(exportselection=0)
        if showinfo:
            listbox.bind('<Double-Button-1>', self.doubleclick)
        self.items = []

    def clear(self):
        """Forget every entry and hide the panel."""
        self.items = []
        self.list.delete(0, END)
        self.mp.hidepanel(self.name)

    def doubleclick(self, event):
        """Double-click handler: pass the first selected URL to showinfo."""
        chosen = self.selectedindices()
        if not chosen:
            return
        self.showinfo(self.items[chosen[0]])

    def selectedindices(self):
        """Return the selected listbox rows as a list of ints."""
        chosen = self.list.curselection()
        if chosen:
            return [int(row) for row in chosen]
        return []

    def insert(self, url):
        """Append url unless already listed; show the panel on first entry."""
        if url in self.items:
            return
        if not self.items:
            self.mp.showpanel(self.name)
        # (I tried sorting alphabetically, but the display is too jumpy)
        row = len(self.items)
        self.list.insert(row, self.checker.format_url(url))
        self.list.yview(row)
        self.items.append(url)

    def remove(self, url):
        """Remove url if listed; hide the panel once it becomes empty.

        If the removed row was selected, the selection is set on the row
        that took its place (or on the new last row).
        """
        try:
            row = self.items.index(url)
        except (ValueError, IndexError):
            return
        was_selected = row in self.selectedindices()
        self.list.delete(row)
        del self.items[row]
        if not self.items:
            self.mp.hidepanel(self.name)
            return
        if was_selected:
            self.list.select_set(min(row, len(self.items) - 1))
|
382 |
|
383 |
|
class LogPanel:
    """One MultiPanel panel holding a non-wrapping text box for output.

    The write() method lets an instance stand in for a file object
    (CheckerWindow assigns one to sys.stdout).
    """

    def __init__(self, mp, name):
        """Create the panel called name inside the MultiPanel mp."""
        self.mp = mp
        self.name = name
        self.panel = mp.addpanel(name)
        textbox, frame = tktools.make_text_box(self.panel, height=10)
        self.text = textbox
        self.frame = frame
        textbox.config(wrap=NONE)

    def clear(self):
        """Delete all text and scroll back to the top."""
        widget = self.text
        widget.delete("1.0", END)
        widget.yview("1.0")

    def put(self, s):
        """Append s; scroll to the end when s contains a newline."""
        self.text.insert(END, s)
        if '\n' in s:
            self.text.yview(END)

    def write(self, s):
        """File-like variant of put() that also refreshes the display.

        The panel is updated only when s contains a newline.
        """
        self.put(s)
        if '\n' in s:
            self.panel.update()
|
407 |
|
408 |
|
class MultiPanel:
    """A stack of named panels, each toggled by a check button.

    A row of check buttons sits in topframe; the panels whose button is on
    are packed in botframe, in the order in which they were added.
    self.panels maps a panel name to (variable, checkbutton, panel frame).
    """

    def __init__(self, parent):
        """Create the button row and the (initially empty) panel area."""
        self.parent = parent
        self.frame = Frame(self.parent)
        self.frame.pack(expand=1, fill=BOTH)
        self.topframe = Frame(self.frame, borderwidth=2, relief=RAISED)
        self.topframe.pack(fill=X)
        self.botframe = Frame(self.frame)
        self.botframe.pack(expand=1, fill=BOTH)
        self.panelnames = []
        self.panels = {}

    def addpanel(self, name, on=0):
        """Register a new panel and return its frame.

        name -- title shown on the check button and the panel's label
        on   -- if true, the panel starts out checked and visible
        """
        # The variable holds the panel name while shown, "" while hidden.
        state = StringVar(self.parent)
        if on:
            state.set(name)
        else:
            state.set("")
        toggle = Checkbutton(self.topframe, text=name,
                             offvalue="", onvalue=name, variable=state,
                             command=self.checkpanel)
        toggle.pack(side=LEFT)
        body = Frame(self.botframe)
        caption = Label(body, text=name, borderwidth=2, relief=RAISED,
                        anchor=W)
        caption.pack(side=TOP, fill=X)
        self.panelnames.append(name)
        self.panels[name] = (state, toggle, body)
        if on:
            body.pack(expand=1, fill=BOTH)
        return body

    def showpanel(self, name):
        """Tick the named panel's button and pack the panel."""
        state, toggle, body = self.panels[name]
        state.set(name)
        body.pack(expand=1, fill=BOTH)

    def hidepanel(self, name):
        """Untick the named panel's button and unpack the panel."""
        state, toggle, body = self.panels[name]
        state.set("")
        body.pack_forget()

    def checkpanel(self):
        """Check button callback: repack the ticked panels in creation order."""
        entries = [self.panels[name] for name in self.panelnames]
        # Unpack everything first so the visible panels are re-packed in
        # creation order, regardless of the order they were ticked in.
        for state, toggle, body in entries:
            body.pack_forget()
        for state, toggle, body in entries:
            if state.get():
                body.pack(expand=1, fill=BOTH)
|
460 |
|
461 |
|
# Run the GUI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()