|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 sphinx.linkcheck |
|
4 ~~~~~~~~~~~~~~~~ |
|
5 |
|
6 The CheckExternalLinksBuilder class. |
|
7 |
|
8 :copyright: 2008 by Georg Brandl, Thomas Lamb. |
|
9 :license: BSD. |
|
10 """ |
|
11 |
|
12 import socket |
|
13 from os import path |
|
14 from urllib2 import build_opener, HTTPError |
|
15 |
|
16 from docutils import nodes |
|
17 |
|
18 from sphinx.builder import Builder |
|
19 from sphinx.util.console import purple, red, darkgreen |
|
20 |
|
# Module-wide opener shared by all link checks.  Its header list is set
# explicitly so that requests carry a browser-like User-agent.
opener = build_opener()
opener.addheaders = [
    ('User-agent', 'Mozilla/5.0'),
]
|
24 |
|
25 |
|
class CheckExternalLinksBuilder(Builder):
    """
    Checks for broken external links.

    Each ``http:``/``https:`` reference found in the doctrees is resolved
    and its outcome is printed.  Broken, redirected and malformed links
    are additionally appended to ``output.txt`` in the output directory.
    """
    name = 'linkcheck'

    def init(self):
        """Reset the result caches and create an empty ``output.txt``."""
        # URIs already known to work; check() skips them silently
        self.good = set()
        # uri -> (status, message) caches, so a failing or redirected URI
        # is only fetched once
        self.broken = {}
        self.redirected = {}
        # set a timeout for non-responding servers
        socket.setdefaulttimeout(5.0)
        # create (or truncate) the output file
        open(path.join(self.outdir, 'output.txt'), 'w').close()

    def get_target_uri(self, docname, typ=None):
        """Return an empty target URI for every document."""
        return ''

    def get_outdated_docs(self):
        """Return all found documents, so every one of them is checked."""
        return self.env.found_docs

    def prepare_writing(self, docnames):
        """Nothing to prepare for this builder."""
        return

    def write_doc(self, docname, doctree):
        """Check every reference node contained in *doctree*."""
        self.info()
        for node in doctree.traverse(nodes.reference):
            try:
                self.check(node, docname)
            except KeyError:
                # check() looks up node['refuri']; reference nodes that
                # lack it are skipped
                continue

    def check(self, node, docname):
        """Check the link target of a single reference *node*.

        ``http:``/``https:`` URIs are resolved (or taken from the caches of
        earlier results) and reported as working, broken or redirected.
        Empty, ``mailto:`` and ``ftp:`` URIs are ignored; anything else is
        reported as malformed.  Broken and malformed links set the
        application's status code to 1.

        Raises ``KeyError`` if *node* has no ``refuri`` attribute.
        """
        # only the part before the fragment identifier is checked
        uri = node['refuri'].split('#', 1)[0]

        if uri in self.good:
            return

        # Take the line number from the nearest ancestor that has one.
        # Stop at the root of the tree: its parent is None, and looking up
        # ``.line`` on it would raise AttributeError.
        lineno = None
        ancestor = node.parent
        while lineno is None and ancestor is not None:
            lineno = ancestor.line
            ancestor = ancestor.parent

        if uri.startswith(('http:', 'https:')):
            self.info(uri, nonl=1)

            # reuse an earlier result for this URI if there is one
            if uri in self.broken:
                (status, message) = self.broken[uri]
            elif uri in self.redirected:
                (status, message) = self.redirected[uri]
            else:
                (status, message) = self.resolve(uri)

            if status == 0:
                self.info(' - ' + darkgreen('working'))
                self.good.add(uri)
            elif status == 2:
                self.info(' - ' + red('broken: ') + message)
                self.write_entry('broken', docname, lineno,
                                 uri + ': ' + message)
                self.broken[uri] = (status, message)
                if self.app.quiet:
                    self.warn('%s:%s: broken link: %s'
                              % (docname, lineno, uri))
            else:
                self.info(' - ' + purple('redirected') + ' to ' + message)
                self.write_entry('redirected', docname, lineno,
                                 uri + ' to ' + message)
                self.redirected[uri] = (status, message)
        elif not uri or uri.startswith(('mailto:', 'ftp:')):
            return
        else:
            self.warn(uri + ' - ' + red('malformed!'))
            self.write_entry('malformed', docname, lineno, uri)
            if self.app.quiet:
                self.warn('%s:%s: malformed link: %s'
                          % (docname, lineno, uri))
            self.app.statuscode = 1

        if self.broken:
            self.app.statuscode = 1

    def write_entry(self, what, docname, line, uri):
        """Append one result line to ``output.txt``.

        The line has the form ``<source path>:<line>: [<what>] <uri>``.
        """
        # ``with`` closes the file even if formatting or writing fails
        with open(path.join(self.outdir, 'output.txt'), 'a') as output:
            output.write("%s:%s: [%s] %s\n"
                         % (self.env.doc2path(docname, None),
                            line, what, uri))

    def resolve(self, uri):
        """Open *uri* and classify the result.

        Returns a ``(status, info)`` tuple:

        * ``(0, 0)`` -- the URI could be opened and was not redirected
        * ``(1, url)`` -- the request ended up at the different *url*
        * ``(2, message)`` -- opening failed; *message* is the error text
        """
        try:
            f = opener.open(uri)
            f.close()
        except Exception as err:
            # any failure (HTTP error status, timeout, ...) marks the link
            # as broken instead of aborting the build
            return (2, str(err))
        # a trailing slash alone is not reported as a redirect
        if f.url.rstrip('/') == uri.rstrip('/'):
            return (0, 0)
        return (1, f.url)

    def finish(self):
        """Nothing to do once all documents have been checked."""
        return