|
1 # $Id: frontmatter.py 4564 2006-05-21 20:44:42Z wiemann $ |
|
2 # Author: David Goodger, Ueli Schlaepfer <goodger@python.org> |
|
3 # Copyright: This module has been placed in the public domain. |
|
4 |
|
5 """ |
|
6 Transforms related to the front matter of a document or a section |
|
7 (information found before the main text): |
|
8 |
|
9 - `DocTitle`: Used to transform a lone top level section's title to |
|
10 the document title, promote a remaining lone top-level section's |
|
11 title to the document subtitle, and determine the document's title |
|
12 metadata (document['title']) based on the document title and/or the |
|
13 "title" setting. |
|
14 |
|
15 - `SectionSubTitle`: Used to transform a lone subsection into a |
|
16 subtitle. |
|
17 |
|
18 - `DocInfo`: Used to transform a bibliographic field list into docinfo |
|
19 elements. |
|
20 """ |
|
21 |
|
22 __docformat__ = 'reStructuredText' |
|
23 |
|
24 import re |
|
25 from docutils import nodes, utils |
|
26 from docutils.transforms import TransformError, Transform |
|
27 |
|
28 |
|
class TitlePromoter(Transform):

    """
    Shared helper base for the `DocTitle` and `SectionSubTitle` transforms.

    Provides the title/subtitle promotion routines; it defines no
    ``apply`` method of its own.
    """

    def promote_title(self, node):
        """
        Hoist the title of a lone child section up into `node`.

        The tree ::

            <node>
                <section>
                    <title>
                    ...

        becomes ::

            <node>
                <title>
                ...

        `node` is normally a document.

        Return 1 if a title was promoted, None if `candidate_index`
        found no suitable section.
        """
        # Precondition: `node` does not start with a title already.
        assert not len(node) or not isinstance(node[0], nodes.title)
        candidate, position = self.candidate_index(node)
        if position is None:
            return None
        # The section's attributes are copied onto the node:
        node.attributes.update(candidate.attributes)
        promoted_title = candidate[:1]   # the section's title
        leading = node[:position]        # what preceded the section in node
        section_body = candidate[1:]     # the rest of the section
        # setup_child is called automatically for all nodes.
        node[:] = promoted_title + leading + section_body
        assert isinstance(node[0], nodes.title)
        return 1

    def promote_subtitle(self, node):
        """
        Turn the title of a lone child section into a subtitle of `node`.

        The tree ::

            <node>
                <title>
                <section>
                    <title>
                    ...

        becomes ::

            <node>
                <title>
                <subtitle>
                ...

        Return 1 if a subtitle was promoted, None if `candidate_index`
        found no suitable section.
        """
        candidate, position = self.candidate_index(node)
        if position is None:
            return None
        new_subtitle = nodes.subtitle()
        # Copy the subsection's attributes onto the new subtitle.
        # This causes trouble with list attributes!  To do: Write a
        # test case which catches direct access to the `attributes`
        # dictionary and/or write a test case which shows problems in
        # this particular case.
        new_subtitle.attributes.update(candidate.attributes)
        # We're losing the subtitle's attributes here!  To do: Write a
        # test case which shows this behavior.
        # The children of the subsection's title become the children
        # of the subtitle:
        new_subtitle[:] = candidate[0][:]
        title_part = node[:1]            # the node's own title
        leading = node[1:position]       # what preceded the subsection
        subsection_body = candidate[1:]  # the rest of the subsection
        node[:] = title_part + [new_subtitle] + leading + subsection_body
        return 1

    def candidate_index(self, node):
        """
        Locate the section eligible for promotion inside `node`.

        The candidate is the first child that is not a
        `nodes.PreBibliographic` instance; it qualifies only if it is
        also the last child of `node` and is a `nodes.section`.

        Return a ``(section, index)`` pair, or ``(None, None)`` if no
        valid candidate was found.
        """
        position = node.first_child_not_matching_class(
            nodes.PreBibliographic)
        if position is None:
            return None, None
        if len(node) > position + 1:
            # Something follows the candidate, so it is not "lone".
            return None, None
        candidate = node[position]
        if not isinstance(candidate, nodes.section):
            return None, None
        return candidate, position
|
120 |
|
121 |
|
class DocTitle(TitlePromoter):

    """
    In reStructuredText_, there is no way to specify a document title
    and subtitle explicitly. Instead, we can supply the document title
    (and possibly the subtitle as well) implicitly, and use this
    two-step transform to "raise" or "promote" the title(s) (and their
    corresponding section contents) to the document level.

    1. If the document contains a single top-level section as its
       first non-comment element, the top-level section's title
       becomes the document's title, and the top-level section's
       contents become the document's immediate contents. The lone
       top-level section header must be the first non-comment element
       in the document.

       For example, take this input text::

           =================
            Top-Level Title
           =================

           A paragraph.

       Once parsed, it looks like this::

           <document>
               <section names="top-level title">
                   <title>
                       Top-Level Title
                   <paragraph>
                       A paragraph.

       After running the DocTitle transform, we have::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <paragraph>
                   A paragraph.

    2. If step 1 successfully determines the document title, we
       continue by checking for a subtitle.

       If the lone top-level section itself contains a single
       second-level section as its first non-comment element, that
       section's title is promoted to the document's subtitle, and
       that section's contents become the document's immediate
       contents. Given this input text::

           =================
            Top-Level Title
           =================

           Second-Level Title
           ~~~~~~~~~~~~~~~~~~

           A paragraph.

       After parsing and running the DocTitle transform, the
       result is::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <subtitle names="second-level title">
                   Second-Level Title
               <paragraph>
                   A paragraph.

       (Note that the implicit hyperlink target generated by the
       "Second-Level Title" is preserved on the "subtitle" element
       itself.)

    Any comment elements occurring before the document title or
    subtitle are accumulated and inserted as the first body elements
    after the title(s).

    This transform also sets the document's metadata title
    (document['title']).

    .. _reStructuredText: http://docutils.sf.net/rst.html
    """

    default_priority = 320

    def set_metadata(self):
        """
        Fill in the document['title'] metadata title.

        Sources, highest priority first:

        * An already present document['title'] attribute (left alone).
        * The "title" setting, when it is not None.
        * The text of the document's title node (as promoted by
          promote_title), when the first child is a title.
        """
        document = self.document
        if document.hasattr('title'):
            return
        configured = document.settings.title
        if configured is not None:
            document['title'] = configured
        elif len(document) and isinstance(document[0], nodes.title):
            document['title'] = document[0].astext()

    def apply(self):
        """
        Promote the title (and then the subtitle), then set the
        metadata title.

        Promotion is skipped when the ``doctitle_xform`` setting is
        false (a missing setting counts as enabled); the metadata
        title is set in either case.
        """
        document = self.document
        enabled = getattr(document.settings, 'doctitle_xform', 1)
        # promote_(sub)title are defined in the TitlePromoter base class.
        # A subtitle is only attempted once a title has been promoted.
        if enabled and self.promote_title(document):
            self.promote_subtitle(document)
        # Set document['title'].
        self.set_metadata()
|
232 |
|
233 |
|
class SectionSubTitle(TitlePromoter):

    """
    The section-level counterpart of document subtitles.  For example, ::

        <section>
            <title>
                Title
            <section>
                <title>
                    Subtitle
                ...

    is transformed into ::

        <section>
            <title>
                Title
            <subtitle>
                Subtitle
            ...

    For details refer to the docstring of DocTitle.
    """

    default_priority = 350

    def apply(self):
        """
        Try to promote a subtitle in every section of the document.

        Does nothing when the ``sectsubtitle_xform`` setting is false
        (a missing setting counts as enabled).
        """
        if getattr(self.document.settings, 'sectsubtitle_xform', 1):
            for section in self.document.traverse(nodes.section):
                # On our way through the node tree, we are deleting
                # sections, but we call self.promote_subtitle for those
                # sections nonetheless.  To do: Write a test case which
                # shows the problem and discuss on Docutils-develop.
                self.promote_subtitle(section)
|
270 |
|
271 |
|
class DocInfo(Transform):

    """
    This transform is specific to the reStructuredText_ markup syntax;
    see "Bibliographic Fields" in the `reStructuredText Markup
    Specification`_ for a high-level description. This transform
    should be run *after* the `DocTitle` transform.

    Given a field list as the first non-comment element after the
    document title and subtitle (if present), registered bibliographic
    field names are transformed to the corresponding DTD elements,
    becoming child elements of the "docinfo" element (except for a
    dedication and/or an abstract, which become "topic" elements after
    "docinfo").

    For example, given this document fragment after parsing::

        <document>
            <title>
                Document Title
            <field_list>
                <field>
                    <field_name>
                        Author
                    <field_body>
                        <paragraph>
                            A. Name
                <field>
                    <field_name>
                        Status
                    <field_body>
                        <paragraph>
                            $RCSfile$
            ...

    After running the bibliographic field list transform, the
    resulting document tree would look like this::

        <document>
            <title>
                Document Title
            <docinfo>
                <author>
                    A. Name
                <status>
                    frontmatter.py
            ...

    The "Status" field contained an expanded RCS keyword, which is
    normally (but optionally) cleaned up by the transform. The sole
    contents of the field body must be a paragraph containing an
    expanded RCS keyword of the form "$keyword: expansion text $". Any
    RCS keyword can be processed in any bibliographic field. The
    dollar signs and leading RCS keyword name are removed. Extra
    processing is done for the following RCS keywords:

    - "RCSfile" expands to the name of the file in the RCS or CVS
      repository, which is the name of the source file with a ",v"
      suffix appended. The transform will remove the ",v" suffix.

    - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
      time zone). The RCS Keywords transform will extract just the
      date itself and transform it to an ISO 8601 format date, as in
      "2000-12-31".

      (Since the source file for this text is itself stored under CVS,
      we can't show an example of the "Date" RCS keyword because we
      can't prevent any RCS keywords used in this explanation from
      being expanded. Only the "RCSfile" keyword is stable; its
      expansion text changes only if the file name changes.)

    .. _reStructuredText: http://docutils.sf.net/rst.html
    .. _reStructuredText Markup Specification:
       http://docutils.sf.net/docs/ref/rst/restructuredtext.html
    """

    default_priority = 340

    biblio_nodes = {
          'author': nodes.author,
          'authors': nodes.authors,
          'organization': nodes.organization,
          'address': nodes.address,
          'contact': nodes.contact,
          'version': nodes.version,
          'revision': nodes.revision,
          'status': nodes.status,
          'date': nodes.date,
          'copyright': nodes.copyright,
          'dedication': nodes.topic,
          'abstract': nodes.topic}
    """Canonical field name (lowcased) to node class name mapping for
    bibliographic fields (field_list)."""

    def apply(self):
        """
        Replace a leading bibliographic field list with docinfo nodes.

        Does nothing when the ``docinfo_xform`` setting is false (a
        missing setting counts as enabled), when the document has no
        child past the `nodes.PreBibliographic` ones, or when that
        first such child is not a field list.
        """
        if not getattr(self.document.settings, 'docinfo_xform', 1):
            return
        document = self.document
        index = document.first_child_not_matching_class(
              nodes.PreBibliographic)
        if index is None:
            return
        candidate = document[index]
        if isinstance(candidate, nodes.field_list):
            # The extracted nodes go right after the leading Titular
            # and Decorative children, which may be earlier than the
            # position of the field list itself.
            biblioindex = document.first_child_not_matching_class(
                  (nodes.Titular, nodes.Decorative))
            nodelist = self.extract_bibliographic(candidate)
            del document[index]         # untransformed field list (candidate)
            document[biblioindex:biblioindex] = nodelist

    def extract_bibliographic(self, field_list):
        """
        Convert the fields of `field_list` into bibliographic nodes.

        Return a list holding the "docinfo" element (only if it ended
        up non-empty) followed by the "dedication" and "abstract"
        topics, in that order, for those that were found.

        A field that cannot be converted (unregistered name, wrong
        number of children, empty or unsuitable body, duplicate topic,
        unusable authors) is appended to the docinfo unchanged, after
        RCS keyword cleanup if its body is a single paragraph.
        """
        docinfo = nodes.docinfo()
        bibliofields = self.language.bibliographic_fields
        labels = self.language.labels
        topics = {'dedication': None, 'abstract': None}
        for field in field_list:
            try:
                name = field[0][0].astext()
                normedname = nodes.fully_normalize_name(name)
                # `in` instead of dict.has_key(), which no longer
                # exists in Python 3.
                if not (len(field) == 2 and normedname in bibliofields
                        and self.check_empty_biblio_field(field, name)):
                    raise TransformError
                canonical = bibliofields[normedname]
                biblioclass = self.biblio_nodes[canonical]
                if issubclass(biblioclass, nodes.TextElement):
                    # Simple text fields: body must be one paragraph,
                    # whose children become the new node's children.
                    if not self.check_compound_biblio_field(field, name):
                        raise TransformError
                    utils.clean_rcs_keywords(
                          field[1][0], self.rcs_keyword_substitutions)
                    docinfo.append(biblioclass('', '', *field[1][0]))
                elif issubclass(biblioclass, nodes.authors):
                    self.extract_authors(field, name, docinfo)
                elif issubclass(biblioclass, nodes.topic):
                    # Dedication/abstract: at most one of each.
                    if topics[canonical]:
                        field[-1] += self.document.reporter.warning(
                            'There can only be one "%s" field.' % name,
                            base_node=field)
                        raise TransformError
                    title = nodes.title(name, labels[canonical])
                    topics[canonical] = biblioclass(
                        '', title, classes=[canonical], *field[1].children)
                else:
                    docinfo.append(biblioclass('', *field[1].children))
            except TransformError:
                # Fall back to keeping the field as a generic field.
                if len(field[-1]) == 1 \
                       and isinstance(field[-1][0], nodes.paragraph):
                    utils.clean_rcs_keywords(
                        field[-1][0], self.rcs_keyword_substitutions)
                docinfo.append(field)
        nodelist = []
        if len(docinfo) != 0:
            nodelist.append(docinfo)
        for name in ('dedication', 'abstract'):
            if topics[name]:
                nodelist.append(topics[name])
        return nodelist

    def check_empty_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` has at least one child.

        Otherwise append a warning to the field body and return None.
        """
        if len(field[-1]) < 1:
            field[-1] += self.document.reporter.warning(
                  'Cannot extract empty bibliographic field "%s".' % name,
                  base_node=field)
            return None
        return 1

    def check_compound_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` is exactly one paragraph.

        Otherwise append a warning to the field body and return None.
        The body is expected to be non-empty (see
        `check_empty_biblio_field`), since its first child is indexed.
        """
        if len(field[-1]) > 1:
            field[-1] += self.document.reporter.warning(
                  'Cannot extract compound bibliographic field "%s".' % name,
                  base_node=field)
            return None
        if not isinstance(field[-1][0], nodes.paragraph):
            field[-1] += self.document.reporter.warning(
                  'Cannot extract bibliographic field "%s" containing '
                  'anything other than a single paragraph.' % name,
                  base_node=field)
            return None
        return 1

    # (pattern, replacement) pairs handed to utils.clean_rcs_keywords.
    # The "Date" and "RCSfile" patterns precede the generic keyword
    # pattern.
    rcs_keyword_substitutions = [
          (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
                      r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
          (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
          (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),]

    def extract_authors(self, field, name, docinfo):
        """
        Append an "authors" element built from `field` to `docinfo`.

        The field body may be a single paragraph, a single bullet
        list, or several paragraphs.  If no author can be extracted, a
        warning is appended to the field body and `TransformError` is
        re-raised for the caller to handle.
        """
        try:
            if len(field[1]) == 1:
                if isinstance(field[1][0], nodes.paragraph):
                    authors = self.authors_from_one_paragraph(field)
                elif isinstance(field[1][0], nodes.bullet_list):
                    authors = self.authors_from_bullet_list(field)
                else:
                    raise TransformError
            else:
                authors = self.authors_from_paragraphs(field)
            # Empty child lists are skipped.
            authornodes = [nodes.author('', '', *author)
                           for author in authors if author]
            if len(authornodes) >= 1:
                docinfo.append(nodes.authors('', *authornodes))
            else:
                raise TransformError
        except TransformError:
            field[-1] += self.document.reporter.warning(
                  'Bibliographic field "%s" incompatible with extraction: '
                  'it must contain either a single paragraph (with authors '
                  'separated by one of "%s"), multiple paragraphs (one per '
                  'author), or a bullet list with one paragraph (one author) '
                  'per item.'
                  % (name, ''.join(self.language.author_separators)),
                  base_node=field)
            raise

    def authors_from_one_paragraph(self, field):
        """
        Return one single-`nodes.Text` child list per author name.

        The paragraph's text is split on the first of the language's
        author separators that yields more than one piece; if none
        does, the whole text is a single name.  Empty names are
        dropped.  Raise `TransformError` if the paragraph text is
        empty.
        """
        text = field[1][0].astext().strip()
        if not text:
            raise TransformError
        # Default for the case where there are no separators at all;
        # previously `authornames` was left unbound then.  A split
        # that finds no separator yields this same value.
        authornames = [text]
        for authorsep in self.language.author_separators:
            authornames = text.split(authorsep)
            if len(authornames) > 1:
                break
        authornames = [author.strip() for author in authornames]
        authors = [[nodes.Text(author)] for author in authornames if author]
        return authors

    def authors_from_bullet_list(self, field):
        """
        Return the children of each list item's paragraph, one list
        per author.

        Raise `TransformError` if an item is not exactly one paragraph
        or if the bullet list has no items.
        """
        authors = []
        for item in field[1][0]:
            if len(item) != 1 or not isinstance(item[0], nodes.paragraph):
                raise TransformError
            authors.append(item[0].children)
        if not authors:
            raise TransformError
        return authors

    def authors_from_paragraphs(self, field):
        """
        Return the children of each body paragraph, one list per
        author.

        Raise `TransformError` if any body element is not a paragraph.
        """
        for item in field[1]:
            if not isinstance(item, nodes.paragraph):
                raise TransformError
        authors = [item.children for item in field[1]]
        return authors