buildframework/helium/external/python/lib/common/docutils-0.5-py2.5.egg/docutils/transforms/frontmatter.py
changeset 179 d8ac696cc51f
equal deleted inserted replaced
1:be27ed110b50 179:d8ac696cc51f
       
     1 # $Id: frontmatter.py 4564 2006-05-21 20:44:42Z wiemann $
       
     2 # Author: David Goodger, Ueli Schlaepfer <goodger@python.org>
       
     3 # Copyright: This module has been placed in the public domain.
       
     4 
       
     5 """
       
     6 Transforms related to the front matter of a document or a section
       
     7 (information found before the main text):
       
     8 
       
     9 - `DocTitle`: Used to transform a lone top level section's title to
       
    10   the document title, promote a remaining lone top-level section's
       
    11   title to the document subtitle, and determine the document's title
       
    12   metadata (document['title']) based on the document title and/or the
       
    13   "title" setting.
       
    14 
       
    15 - `SectionSubTitle`: Used to transform a lone subsection into a
       
    16   subtitle.
       
    17 
       
    18 - `DocInfo`: Used to transform a bibliographic field list into docinfo
       
    19   elements.
       
    20 """
       
    21 
       
    22 __docformat__ = 'reStructuredText'
       
    23 
       
    24 import re
       
    25 from docutils import nodes, utils
       
    26 from docutils.transforms import TransformError, Transform
       
    27 
       
    28 
       
    29 class TitlePromoter(Transform):
       
    30 
       
    31     """
       
    32     Abstract base class for DocTitle and SectionSubTitle transforms.
       
    33     """
       
    34 
       
    35     def promote_title(self, node):
       
    36         """
       
    37         Transform the following tree::
       
    38 
       
    39             <node>
       
    40                 <section>
       
    41                     <title>
       
    42                     ...
       
    43 
       
    44         into ::
       
    45 
       
    46             <node>
       
    47                 <title>
       
    48                 ...
       
    49 
       
    50         `node` is normally a document.
       
    51         """
       
    52         # `node` must not have a title yet.
       
    53         assert not (len(node) and isinstance(node[0], nodes.title))
       
    54         section, index = self.candidate_index(node)
       
    55         if index is None:
       
    56             return None
       
    57         # Transfer the section's attributes to the node:
       
    58         node.attributes.update(section.attributes)
       
    59         # setup_child is called automatically for all nodes.
       
    60         node[:] = (section[:1]        # section title
       
    61                    + node[:index]     # everything that was in the
       
    62                                       # node before the section
       
    63                    + section[1:])     # everything that was in the section
       
    64         assert isinstance(node[0], nodes.title)
       
    65         return 1
       
    66 
       
    67     def promote_subtitle(self, node):
       
    68         """
       
    69         Transform the following node tree::
       
    70 
       
    71             <node>
       
    72                 <title>
       
    73                 <section>
       
    74                     <title>
       
    75                     ...
       
    76 
       
    77         into ::
       
    78 
       
    79             <node>
       
    80                 <title>
       
    81                 <subtitle>
       
    82                 ...
       
    83         """
       
    84         subsection, index = self.candidate_index(node)
       
    85         if index is None:
       
    86             return None
       
    87         subtitle = nodes.subtitle()
       
    88         # Transfer the subsection's attributes to the new subtitle:
       
    89         # This causes trouble with list attributes!  To do: Write a
       
    90         # test case which catches direct access to the `attributes`
       
    91         # dictionary and/or write a test case which shows problems in
       
    92         # this particular case.
       
    93         subtitle.attributes.update(subsection.attributes)
       
    94         # We're losing the subtitle's attributes here!  To do: Write a
       
    95         # test case which shows this behavior.
       
    96         # Transfer the contents of the subsection's title to the
       
    97         # subtitle:
       
    98         subtitle[:] = subsection[0][:]
       
    99         node[:] = (node[:1]       # title
       
   100                    + [subtitle]
       
   101                    # everything that was before the section:
       
   102                    + node[1:index]
       
   103                    # everything that was in the subsection:
       
   104                    + subsection[1:])
       
   105         return 1
       
   106 
       
   107     def candidate_index(self, node):
       
   108         """
       
   109         Find and return the promotion candidate and its index.
       
   110 
       
   111         Return (None, None) if no valid candidate was found.
       
   112         """
       
   113         index = node.first_child_not_matching_class(
       
   114             nodes.PreBibliographic)
       
   115         if index is None or len(node) > (index + 1) or \
       
   116                not isinstance(node[index], nodes.section):
       
   117             return None, None
       
   118         else:
       
   119             return node[index], index
       
   120 
       
   121 
       
   122 class DocTitle(TitlePromoter):
       
   123 
       
   124     """
       
   125     In reStructuredText_, there is no way to specify a document title
       
   126     and subtitle explicitly. Instead, we can supply the document title
       
   127     (and possibly the subtitle as well) implicitly, and use this
       
   128     two-step transform to "raise" or "promote" the title(s) (and their
       
   129     corresponding section contents) to the document level.
       
   130 
       
   131     1. If the document contains a single top-level section as its
       
   132        first non-comment element, the top-level section's title
       
   133        becomes the document's title, and the top-level section's
       
   134        contents become the document's immediate contents. The lone
       
   135        top-level section header must be the first non-comment element
       
   136        in the document.
       
   137 
       
   138        For example, take this input text::
       
   139 
       
   140            =================
       
   141             Top-Level Title
       
   142            =================
       
   143 
       
   144            A paragraph.
       
   145 
       
   146        Once parsed, it looks like this::
       
   147 
       
   148            <document>
       
   149                <section names="top-level title">
       
   150                    <title>
       
   151                        Top-Level Title
       
   152                    <paragraph>
       
   153                        A paragraph.
       
   154 
       
   155        After running the DocTitle transform, we have::
       
   156 
       
   157            <document names="top-level title">
       
   158                <title>
       
   159                    Top-Level Title
       
   160                <paragraph>
       
   161                    A paragraph.
       
   162 
       
   163     2. If step 1 successfully determines the document title, we
       
   164        continue by checking for a subtitle.
       
   165 
       
   166        If the lone top-level section itself contains a single
       
   167        second-level section as its first non-comment element, that
       
   168        section's title is promoted to the document's subtitle, and
       
   169        that section's contents become the document's immediate
       
   170        contents. Given this input text::
       
   171 
       
   172            =================
       
   173             Top-Level Title
       
   174            =================
       
   175 
       
   176            Second-Level Title
       
   177            ~~~~~~~~~~~~~~~~~~
       
   178 
       
   179            A paragraph.
       
   180 
       
   181        After parsing and running the Section Promotion transform, the
       
   182        result is::
       
   183 
       
   184            <document names="top-level title">
       
   185                <title>
       
   186                    Top-Level Title
       
   187                <subtitle names="second-level title">
       
   188                    Second-Level Title
       
   189                <paragraph>
       
   190                    A paragraph.
       
   191 
       
   192        (Note that the implicit hyperlink target generated by the
       
   193        "Second-Level Title" is preserved on the "subtitle" element
       
   194        itself.)
       
   195 
       
   196     Any comment elements occurring before the document title or
       
   197     subtitle are accumulated and inserted as the first body elements
       
   198     after the title(s).
       
   199 
       
   200     This transform also sets the document's metadata title
       
   201     (document['title']).
       
   202 
       
   203     .. _reStructuredText: http://docutils.sf.net/rst.html
       
   204     """
       
   205 
       
   206     default_priority = 320
       
   207 
       
   208     def set_metadata(self):
       
   209         """
       
   210         Set document['title'] metadata title from the following
       
   211         sources, listed in order of priority:
       
   212 
       
   213         * Existing document['title'] attribute.
       
   214         * "title" setting.
       
   215         * Document title node (as promoted by promote_title).
       
   216         """
       
   217         if not self.document.hasattr('title'):
       
   218             if self.document.settings.title is not None:
       
   219                 self.document['title'] = self.document.settings.title
       
   220             elif len(self.document) and isinstance(self.document[0], nodes.title):
       
   221                 self.document['title'] = self.document[0].astext()
       
   222 
       
   223     def apply(self):
       
   224         if getattr(self.document.settings, 'doctitle_xform', 1):
       
   225             # promote_(sub)title defined in TitlePromoter base class.
       
   226             if self.promote_title(self.document):
       
   227                 # If a title has been promoted, also try to promote a
       
   228                 # subtitle.
       
   229                 self.promote_subtitle(self.document)
       
   230         # Set document['title'].
       
   231         self.set_metadata()
       
   232 
       
   233 
       
   234 class SectionSubTitle(TitlePromoter):
       
   235 
       
   236     """
       
   237     This works like document subtitles, but for sections.  For example, ::
       
   238 
       
   239         <section>
       
   240             <title>
       
   241                 Title
       
   242             <section>
       
   243                 <title>
       
   244                     Subtitle
       
   245                 ...
       
   246 
       
   247     is transformed into ::
       
   248 
       
   249         <section>
       
   250             <title>
       
   251                 Title
       
   252             <subtitle>
       
   253                 Subtitle
       
   254             ...
       
   255 
       
   256     For details refer to the docstring of DocTitle.
       
   257     """
       
   258 
       
   259     default_priority = 350
       
   260 
       
   261     def apply(self):
       
   262         if not getattr(self.document.settings, 'sectsubtitle_xform', 1):
       
   263             return
       
   264         for section in self.document.traverse(nodes.section):
       
   265             # On our way through the node tree, we are deleting
       
   266             # sections, but we call self.promote_subtitle for those
       
   267             # sections nonetheless.  To do: Write a test case which
       
   268             # shows the problem and discuss on Docutils-develop.
       
   269             self.promote_subtitle(section)
       
   270 
       
   271 
       
   272 class DocInfo(Transform):
       
   273 
       
   274     """
       
   275     This transform is specific to the reStructuredText_ markup syntax;
       
   276     see "Bibliographic Fields" in the `reStructuredText Markup
       
   277     Specification`_ for a high-level description. This transform
       
   278     should be run *after* the `DocTitle` transform.
       
   279 
       
   280     Given a field list as the first non-comment element after the
       
   281     document title and subtitle (if present), registered bibliographic
       
   282     field names are transformed to the corresponding DTD elements,
       
   283     becoming child elements of the "docinfo" element (except for a
       
   284     dedication and/or an abstract, which become "topic" elements after
       
   285     "docinfo").
       
   286 
       
   287     For example, given this document fragment after parsing::
       
   288 
       
   289         <document>
       
   290             <title>
       
   291                 Document Title
       
   292             <field_list>
       
   293                 <field>
       
   294                     <field_name>
       
   295                         Author
       
   296                     <field_body>
       
   297                         <paragraph>
       
   298                             A. Name
       
   299                 <field>
       
   300                     <field_name>
       
   301                         Status
       
   302                     <field_body>
       
   303                         <paragraph>
       
   304                             $RCSfile$
       
   305             ...
       
   306 
       
   307     After running the bibliographic field list transform, the
       
   308     resulting document tree would look like this::
       
   309 
       
   310         <document>
       
   311             <title>
       
   312                 Document Title
       
   313             <docinfo>
       
   314                 <author>
       
   315                     A. Name
       
   316                 <status>
       
   317                     frontmatter.py
       
   318             ...
       
   319 
       
   320     The "Status" field contained an expanded RCS keyword, which is
       
   321     normally (but optionally) cleaned up by the transform. The sole
       
   322     contents of the field body must be a paragraph containing an
       
   323     expanded RCS keyword of the form "$keyword: expansion text $". Any
       
   324     RCS keyword can be processed in any bibliographic field. The
       
   325     dollar signs and leading RCS keyword name are removed. Extra
       
   326     processing is done for the following RCS keywords:
       
   327 
       
   328     - "RCSfile" expands to the name of the file in the RCS or CVS
       
   329       repository, which is the name of the source file with a ",v"
       
   330       suffix appended. The transform will remove the ",v" suffix.
       
   331 
       
   332     - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
       
   333       time zone). The RCS Keywords transform will extract just the
       
   334       date itself and transform it to an ISO 8601 format date, as in
       
   335       "2000-12-31".
       
   336 
       
   337       (Since the source file for this text is itself stored under CVS,
       
   338       we can't show an example of the "Date" RCS keyword because we
       
   339       can't prevent any RCS keywords used in this explanation from
       
   340       being expanded. Only the "RCSfile" keyword is stable; its
       
   341       expansion text changes only if the file name changes.)
       
   342 
       
   343     .. _reStructuredText: http://docutils.sf.net/rst.html
       
   344     .. _reStructuredText Markup Specification:
       
   345        http://docutils.sf.net/docs/ref/rst/restructuredtext.html
       
   346     """
       
   347 
       
   348     default_priority = 340
       
   349 
       
   350     biblio_nodes = {
       
   351           'author': nodes.author,
       
   352           'authors': nodes.authors,
       
   353           'organization': nodes.organization,
       
   354           'address': nodes.address,
       
   355           'contact': nodes.contact,
       
   356           'version': nodes.version,
       
   357           'revision': nodes.revision,
       
   358           'status': nodes.status,
       
   359           'date': nodes.date,
       
   360           'copyright': nodes.copyright,
       
   361           'dedication': nodes.topic,
       
   362           'abstract': nodes.topic}
       
   363     """Canonical field name (lowcased) to node class name mapping for
       
   364     bibliographic fields (field_list)."""
       
   365 
       
   366     def apply(self):
       
   367         if not getattr(self.document.settings, 'docinfo_xform', 1):
       
   368             return
       
   369         document = self.document
       
   370         index = document.first_child_not_matching_class(
       
   371               nodes.PreBibliographic)
       
   372         if index is None:
       
   373             return
       
   374         candidate = document[index]
       
   375         if isinstance(candidate, nodes.field_list):
       
   376             biblioindex = document.first_child_not_matching_class(
       
   377                   (nodes.Titular, nodes.Decorative))
       
   378             nodelist = self.extract_bibliographic(candidate)
       
   379             del document[index]         # untransformed field list (candidate)
       
   380             document[biblioindex:biblioindex] = nodelist
       
   381 
       
   382     def extract_bibliographic(self, field_list):
       
   383         docinfo = nodes.docinfo()
       
   384         bibliofields = self.language.bibliographic_fields
       
   385         labels = self.language.labels
       
   386         topics = {'dedication': None, 'abstract': None}
       
   387         for field in field_list:
       
   388             try:
       
   389                 name = field[0][0].astext()
       
   390                 normedname = nodes.fully_normalize_name(name)
       
   391                 if not (len(field) == 2 and bibliofields.has_key(normedname)
       
   392                         and self.check_empty_biblio_field(field, name)):
       
   393                     raise TransformError
       
   394                 canonical = bibliofields[normedname]
       
   395                 biblioclass = self.biblio_nodes[canonical]
       
   396                 if issubclass(biblioclass, nodes.TextElement):
       
   397                     if not self.check_compound_biblio_field(field, name):
       
   398                         raise TransformError
       
   399                     utils.clean_rcs_keywords(
       
   400                           field[1][0], self.rcs_keyword_substitutions)
       
   401                     docinfo.append(biblioclass('', '', *field[1][0]))
       
   402                 elif issubclass(biblioclass, nodes.authors):
       
   403                     self.extract_authors(field, name, docinfo)
       
   404                 elif issubclass(biblioclass, nodes.topic):
       
   405                     if topics[canonical]:
       
   406                         field[-1] += self.document.reporter.warning(
       
   407                             'There can only be one "%s" field.' % name,
       
   408                             base_node=field)
       
   409                         raise TransformError
       
   410                     title = nodes.title(name, labels[canonical])
       
   411                     topics[canonical] = biblioclass(
       
   412                         '', title, classes=[canonical], *field[1].children)
       
   413                 else:
       
   414                     docinfo.append(biblioclass('', *field[1].children))
       
   415             except TransformError:
       
   416                 if len(field[-1]) == 1 \
       
   417                        and isinstance(field[-1][0], nodes.paragraph):
       
   418                     utils.clean_rcs_keywords(
       
   419                         field[-1][0], self.rcs_keyword_substitutions)
       
   420                 docinfo.append(field)
       
   421         nodelist = []
       
   422         if len(docinfo) != 0:
       
   423             nodelist.append(docinfo)
       
   424         for name in ('dedication', 'abstract'):
       
   425             if topics[name]:
       
   426                 nodelist.append(topics[name])
       
   427         return nodelist
       
   428 
       
   429     def check_empty_biblio_field(self, field, name):
       
   430         if len(field[-1]) < 1:
       
   431             field[-1] += self.document.reporter.warning(
       
   432                   'Cannot extract empty bibliographic field "%s".' % name,
       
   433                   base_node=field)
       
   434             return None
       
   435         return 1
       
   436 
       
   437     def check_compound_biblio_field(self, field, name):
       
   438         if len(field[-1]) > 1:
       
   439             field[-1] += self.document.reporter.warning(
       
   440                   'Cannot extract compound bibliographic field "%s".' % name,
       
   441                   base_node=field)
       
   442             return None
       
   443         if not isinstance(field[-1][0], nodes.paragraph):
       
   444             field[-1] += self.document.reporter.warning(
       
   445                   'Cannot extract bibliographic field "%s" containing '
       
   446                   'anything other than a single paragraph.' % name,
       
   447                   base_node=field)
       
   448             return None
       
   449         return 1
       
   450 
       
   451     rcs_keyword_substitutions = [
       
   452           (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
       
   453                       r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
       
   454           (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
       
   455           (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),]
       
   456 
       
   457     def extract_authors(self, field, name, docinfo):
       
   458         try:
       
   459             if len(field[1]) == 1:
       
   460                 if isinstance(field[1][0], nodes.paragraph):
       
   461                     authors = self.authors_from_one_paragraph(field)
       
   462                 elif isinstance(field[1][0], nodes.bullet_list):
       
   463                     authors = self.authors_from_bullet_list(field)
       
   464                 else:
       
   465                     raise TransformError
       
   466             else:
       
   467                 authors = self.authors_from_paragraphs(field)
       
   468             authornodes = [nodes.author('', '', *author)
       
   469                            for author in authors if author]
       
   470             if len(authornodes) >= 1:
       
   471                 docinfo.append(nodes.authors('', *authornodes))
       
   472             else:
       
   473                 raise TransformError
       
   474         except TransformError:
       
   475             field[-1] += self.document.reporter.warning(
       
   476                   'Bibliographic field "%s" incompatible with extraction: '
       
   477                   'it must contain either a single paragraph (with authors '
       
   478                   'separated by one of "%s"), multiple paragraphs (one per '
       
   479                   'author), or a bullet list with one paragraph (one author) '
       
   480                   'per item.'
       
   481                   % (name, ''.join(self.language.author_separators)),
       
   482                   base_node=field)
       
   483             raise
       
   484 
       
   485     def authors_from_one_paragraph(self, field):
       
   486         text = field[1][0].astext().strip()
       
   487         if not text:
       
   488             raise TransformError
       
   489         for authorsep in self.language.author_separators:
       
   490             authornames = text.split(authorsep)
       
   491             if len(authornames) > 1:
       
   492                 break
       
   493         authornames = [author.strip() for author in authornames]
       
   494         authors = [[nodes.Text(author)] for author in authornames if author]
       
   495         return authors
       
   496 
       
   497     def authors_from_bullet_list(self, field):
       
   498         authors = []
       
   499         for item in field[1][0]:
       
   500             if len(item) != 1 or not isinstance(item[0], nodes.paragraph):
       
   501                 raise TransformError
       
   502             authors.append(item[0].children)
       
   503         if not authors:
       
   504             raise TransformError
       
   505         return authors
       
   506 
       
   507     def authors_from_paragraphs(self, field):
       
   508         for item in field[1]:
       
   509             if not isinstance(item, nodes.paragraph):
       
   510                 raise TransformError
       
   511         authors = [item.children for item in field[1]]
       
   512         return authors