179
|
1 |
# $Id: states.py 4824 2006-12-09 00:59:23Z goodger $
|
|
2 |
# Author: David Goodger <goodger@python.org>
|
|
3 |
# Copyright: This module has been placed in the public domain.
|
|
4 |
|
|
5 |
"""
|
|
6 |
This is the ``docutils.parsers.restructuredtext.states`` module, the core of
|
|
7 |
the reStructuredText parser. It defines the following:
|
|
8 |
|
|
9 |
:Classes:
|
|
10 |
- `RSTStateMachine`: reStructuredText parser's entry point.
|
|
11 |
- `NestedStateMachine`: recursive StateMachine.
|
|
12 |
- `RSTState`: reStructuredText State superclass.
|
|
13 |
- `Inliner`: For parsing inline markup.
|
|
14 |
- `Body`: Generic classifier of the first line of a block.
|
|
15 |
- `SpecializedBody`: Superclass for compound element members.
|
|
16 |
- `BulletList`: Second and subsequent bullet_list list_items
|
|
17 |
- `DefinitionList`: Second+ definition_list_items.
|
|
18 |
- `EnumeratedList`: Second+ enumerated_list list_items.
|
|
19 |
- `FieldList`: Second+ fields.
|
|
20 |
- `OptionList`: Second+ option_list_items.
|
|
21 |
- `RFC2822List`: Second+ RFC2822-style fields.
|
|
22 |
- `ExtensionOptions`: Parses directive option fields.
|
|
23 |
- `Explicit`: Second+ explicit markup constructs.
|
|
24 |
- `SubstitutionDef`: For embedded directives in substitution definitions.
|
|
25 |
- `Text`: Classifier of second line of a text block.
|
|
26 |
- `SpecializedText`: Superclass for continuation lines of Text-variants.
|
|
27 |
- `Definition`: Second line of potential definition_list_item.
|
|
28 |
- `Line`: Second line of overlined section title or transition marker.
|
|
29 |
- `Struct`: An auxiliary collection class.
|
|
30 |
|
|
31 |
:Exception classes:
|
|
32 |
- `MarkupError`
|
|
33 |
- `ParserError`
|
|
34 |
- `MarkupMismatch`
|
|
35 |
|
|
36 |
:Functions:
|
|
37 |
- `escape2null()`: Return a string, escape-backslashes converted to nulls.
|
|
38 |
- `unescape()`: Return a string, nulls removed or restored to backslashes.
|
|
39 |
|
|
40 |
:Attributes:
|
|
41 |
- `state_classes`: set of State classes used with `RSTStateMachine`.
|
|
42 |
|
|
43 |
Parser Overview
|
|
44 |
===============
|
|
45 |
|
|
46 |
The reStructuredText parser is implemented as a recursive state machine,
|
|
47 |
examining its input one line at a time. To understand how the parser works,
|
|
48 |
please first become familiar with the `docutils.statemachine` module. In the
|
|
49 |
description below, references are made to classes defined in this module;
|
|
50 |
please see the individual classes for details.
|
|
51 |
|
|
52 |
Parsing proceeds as follows:
|
|
53 |
|
|
54 |
1. The state machine examines each line of input, checking each of the
|
|
55 |
transition patterns of the state `Body`, in order, looking for a match.
|
|
56 |
The implicit transitions (blank lines and indentation) are checked before
|
|
57 |
any others. The 'text' transition is a catch-all (matches anything).
|
|
58 |
|
|
59 |
2. The method associated with the matched transition pattern is called.
|
|
60 |
|
|
61 |
A. Some transition methods are self-contained, appending elements to the
|
|
62 |
document tree (`Body.doctest` parses a doctest block). The parser's
|
|
63 |
current line index is advanced to the end of the element, and parsing
|
|
64 |
continues with step 1.
|
|
65 |
|
|
66 |
B. Other transition methods trigger the creation of a nested state machine,
|
|
67 |
whose job is to parse a compound construct ('indent' does a block quote,
|
|
68 |
'bullet' does a bullet list, 'overline' does a section [first checking
|
|
69 |
for a valid section header], etc.).
|
|
70 |
|
|
71 |
- In the case of lists and explicit markup, a one-off state machine is
|
|
72 |
created and run to parse contents of the first item.
|
|
73 |
|
|
74 |
- A new state machine is created and its initial state is set to the
|
|
75 |
appropriate specialized state (`BulletList` in the case of the
|
|
76 |
'bullet' transition; see `SpecializedBody` for more detail). This
|
|
77 |
state machine is run to parse the compound element (or series of
|
|
78 |
explicit markup elements), and returns as soon as a non-member element
|
|
79 |
is encountered. For example, the `BulletList` state machine ends as
|
|
80 |
soon as it encounters an element which is not a list item of that
|
|
81 |
bullet list. The optional omission of inter-element blank lines is
|
|
82 |
enabled by this nested state machine.
|
|
83 |
|
|
84 |
- The current line index is advanced to the end of the elements parsed,
|
|
85 |
and parsing continues with step 1.
|
|
86 |
|
|
87 |
C. The result of the 'text' transition depends on the next line of text.
|
|
88 |
The current state is changed to `Text`, under which the second line is
|
|
89 |
examined. If the second line is:
|
|
90 |
|
|
91 |
- Indented: The element is a definition list item, and parsing proceeds
|
|
92 |
similarly to step 2.B, using the `DefinitionList` state.
|
|
93 |
|
|
94 |
- A line of uniform punctuation characters: The element is a section
|
|
95 |
header; again, parsing proceeds as in step 2.B, and `Body` is still
|
|
96 |
used.
|
|
97 |
|
|
98 |
- Anything else: The element is a paragraph, which is examined for
|
|
99 |
inline markup and appended to the parent element. Processing
|
|
100 |
continues with step 1.
|
|
101 |
"""
|
|
102 |
|
|
103 |
__docformat__ = 'reStructuredText'
|
|
104 |
|
|
105 |
|
|
106 |
import sys
|
|
107 |
import re
|
|
108 |
import roman
|
|
109 |
from types import TupleType, FunctionType, MethodType
|
|
110 |
from docutils import nodes, statemachine, utils, urischemes
|
|
111 |
from docutils import ApplicationError, DataError
|
|
112 |
from docutils.statemachine import StateMachineWS, StateWS
|
|
113 |
from docutils.nodes import fully_normalize_name as normalize_name
|
|
114 |
from docutils.nodes import whitespace_normalize_name
|
|
115 |
from docutils.utils import escape2null, unescape, column_width
|
|
116 |
import docutils.parsers.rst
|
|
117 |
from docutils.parsers.rst import directives, languages, tableparser, roles
|
|
118 |
from docutils.parsers.rst.languages import en as _fallback_language_module
|
|
119 |
|
|
120 |
|
|
121 |
class MarkupError(DataError):
    pass
|
|
122 |
class UnknownInterpretedRoleError(DataError):
    pass
|
|
123 |
class InterpretedRoleNotImplementedError(DataError):
    pass
|
|
124 |
class ParserError(ApplicationError):
    pass
|
|
125 |
class MarkupMismatch(Exception):
    pass
|
|
126 |
|
|
127 |
|
|
128 |
class Struct:

    """Stores data attributes for dotted-attribute access."""

    def __init__(self, **keywordargs):
        # Copy each keyword argument onto the instance as an attribute.
        for name, value in keywordargs.items():
            setattr(self, name, value)
|
|
134 |
|
|
135 |
|
|
136 |
class RSTStateMachine(StateMachineWS):

    """
    reStructuredText's master StateMachine.

    The entry point to reStructuredText parsing is the `run()` method.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=1,
            inliner=None):
        """
        Parse `input_lines` and modify the `document` node in place.

        Extend `StateMachineWS.run()`: set up parse-global data and
        run the StateMachine.
        """
        settings = document.settings
        self.language = languages.get_language(settings.language_code)
        self.match_titles = match_titles
        if inliner is None:
            inliner = Inliner()
        inliner.init_customizations(settings)
        # `memo` carries parse-global data down to nested state machines.
        self.memo = Struct(document=document,
                           reporter=document.reporter,
                           language=self.language,
                           title_styles=[],
                           section_level=0,
                           section_bubble_up_kludge=0,
                           inliner=inliner)
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = self.memo.reporter
        self.node = document
        results = StateMachineWS.run(self, input_lines, input_offset,
                                     input_source=document['source'])
        assert results == [], 'RSTStateMachine.run() results should be empty!'
        # Drop references that are no longer needed after the parse:
        self.node = self.memo = None
|
|
173 |
|
|
174 |
|
|
175 |
class NestedStateMachine(StateMachineWS):

    """
    StateMachine run from within other StateMachine runs, to parse nested
    document structures.
    """

    def run(self, input_lines, input_offset, memo, node, match_titles=1):
        """
        Parse `input_lines` and populate a `docutils.nodes.document` instance.

        Extend `StateMachineWS.run()`: set up document-wide data.
        """
        self.match_titles = match_titles
        # Share the parse-global data gathered by the parent state machine:
        self.memo = memo
        self.document = memo.document
        self.reporter = memo.reporter
        self.language = memo.language
        self.node = node
        self.attach_observer(self.document.note_source)
        results = StateMachineWS.run(self, input_lines, input_offset)
        assert results == [], ('NestedStateMachine.run() results should be '
                               'empty!')
        return results
|
|
199 |
|
|
200 |
|
|
201 |
class RSTState(StateWS):

    """
    reStructuredText State superclass.

    Contains methods used by all State subclasses.
    """

    # State machine class used for nested parsing (block quotes, list
    # items, etc.):
    nested_sm = NestedStateMachine

    def __init__(self, state_machine, debug=0):
        # Default keyword arguments for nested state machines; `Body` is the
        # generic starting state.
        self.nested_sm_kwargs = {'state_classes': state_classes,
                                 'initial_state': 'Body'}
        StateWS.__init__(self, state_machine, debug)

    def runtime_init(self):
        """Cache parse-global data from `self.state_machine.memo`."""
        StateWS.runtime_init(self)
        memo = self.state_machine.memo
        self.memo = memo
        self.reporter = memo.reporter
        self.inliner = memo.inliner
        self.document = memo.document
        self.parent = self.state_machine.node

    def goto_line(self, abs_line_offset):
        """
        Jump to input line `abs_line_offset`, ignoring jumps past the end.
        """
        try:
            self.state_machine.goto_line(abs_line_offset)
        except EOFError:
            pass

    def no_match(self, context, transitions):
        """
        Override `StateWS.no_match` to generate a system message.

        This code should never be run.
        """
        self.reporter.severe(
            'Internal error: no transition pattern match. State: "%s"; '
            'transitions: %s; context: %s; current line: %r.'
            % (self.__class__.__name__, transitions, context,
               self.state_machine.line),
            line=self.state_machine.abs_line_number())
        return context, None, []

    def bof(self, context):
        """Called at beginning of file."""
        return [], []

    def nested_parse(self, block, input_offset, node, match_titles=0,
                     state_machine_class=None, state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`.  Returns the new absolute line offset.
        """
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
        block_length = len(block)
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        state_machine.unlink()
        new_offset = state_machine.abs_line_offset()
        # No `block.parent` implies disconnected -- lines aren't in sync:
        if block.parent and (len(block) - block_length) != 0:
            # Adjustment for block if modified in nested parse:
            self.state_machine.next_line(len(block) - block_length)
        return new_offset

    def nested_list_parse(self, block, input_offset, node, initial_state,
                          blank_finish,
                          blank_finish_state=None,
                          extra_settings={},
                          match_titles=0,
                          state_machine_class=None,
                          state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`.  Also keep track of optional intermediate blank lines and the
        required final one.

        Returns a 2-tuple: the new absolute line offset, and the final value
        of the `blank_finish` flag.

        NOTE: the mutable default for `extra_settings` is safe here because
        the dict is only iterated, never modified.
        """
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            # Copy before modifying, so the shared defaults stay intact:
            state_machine_kwargs = self.nested_sm_kwargs.copy()
        state_machine_kwargs['initial_state'] = initial_state
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
        if blank_finish_state is None:
            blank_finish_state = initial_state
        state_machine.states[blank_finish_state].blank_finish = blank_finish
        for key, value in extra_settings.items():
            setattr(state_machine.states[initial_state], key, value)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        blank_finish = state_machine.states[blank_finish_state].blank_finish
        state_machine.unlink()
        return state_machine.abs_line_offset(), blank_finish

    def section(self, title, source, style, lineno, messages):
        """Check for a valid subsection and create one if it checks out."""
        if self.check_subsection(source, style, lineno):
            self.new_subsection(title, lineno, messages)

    def check_subsection(self, source, style, lineno):
        """
        Check for a valid subsection header.  Return 1 (true) or None (false).

        When a new section is reached that isn't a subsection of the current
        section, back up the line count (use ``previous_line(-x)``), then
        ``raise EOFError``.  The current StateMachine will finish, then the
        calling StateMachine can re-examine the title.  This will work its way
        back up the calling chain until the correct section level is reached.

        @@@ Alternative: Evaluate the title, store the title info & level, and
        back up the chain until that level is reached.  Store in memo? Or
        return in results?

        :Exception: `EOFError` when a sibling or supersection encountered.
        """
        memo = self.memo
        title_styles = memo.title_styles
        mylevel = memo.section_level
        try:                            # check for existing title style
            level = title_styles.index(style) + 1
        except ValueError:              # new title style
            if len(title_styles) == memo.section_level:  # new subsection
                title_styles.append(style)
                return 1
            else:                       # not at lowest level
                self.parent += self.title_inconsistent(source, lineno)
                return None
        if level <= mylevel:            # sibling or supersection
            memo.section_level = level  # bubble up to parent section
            if len(style) == 2:
                # two-char style: title has both overline and underline
                memo.section_bubble_up_kludge = 1
            # back up 2 lines for underline title, 3 for overline title
            self.state_machine.previous_line(len(style) + 1)
            raise EOFError              # let parent section re-evaluate
        if level == mylevel + 1:        # immediate subsection
            return 1
        else:                           # invalid subsection
            self.parent += self.title_inconsistent(source, lineno)
            return None

    def title_inconsistent(self, sourcetext, lineno):
        """Return a severe system_message reporting an inconsistent title."""
        error = self.reporter.severe(
            'Title level inconsistent:', nodes.literal_block('', sourcetext),
            line=lineno)
        return error

    def new_subsection(self, title, lineno, messages):
        """Append new subsection to document tree. On return, check level."""
        memo = self.memo
        mylevel = memo.section_level
        memo.section_level += 1
        section_node = nodes.section()
        self.parent += section_node
        textnodes, title_messages = self.inline_text(title, lineno)
        titlenode = nodes.title(title, '', *textnodes)
        name = normalize_name(titlenode.astext())
        section_node['names'].append(name)
        section_node += titlenode
        section_node += messages
        section_node += title_messages
        self.document.note_implicit_target(section_node, section_node)
        offset = self.state_machine.line_offset + 1
        absoffset = self.state_machine.abs_line_offset() + 1
        # Parse the rest of the input as the new section's contents:
        newabsoffset = self.nested_parse(
            self.state_machine.input_lines[offset:], input_offset=absoffset,
            node=section_node, match_titles=1)
        self.goto_line(newabsoffset)
        if memo.section_level <= mylevel:  # can't handle next section?
            raise EOFError                 # bubble up to supersection
        # reset section_level; next pass will detect it properly
        memo.section_level = mylevel

    def paragraph(self, lines, lineno):
        """
        Return a list (paragraph & messages) & a boolean: literal_block next?
        """
        data = '\n'.join(lines).rstrip()
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            # Paragraph ends with an unescaped '::': literal block follows.
            if len(data) == 2:
                # The paragraph is '::' alone: emit no paragraph at all.
                return [], 1
            elif data[-3] in ' \n':
                # '::' stands alone at the end: strip it entirely.
                text = data[:-3].rstrip()
            else:
                # '::' is attached to text: keep a single ':'.
                text = data[:-1]
            literalnext = 1
        else:
            text = data
            literalnext = 0
        textnodes, messages = self.inline_text(text, lineno)
        p = nodes.paragraph(data, '', *textnodes)
        p.line = lineno
        return [p] + messages, literalnext

    def inline_text(self, text, lineno):
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.
        """
        return self.inliner.parse(text, lineno, self.memo, self.parent)

    def unindent_warning(self, node_name):
        """Return a warning system_message about an unexpected unindent."""
        return self.reporter.warning(
            '%s ends without a blank line; unexpected unindent.' % node_name,
            line=(self.state_machine.abs_line_number() + 1))
|
|
414 |
|
|
415 |
|
|
416 |
def build_regexp(definition, compile=1):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions (nested 4-tuples) to be joined into an
        or-group.

    If `compile` is true, return a compiled pattern object (with
    ``re.UNICODE``); otherwise return the pattern string.
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        # A tuple is a nested definition; recurse without compiling so the
        # sub-pattern can be embedded as a string.
        if isinstance(part, tuple):
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
    if compile:
        return re.compile(regexp, re.UNICODE)
    else:
        return regexp
|
|
437 |
|
|
438 |
|
|
439 |
class Inliner:
|
|
440 |
|
|
441 |
"""
|
|
442 |
Parse inline markup; call the `parse()` method.
|
|
443 |
"""
|
|
444 |
|
|
445 |
def __init__(self):
|
|
446 |
self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),]
|
|
447 |
"""List of (pattern, bound method) tuples, used by
|
|
448 |
`self.implicit_inline`."""
|
|
449 |
|
|
450 |
def init_customizations(self, settings):
|
|
451 |
"""Setting-based customizations; run when parsing begins."""
|
|
452 |
if settings.pep_references:
|
|
453 |
self.implicit_dispatch.append((self.patterns.pep,
|
|
454 |
self.pep_reference))
|
|
455 |
if settings.rfc_references:
|
|
456 |
self.implicit_dispatch.append((self.patterns.rfc,
|
|
457 |
self.rfc_reference))
|
|
458 |
|
|
459 |
    def parse(self, text, lineno, memo, parent):
        # Needs to be refactored for nested inline markup.
        # Add nested_parse() method?
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.

        Using `self.patterns.initial`, a pattern which matches start-strings
        (emphasis, strong, interpreted, phrase reference, literal,
        substitution reference, and inline target) and complete constructs
        (simple reference, footnote reference), search for a candidate.  When
        one is found, check for validity (e.g., not a quoted '*' character).
        If valid, search for the corresponding end string if applicable, and
        check it for validity.  If not found or invalid, generate a warning
        and ignore the start-string.  Implicit inline markup (e.g. standalone
        URIs) is found last.
        """
        self.reporter = memo.reporter
        self.document = memo.document
        self.language = memo.language
        self.parent = parent
        pattern_search = self.patterns.initial.search
        dispatch = self.dispatch
        # Backslash-escaped characters are temporarily encoded as nulls so
        # the markup patterns won't match them:
        remaining = escape2null(text)
        processed = []
        unprocessed = []
        messages = []
        while remaining:
            match = pattern_search(remaining)
            if match:
                groups = match.groupdict()
                # The first non-empty of these match groups selects the
                # construct-specific handler (dispatch holds unbound
                # methods; `self` is passed explicitly below):
                method = dispatch[groups['start'] or groups['backquote']
                                  or groups['refend'] or groups['fnend']]
                before, inlines, remaining, sysmessages = method(self, match,
                                                                 lineno)
                unprocessed.append(before)
                messages += sysmessages
                if inlines:
                    # Text preceding the construct may still contain
                    # implicit markup (e.g. standalone URIs):
                    processed += self.implicit_inline(''.join(unprocessed),
                                                      lineno)
                    processed += inlines
                    unprocessed = []
            else:
                break
        # Whatever remains is plain text, except for implicit markup:
        remaining = ''.join(unprocessed) + remaining
        if remaining:
            processed += self.implicit_inline(remaining, lineno)
        return processed, messages
|
|
506 |
|
|
507 |
    # Inline markup recognition rules, shared by all `Inliner` instances.
    # These are class attributes: the component strings and the compiled
    # `patterns` Struct are built once, at class-definition time.

    # Matching open/close quote & bracket pairs (used by `quoted_start`):
    openers = '\'"([{<'
    closers = '\'")]}>'
    # Context required immediately before a markup start-string:
    start_string_prefix = (r'((?<=^)|(?<=[-/: \n%s]))' % re.escape(openers))
    # Context required immediately after a markup end-string:
    end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n\x00%s]))'
                         % re.escape(closers))
    non_whitespace_before = r'(?<![ \n])'
    non_whitespace_escape_before = r'(?<![ \n\x00])'
    non_whitespace_after = r'(?![ \n])'
    # Alphanumerics with isolated internal [-._] chars (i.e. not 2 together):
    simplename = r'(?:(?!_)\w)+(?:[-._](?:(?!_)\w)+)*'
    # Valid URI characters (see RFC 2396 & RFC 2732);
    # final \x00 allows backslash escapes in URIs:
    uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
    # Delimiter indicating the end of a URI (not part of the URI):
    uri_end_delim = r"""[>]"""
    # Last URI character; same as uric but no punctuation:
    urilast = r"""[_~*/=+a-zA-Z0-9]"""
    # End of a URI (either 'urilast' or 'uric followed by a
    # uri_end_delim'):
    uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
    emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
    email_pattern = r"""
          %(emailc)s+(?:\.%(emailc)s+)*   # name
          (?<!\x00)@                      # at
          %(emailc)s+(?:\.%(emailc)s*)*   # host
          %(uri_end)s                     # final URI char
          """
    # Definition (for `build_regexp`) of the pattern matched first against
    # each piece of text; group names select handlers via `self.dispatch`.
    parts = ('initial_inline', start_string_prefix, '',
             [('start', '', non_whitespace_after,  # simple start-strings
               [r'\*\*',                # strong
                r'\*(?!\*)',            # emphasis but not strong
                r'``',                  # literal
                r'_`',                  # inline internal target
                r'\|(?!\|)']            # substitution reference
               ),
              ('whole', '', end_string_suffix,  # whole constructs
               [# reference name & end-string
                r'(?P<refname>%s)(?P<refend>__?)' % simplename,
                ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
                 [r'[0-9]+',                # manually numbered
                  r'\#(%s)?' % simplename,  # auto-numbered (w/ label?)
                  r'\*',                    # auto-symbol
                  r'(?P<citationlabel>%s)' % simplename]  # citation reference
                 )
                ]
               ),
              ('backquote',             # interpreted text or phrase reference
               '(?P<role>(:%s:)?)' % simplename,  # optional role
               non_whitespace_after,
               ['`(?!`)']               # but not literal
               )
              ]
             )
    patterns = Struct(
          initial=build_regexp(parts),
          emphasis=re.compile(non_whitespace_escape_before
                              + r'(\*)' + end_string_suffix),
          strong=re.compile(non_whitespace_escape_before
                            + r'(\*\*)' + end_string_suffix),
          interpreted_or_phrase_ref=re.compile(
              r"""
              %(non_whitespace_escape_before)s
              (
                `
                (?P<suffix>
                  (?P<role>:%(simplename)s:)?
                  (?P<refend>__?)?
                )
              )
              %(end_string_suffix)s
              """ % locals(), re.VERBOSE | re.UNICODE),
          embedded_uri=re.compile(
              r"""
              (
                (?:[ \n]+|^)            # spaces or beginning of line/string
                <                       # open bracket
                %(non_whitespace_after)s
                ([^<>\x00]+)            # anything but angle brackets & nulls
                %(non_whitespace_before)s
                >                       # close bracket w/o whitespace before
              )
              $                         # end of string
              """ % locals(), re.VERBOSE),
          literal=re.compile(non_whitespace_before + '(``)'
                             + end_string_suffix),
          target=re.compile(non_whitespace_escape_before
                            + r'(`)' + end_string_suffix),
          substitution_ref=re.compile(non_whitespace_escape_before
                                      + r'(\|_{0,2})'
                                      + end_string_suffix),
          email=re.compile(email_pattern % locals() + '$', re.VERBOSE),
          uri=re.compile(
                (r"""
                %(start_string_prefix)s
                (?P<whole>
                  (?P<absolute>           # absolute URI
                    (?P<scheme>             # scheme (http, ftp, mailto)
                      [a-zA-Z][a-zA-Z0-9.+-]*
                    )
                    :
                    (
                      (                       # either:
                        (//?)?                  # hierarchical URI
                        %(uric)s*               # URI characters
                        %(uri_end)s             # final URI char
                      )
                      (                       # optional query
                        \?%(uric)s*
                        %(uri_end)s
                      )?
                      (                       # optional fragment
                        \#%(uric)s*
                        %(uri_end)s
                      )?
                    )
                  )
                |                       # *OR*
                  (?P<email>              # email address
                   """ + email_pattern + r"""
                  )
                )
                %(end_string_suffix)s
                """) % locals(), re.VERBOSE),
          pep=re.compile(
                r"""
                %(start_string_prefix)s
                (
                  (pep-(?P<pepnum1>\d+)(.txt)?)  # reference to source file
                |
                  (PEP\s+(?P<pepnum2>\d+))       # reference by name
                )
                %(end_string_suffix)s""" % locals(), re.VERBOSE),
          rfc=re.compile(
                r"""
                %(start_string_prefix)s
                (RFC(-|\s+)?(?P<rfcnum>\d+))
                %(end_string_suffix)s""" % locals(), re.VERBOSE))
|
|
644 |
|
|
645 |
def quoted_start(self, match):
|
|
646 |
"""Return 1 if inline markup start-string is 'quoted', 0 if not."""
|
|
647 |
string = match.string
|
|
648 |
start = match.start()
|
|
649 |
end = match.end()
|
|
650 |
if start == 0: # start-string at beginning of text
|
|
651 |
return 0
|
|
652 |
prestart = string[start - 1]
|
|
653 |
try:
|
|
654 |
poststart = string[end]
|
|
655 |
if self.openers.index(prestart) \
|
|
656 |
== self.closers.index(poststart): # quoted
|
|
657 |
return 1
|
|
658 |
except IndexError: # start-string at end of text
|
|
659 |
return 1
|
|
660 |
except ValueError: # not quoted
|
|
661 |
pass
|
|
662 |
return 0
|
|
663 |
|
|
664 |
    def inline_obj(self, match, lineno, end_pattern, nodeclass,
                   restore_backslashes=0):
        """
        Parse a simple start-string/end-string inline construct.

        Returns a 5-tuple: text before the construct, list of nodes, text
        after the construct, list of system messages, and the matched
        end-string ('' on failure or quoted start).
        """
        string = match.string
        matchstart = match.start('start')
        matchend = match.end('start')
        if self.quoted_start(match):
            # Quoted start-string is not markup; skip past it.
            return (string[:matchend], [], string[matchend:], [], '')
        endmatch = end_pattern.search(string[matchend:])
        if endmatch and endmatch.start(1):  # 1 or more chars
            text = unescape(endmatch.string[:endmatch.start(1)],
                            restore_backslashes)
            textend = matchend + endmatch.end(1)
            rawsource = unescape(string[matchstart:textend], 1)
            return (string[:matchstart], [nodeclass(rawsource, text)],
                    string[textend:], [], endmatch.group(1))
        # No valid end-string found: warn and emit a problematic node.
        msg = self.reporter.warning(
            'Inline %s start-string without end-string.'
            % nodeclass.__name__, line=lineno)
        text = unescape(string[matchstart:matchend], 1)
        rawsource = unescape(string[matchstart:matchend], 1)
        prb = self.problematic(text, rawsource, msg)
        return string[:matchstart], [prb], string[matchend:], [msg], ''
|
|
686 |
|
|
687 |
def problematic(self, text, rawsource, message):
|
|
688 |
msgid = self.document.set_id(message, self.parent)
|
|
689 |
problematic = nodes.problematic(rawsource, text, refid=msgid)
|
|
690 |
prbid = self.document.set_id(problematic)
|
|
691 |
message.add_backref(prbid)
|
|
692 |
return problematic
|
|
693 |
|
|
694 |
def emphasis(self, match, lineno):
|
|
695 |
before, inlines, remaining, sysmessages, endstring = self.inline_obj(
|
|
696 |
match, lineno, self.patterns.emphasis, nodes.emphasis)
|
|
697 |
return before, inlines, remaining, sysmessages
|
|
698 |
|
|
699 |
def strong(self, match, lineno):
|
|
700 |
before, inlines, remaining, sysmessages, endstring = self.inline_obj(
|
|
701 |
match, lineno, self.patterns.strong, nodes.strong)
|
|
702 |
return before, inlines, remaining, sysmessages
|
|
703 |
|
|
704 |
    def interpreted_or_phrase_ref(self, match, lineno):
        """
        Handle a backquote start-string: either interpreted text (with an
        optional role, prefix or suffix) or a phrase reference.

        Returns a 4-tuple: text before the construct, list of nodes, text
        after the construct, and list of system messages.
        """
        end_pattern = self.patterns.interpreted_or_phrase_ref
        string = match.string
        matchstart = match.start('backquote')
        matchend = match.end('backquote')
        rolestart = match.start('role')
        role = match.group('role')
        position = ''
        if role:
            role = role[1:-1]           # strip colons: ':role:' -> 'role'
            position = 'prefix'
        elif self.quoted_start(match):
            # Quoted start-string is not markup; skip past it.
            return (string[:matchend], [], string[matchend:], [])
        endmatch = end_pattern.search(string[matchend:])
        if endmatch and endmatch.start(1):  # 1 or more chars
            textend = matchend + endmatch.end()
            if endmatch.group('role'):
                if role:
                    # Roles both before and after the text: error.
                    msg = self.reporter.warning(
                        'Multiple roles in interpreted text (both '
                        'prefix and suffix present; only one allowed).',
                        line=lineno)
                    text = unescape(string[rolestart:textend], 1)
                    prb = self.problematic(text, text, msg)
                    return string[:rolestart], [prb], string[textend:], [msg]
                role = endmatch.group('suffix')[1:-1]
                position = 'suffix'
            escaped = endmatch.string[:endmatch.start(1)]
            rawsource = unescape(string[matchstart:textend], 1)
            if rawsource[-1:] == '_':
                # Trailing underscore: this is a phrase reference.
                if role:
                    # A role on a reference is invalid: error.
                    msg = self.reporter.warning(
                        'Mismatch: both interpreted text role %s and '
                        'reference suffix.' % position, line=lineno)
                    text = unescape(string[rolestart:textend], 1)
                    prb = self.problematic(text, text, msg)
                    return string[:rolestart], [prb], string[textend:], [msg]
                return self.phrase_ref(string[:matchstart], string[textend:],
                                       rawsource, escaped, unescape(escaped))
            else:
                rawsource = unescape(string[rolestart:textend], 1)
                nodelist, messages = self.interpreted(rawsource, escaped, role,
                                                      lineno)
                return (string[:rolestart], nodelist,
                        string[textend:], messages)
        # No end-string found: warn and emit a problematic node.
        msg = self.reporter.warning(
            'Inline interpreted text or phrase reference start-string '
            'without end-string.', line=lineno)
        text = unescape(string[matchstart:matchend], 1)
        prb = self.problematic(text, text, msg)
        return string[:matchstart], [prb], string[matchend:], [msg]
|
|
755 |
|
|
756 |
    def phrase_ref(self, before, after, rawsource, escaped, text):
        """
        Build the node(s) for a phrase reference (`text`_).

        Handles an embedded URI or email (`text <uri>`_), named vs.
        anonymous references, and registration of targets/refnames with
        the document.  Returns (before-text, node list, after-text,
        system messages).
        """
        match = self.patterns.embedded_uri.search(escaped)
        if match:
            # Embedded URI: the visible text is everything before "<uri>".
            text = unescape(escaped[:match.start(0)])
            uri_text = match.group(2)
            # Whitespace inside the URI (line wraps) is removed entirely.
            uri = ''.join(uri_text.split())
            uri = self.adjust_uri(uri)
            if uri:
                target = nodes.target(match.group(1), refuri=uri)
            else:
                raise ApplicationError('problem with URI: %r' % uri_text)
            if not text:
                # No visible text given; display the URI itself.
                text = uri
        else:
            target = None
        refname = normalize_name(text)
        reference = nodes.reference(rawsource, text,
                                    name=whitespace_normalize_name(text))
        node_list = [reference]
        if rawsource[-2:] == '__':
            # Anonymous reference ("__" suffix).
            if target:
                reference['refuri'] = uri
            else:
                reference['anonymous'] = 1
        else:
            # Named reference ("_" suffix).
            if target:
                # Embedded URI also defines an explicit target.
                reference['refuri'] = uri
                target['names'].append(refname)
                self.document.note_explicit_target(target, self.parent)
                node_list.append(target)
            else:
                reference['refname'] = refname
                self.document.note_refname(reference)
        return before, node_list, after, []
|
|
790 |
|
|
791 |
def adjust_uri(self, uri):
|
|
792 |
match = self.patterns.email.match(uri)
|
|
793 |
if match:
|
|
794 |
return 'mailto:' + uri
|
|
795 |
else:
|
|
796 |
return uri
|
|
797 |
|
|
798 |
    def interpreted(self, rawsource, text, role, lineno):
        """
        Process interpreted text via the registered role function.

        Looks up `role` (empty string selects the default role); on
        success returns (nodes, messages) from the role function, else a
        `problematic` node plus an error message.
        """
        role_fn, messages = roles.role(role, self.language, lineno,
                                       self.reporter)
        if role_fn:
            # NOTE: local name `nodes` shadows the `nodes` module here.
            nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
            return nodes, messages + messages2
        else:
            msg = self.reporter.error(
                'Unknown interpreted text role "%s".' % role,
                line=lineno)
            return ([self.problematic(rawsource, rawsource, msg)],
                    messages + [msg])
|
|
810 |
|
|
811 |
def literal(self, match, lineno):
|
|
812 |
before, inlines, remaining, sysmessages, endstring = self.inline_obj(
|
|
813 |
match, lineno, self.patterns.literal, nodes.literal,
|
|
814 |
restore_backslashes=1)
|
|
815 |
return before, inlines, remaining, sysmessages
|
|
816 |
|
|
817 |
    def inline_internal_target(self, match, lineno):
        """
        Parse an inline internal target (_`name`) and register it as an
        explicit target with the document.
        """
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.target, nodes.target)
        if inlines and isinstance(inlines[0], nodes.target):
            # inline_obj produced a target node; register its name.
            assert len(inlines) == 1
            target = inlines[0]
            name = normalize_name(target.astext())
            target['names'].append(name)
            self.document.note_explicit_target(target, self.parent)
        return before, inlines, remaining, sysmessages
|
|
827 |
|
|
828 |
    def substitution_reference(self, match, lineno):
        """
        Parse a substitution reference (|name|), optionally combined with
        a named (|name|_) or anonymous (|name|__) reference, and note it
        with the document.
        """
        before, inlines, remaining, sysmessages, endstring = self.inline_obj(
            match, lineno, self.patterns.substitution_ref,
            nodes.substitution_reference)
        if len(inlines) == 1:
            subref_node = inlines[0]
            if isinstance(subref_node, nodes.substitution_reference):
                subref_text = subref_node.astext()
                self.document.note_substitution_ref(subref_node, subref_text)
                if endstring[-1:] == '_':
                    # Trailing "_"/"__": wrap in a reference node.
                    reference_node = nodes.reference(
                        '|%s%s' % (subref_text, endstring), '')
                    if endstring[-2:] == '__':
                        reference_node['anonymous'] = 1
                    else:
                        reference_node['refname'] = normalize_name(subref_text)
                        self.document.note_refname(reference_node)
                    reference_node += subref_node
                    inlines = [reference_node]
        return before, inlines, remaining, sysmessages
|
|
848 |
|
|
849 |
    def footnote_reference(self, match, lineno):
        """
        Handles `nodes.footnote_reference` and `nodes.citation_reference`
        elements.

        Distinguishes citations ([label]_ with a citation-style label),
        auto-numbered footnotes ([#]_/[#name]_), symbol footnotes ([*]_),
        and manually numbered footnotes.
        """
        label = match.group('footnotelabel')
        refname = normalize_name(label)
        string = match.string
        before = string[:match.start('whole')]
        remaining = string[match.end('whole'):]
        if match.group('citationlabel'):
            refnode = nodes.citation_reference('[%s]_' % label,
                                               refname=refname)
            refnode += nodes.Text(label)
            self.document.note_citation_ref(refnode)
        else:
            refnode = nodes.footnote_reference('[%s]_' % label)
            if refname[0] == '#':
                # "#" prefix: auto-numbered footnote.
                refname = refname[1:]
                refnode['auto'] = 1
                self.document.note_autofootnote_ref(refnode)
            elif refname == '*':
                # "*": auto-symbol footnote (no refname).
                refname = ''
                refnode['auto'] = '*'
                self.document.note_symbol_footnote_ref(
                    refnode)
            else:
                refnode += nodes.Text(label)
            if refname:
                refnode['refname'] = refname
                self.document.note_footnote_ref(refnode)
        if utils.get_trim_footnote_ref_space(self.document.settings):
            # Trim whitespace before the reference per settings.
            before = before.rstrip()
        return (before, [refnode], remaining, [])
|
|
883 |
|
|
884 |
    def reference(self, match, lineno, anonymous=None):
        """
        Parse a simple reference (name_ or name__ when `anonymous`).

        Named references are registered with the document via
        `note_refname`; returns (before, [reference node], after, []).
        """
        referencename = match.group('refname')
        refname = normalize_name(referencename)
        referencenode = nodes.reference(
            referencename + match.group('refend'), referencename,
            name=whitespace_normalize_name(referencename))
        if anonymous:
            referencenode['anonymous'] = 1
        else:
            referencenode['refname'] = refname
            self.document.note_refname(referencenode)
        string = match.string
        matchstart = match.start('whole')
        matchend = match.end('whole')
        return (string[:matchstart], [referencenode], string[matchend:], [])
|
|
899 |
|
|
900 |
def anonymous_reference(self, match, lineno):
|
|
901 |
return self.reference(match, lineno, anonymous=1)
|
|
902 |
|
|
903 |
def standalone_uri(self, match, lineno):
|
|
904 |
if not match.group('scheme') or urischemes.schemes.has_key(
|
|
905 |
match.group('scheme').lower()):
|
|
906 |
if match.group('email'):
|
|
907 |
addscheme = 'mailto:'
|
|
908 |
else:
|
|
909 |
addscheme = ''
|
|
910 |
text = match.group('whole')
|
|
911 |
unescaped = unescape(text, 0)
|
|
912 |
return [nodes.reference(unescape(text, 1), unescaped,
|
|
913 |
refuri=addscheme + unescaped)]
|
|
914 |
else: # not a valid scheme
|
|
915 |
raise MarkupMismatch
|
|
916 |
|
|
917 |
    def pep_reference(self, match, lineno):
        """
        Turn a PEP reference ("pep-NNN" file name or "PEP NNN" text) into
        a reference node pointing at the configured PEP URL.

        :Raises: `MarkupMismatch` if neither form matched.
        """
        text = match.group(0)
        if text.startswith('pep-'):
            pepnum = int(match.group('pepnum1'))
        elif text.startswith('PEP'):
            pepnum = int(match.group('pepnum2'))
        else:
            raise MarkupMismatch
        # URL built from document settings (base URL + file template).
        ref = (self.document.settings.pep_base_url
               + self.document.settings.pep_file_url_template % pepnum)
        unescaped = unescape(text, 0)
        return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
|
|
929 |
|
|
930 |
    # File-name template appended to ``settings.rfc_base_url``.
    rfc_url = 'rfc%d.html'

    def rfc_reference(self, match, lineno):
        """
        Turn an "RFC NNN" reference into a reference node pointing at the
        configured RFC URL.

        :Raises: `MarkupMismatch` if the text does not start with "RFC".
        """
        text = match.group(0)
        if text.startswith('RFC'):
            rfcnum = int(match.group('rfcnum'))
            ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
        else:
            raise MarkupMismatch
        unescaped = unescape(text, 0)
        return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
|
|
941 |
|
|
942 |
    def implicit_inline(self, text, lineno):
        """
        Check each of the patterns in `self.implicit_dispatch` for a match,
        and dispatch to the stored method for the pattern.  Recursively check
        the text before and after the match.  Return a list of `nodes.Text`
        and inline element nodes.
        """
        if not text:
            return []
        for pattern, method in self.implicit_dispatch:
            match = pattern.search(text)
            if match:
                try:
                    # Must recurse on strings before *and* after the match;
                    # there may be multiple patterns.
                    return (self.implicit_inline(text[:match.start()], lineno)
                            + method(match, lineno) +
                            self.implicit_inline(text[match.end():], lineno))
                except MarkupMismatch:
                    # Dispatch method rejected the match; try next pattern.
                    pass
        # No pattern matched: plain text node.
        return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
|
|
963 |
|
|
964 |
    # Maps inline markup start-strings to their handler methods.
    dispatch = {'*': emphasis,
                '**': strong,
                '`': interpreted_or_phrase_ref,
                '``': literal,
                '_`': inline_internal_target,
                ']_': footnote_reference,
                '|': substitution_reference,
                '_': reference,
                '__': anonymous_reference}
|
|
973 |
|
|
974 |
|
|
975 |
def _loweralpha_to_int(s, _zero=(ord('a')-1)):
|
|
976 |
return ord(s) - _zero
|
|
977 |
|
|
978 |
def _upperalpha_to_int(s, _zero=(ord('A')-1)):
|
|
979 |
return ord(s) - _zero
|
|
980 |
|
|
981 |
def _lowerroman_to_int(s):
    """Convert a lowercase Roman numeral string (e.g. 'iv') to an integer."""
    uppercased = s.upper()
    return roman.fromRoman(uppercased)
|
|
983 |
|
|
984 |
|
|
985 |
class Body(RSTState):

    """
    Generic classifier of the first line of a block.

    Class-level data below drives the transition patterns: enumerated
    list formats/sequences, table borders, and option-list fragments.
    """

    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    enum = Struct()
    """Enumerated list parsing information."""

    # Prefix/suffix and slice bounds for each enumerator format.
    enum.formatinfo = {
        'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
        'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
        'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman'] # ORDERED!
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+',}
    # Converters from enumerator text to its ordinal value.
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': _lowerroman_to_int,
                       'upperroman': roman.fromRoman}

    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        # Anchored pattern: the whole enumerator text must match.
        enum.sequenceregexps[sequence] = re.compile(
            enum.sequencepats[sequence] + '$')

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table)."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern?  Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    # One named group per enumerator format (e.g. "(1)", "1)", "1.").
    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
            format, re.escape(enum.formatinfo[format].prefix),
            pats['enum'], re.escape(enum.formatinfo[format].suffix))

    patterns = {
          'bullet': ur'[-+*\u2022\u2023\u2043]( +|$)',
          'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
          'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
          'option_marker': r'%(option)s(, %(option)s)*(  +| ?$)' % pats,
          'doctest': r'>>>( +|$)',
          'line_block': r'\|( +|$)',
          'grid_table_top': grid_table_top_pat,
          'simple_table_top': simple_table_top_pat,
          'explicit_markup': r'\.\.( +|$)',
          'anonymous': r'__( +|$)',
          'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
          'text': r''}
    initial_transitions = (
          'bullet',
          'enumerator',
          'field_marker',
          'option_marker',
          'doctest',
          'line_block',
          'grid_table_top',
          'simple_table_top',
          'explicit_markup',
          'anonymous',
          'line',
          'text')
|
|
1076 |
|
|
1077 |
def indent(self, match, context, next_state):
|
|
1078 |
"""Block quote."""
|
|
1079 |
indented, indent, line_offset, blank_finish = \
|
|
1080 |
self.state_machine.get_indented()
|
|
1081 |
elements = self.block_quote(indented, line_offset)
|
|
1082 |
self.parent += elements
|
|
1083 |
if not blank_finish:
|
|
1084 |
self.parent += self.unindent_warning('Block quote')
|
|
1085 |
return context, next_state, []
|
|
1086 |
|
|
1087 |
    def block_quote(self, indented, line_offset):
        """
        Parse `indented` lines into one or more block_quote elements,
        splitting off attributions; return the list of new elements.
        """
        elements = []
        while indented:
            (blockquote_lines,
             attribution_lines,
             attribution_offset,
             indented,
             new_line_offset) = self.split_attribution(indented, line_offset)
            blockquote = nodes.block_quote()
            self.nested_parse(blockquote_lines, line_offset, blockquote)
            elements.append(blockquote)
            if attribution_lines:
                attribution, messages = self.parse_attribution(
                    attribution_lines, attribution_offset)
                blockquote += attribution
                elements += messages
            line_offset = new_line_offset
            # Skip blank lines before any following block quote.
            while indented and not indented[0]:
                indented = indented[1:]
                line_offset += 1
        return elements
|
|
1108 |
|
|
1109 |
    # Attribution marker: "--", "---", or em-dash, followed by non-space.
    # U+2014 is an em-dash:
    attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])')
|
|
1111 |
|
|
1112 |
    def split_attribution(self, indented, line_offset):
        """
        Check for a block quote attribution and split it off:

        * First line after a blank line must begin with a dash ("--", "---",
          em-dash; matches `self.attribution_pattern`).
        * Every line after that must have consistent indentation.
        * Attributions must be preceded by block quote content.

        Return a tuple of: (block quote content lines, attribution lines,
        attribution offset within `indented`, remaining indented lines,
        line offset of the remaining lines).
        """
        blank = None
        nonblank_seen = False
        for i in range(len(indented)):
            line = indented[i].rstrip()
            if line:
                if nonblank_seen and blank == i - 1: # last line blank
                    match = self.attribution_pattern.match(line)
                    if match:
                        attribution_end, indent = self.check_attribution(
                            indented, i)
                        if attribution_end:
                            a_lines = indented[i:attribution_end]
                            # Strip the dash from line 1, indent from rest.
                            a_lines.trim_left(match.end(), end=1)
                            a_lines.trim_left(indent, start=1)
                            return (indented[:i], a_lines,
                                    i, indented[attribution_end:],
                                    line_offset + attribution_end)
                nonblank_seen = True
            else:
                blank = i
        else:
            # No attribution found; everything is block quote content.
            return (indented, None, None, None, None)
|
|
1146 |
|
|
1147 |
    def check_attribution(self, indented, attribution_start):
        """
        Check attribution shape (consistent indentation of continuation
        lines).  Return the index past the end of the attribution, and
        the indent; (None, None) if the shape is invalid.
        """
        indent = None
        i = attribution_start + 1
        for i in range(attribution_start + 1, len(indented)):
            line = indented[i].rstrip()
            if not line:
                # Blank line ends the attribution.
                break
            if indent is None:
                indent = len(line) - len(line.lstrip())
            elif len(line) - len(line.lstrip()) != indent:
                return None, None       # bad shape; not an attribution
        else:
            # return index of line after last attribution line:
            i += 1
        return i, (indent or 0)
|
|
1166 |
|
|
1167 |
    def parse_attribution(self, indented, line_offset):
        """
        Parse attribution text into an `attribution` node.
        Return (node, system messages).
        """
        text = '\n'.join(indented).rstrip()
        lineno = self.state_machine.abs_line_number() + line_offset
        textnodes, messages = self.inline_text(text, lineno)
        node = nodes.attribution(text, '', *textnodes)
        node.line = lineno
        return node, messages
|
|
1174 |
|
|
1175 |
    def bullet(self, match, context, next_state):
        """Bullet list item: start a bullet_list and parse following items."""
        bulletlist = nodes.bullet_list()
        self.parent += bulletlist
        bulletlist['bullet'] = match.string[0]
        i, blank_finish = self.list_item(match.end())
        bulletlist += i
        offset = self.state_machine.line_offset + 1   # next line
        # Remaining items are consumed by a nested BulletList parse.
        new_line_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=bulletlist, initial_state='BulletList',
            blank_finish=blank_finish)
        self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Bullet list')
        return [], next_state, []
|
|
1192 |
|
|
1193 |
    def list_item(self, indent):
        """
        Parse one list item's indented body.
        Return (list_item node, blank_finish flag).
        """
        if self.state_machine.line[indent:]:
            # Text follows the marker on the same line: indent is known.
            indented, line_offset, blank_finish = (
                self.state_machine.get_known_indented(indent))
        else:
            # Marker alone on its line: measure indent from next lines.
            indented, indent, line_offset, blank_finish = (
                self.state_machine.get_first_known_indented(indent))
        listitem = nodes.list_item('\n'.join(indented))
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=listitem)
        return listitem, blank_finish
|
|
1205 |
|
|
1206 |
    def enumerator(self, match, context, next_state):
        """Enumerated List Item"""
        format, sequence, text, ordinal = self.parse_enumerator(match)
        if not self.is_enumerated_list_item(ordinal, sequence, format):
            # Not really a list item; reparse this line as plain text.
            raise statemachine.TransitionCorrection('text')
        enumlist = nodes.enumerated_list()
        self.parent += enumlist
        if sequence == '#':
            # Auto-enumerator defaults to arabic numbering.
            enumlist['enumtype'] = 'arabic'
        else:
            enumlist['enumtype'] = sequence
        enumlist['prefix'] = self.enum.formatinfo[format].prefix
        enumlist['suffix'] = self.enum.formatinfo[format].suffix
        if ordinal != 1:
            enumlist['start'] = ordinal
            msg = self.reporter.info(
                'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
                % (text, ordinal), line=self.state_machine.abs_line_number())
            self.parent += msg
        listitem, blank_finish = self.list_item(match.end())
        enumlist += listitem
        offset = self.state_machine.line_offset + 1   # next line
        # Following items must continue the same sequence and format.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=enumlist, initial_state='EnumeratedList',
            blank_finish=blank_finish,
            extra_settings={'lastordinal': ordinal,
                            'format': format,
                            'auto': sequence == '#'})
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Enumerated list')
        return [], next_state, []
|
|
1240 |
|
|
1241 |
    def parse_enumerator(self, match, expected_sequence=None):
        """
        Analyze an enumerator and return the results.

        :Return:
            - the enumerator format ('period', 'parens', or 'rparen'),
            - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
            - the text of the enumerator, stripped of formatting, and
            - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
              ``None`` is returned for invalid enumerator text).

        The enumerator format has already been determined by the regular
        expression match. If `expected_sequence` is given, that sequence is
        tried first. If not, we check for Roman numeral 1. This way,
        single-character Roman numerals (which are also alphabetical) can be
        matched. If no sequence has been matched, all sequences are checked in
        order.
        """
        groupdict = match.groupdict()
        sequence = ''
        for format in self.enum.formats:
            if groupdict[format]: # was this the format matched?
                break # yes; keep `format`
        else: # shouldn't happen
            raise ParserError('enumerator format not matched')
        # Strip the format's prefix/suffix using its slice bounds.
        text = groupdict[format][self.enum.formatinfo[format].start
                                 :self.enum.formatinfo[format].end]
        if text == '#':
            sequence = '#'
        elif expected_sequence:
            try:
                if self.enum.sequenceregexps[expected_sequence].match(text):
                    sequence = expected_sequence
            except KeyError: # shouldn't happen
                raise ParserError('unknown enumerator sequence: %s'
                                  % sequence)
        elif text == 'i':
            # Ambiguous single letter: prefer Roman numeral 1.
            sequence = 'lowerroman'
        elif text == 'I':
            sequence = 'upperroman'
        if not sequence:
            for sequence in self.enum.sequences:
                if self.enum.sequenceregexps[sequence].match(text):
                    break
            else: # shouldn't happen
                raise ParserError('enumerator sequence not matched')
        if sequence == '#':
            ordinal = 1
        else:
            try:
                ordinal = self.enum.converters[sequence](text)
            except roman.InvalidRomanNumeralError:
                ordinal = None
        return format, sequence, text, ordinal
|
|
1295 |
|
|
1296 |
    def is_enumerated_list_item(self, ordinal, sequence, format):
        """
        Check validity based on the ordinal value and the second line.

        Return true iff the ordinal is valid and the second line is blank,
        indented, or starts with the next enumerator or an auto-enumerator.
        """
        if ordinal is None:
            return None
        try:
            next_line = self.state_machine.next_line()
        except EOFError: # end of input lines
            self.state_machine.previous_line()
            return 1
        else:
            # Restore position; we only peeked at the next line.
            self.state_machine.previous_line()
            if not next_line[:1].strip(): # blank or indented
                return 1
            result = self.make_enumerator(ordinal + 1, sequence, format)
            if result:
                next_enumerator, auto_enumerator = result
                try:
                    if ( next_line.startswith(next_enumerator) or
                         next_line.startswith(auto_enumerator) ):
                        return 1
                except TypeError:
                    pass
            return None
|
|
1324 |
|
|
1325 |
    def make_enumerator(self, ordinal, sequence, format):
        """
        Construct and return the next enumerated list item marker, and an
        auto-enumerator ("#" instead of the regular enumerator).

        Return ``None`` for invalid (out of range) ordinals.
        """ #"
        if sequence == '#':
            enumerator = '#'
        elif sequence == 'arabic':
            enumerator = str(ordinal)
        else:
            if sequence.endswith('alpha'):
                if ordinal > 26:
                    # Alphabetic sequences only go 'a'..'z'.
                    return None
                enumerator = chr(ordinal + ord('a') - 1)
            elif sequence.endswith('roman'):
                try:
                    enumerator = roman.toRoman(ordinal)
                except roman.RomanError:
                    return None
            else: # shouldn't happen
                raise ParserError('unknown enumerator sequence: "%s"'
                                  % sequence)
            if sequence.startswith('lower'):
                enumerator = enumerator.lower()
            elif sequence.startswith('upper'):
                enumerator = enumerator.upper()
            else: # shouldn't happen
                raise ParserError('unknown enumerator sequence: "%s"'
                                  % sequence)
        formatinfo = self.enum.formatinfo[format]
        # A trailing space is part of the marker.
        next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
                           + ' ')
        auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
        return next_enumerator, auto_enumerator
|
|
1361 |
|
|
1362 |
    def field_marker(self, match, context, next_state):
        """Field list item: start a field_list and parse following fields."""
        field_list = nodes.field_list()
        self.parent += field_list
        field, blank_finish = self.field(match)
        field_list += field
        offset = self.state_machine.line_offset + 1   # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=field_list, initial_state='FieldList',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Field list')
        return [], next_state, []
|
|
1378 |
|
|
1379 |
    def field(self, match):
        """
        Parse one field (name + body) of a field list.
        Return (field node, blank_finish flag).
        """
        name = self.parse_field_marker(match)
        lineno = self.state_machine.abs_line_number()
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        field_node = nodes.field()
        field_node.line = lineno
        # Field names may contain inline markup.
        name_nodes, name_messages = self.inline_text(name, lineno)
        field_node += nodes.field_name(name, '', *name_nodes)
        field_body = nodes.field_body('\n'.join(indented), *name_messages)
        field_node += field_body
        if indented:
            self.parse_field_body(indented, line_offset, field_body)
        return field_node, blank_finish
|
|
1393 |
|
|
1394 |
def parse_field_marker(self, match):
|
|
1395 |
"""Extract & return field name from a field marker match."""
|
|
1396 |
field = match.group()[1:] # strip off leading ':'
|
|
1397 |
field = field[:field.rfind(':')] # strip off trailing ':' etc.
|
|
1398 |
return field
|
|
1399 |
|
|
1400 |
    def parse_field_body(self, indented, offset, node):
        """Recursively parse the indented field body into `node`."""
        self.nested_parse(indented, input_offset=offset, node=node)
|
|
1402 |
|
|
1403 |
    def option_marker(self, match, context, next_state):
        """Option list item."""
        optionlist = nodes.option_list()
        try:
            listitem, blank_finish = self.option_list_item(match)
        except MarkupError, (message, lineno):
            # This shouldn't happen; pattern won't match.
            # Fall back: report the error and parse as a block quote.
            msg = self.reporter.error(
                'Invalid option list marker: %s' % message, line=lineno)
            self.parent += msg
            indented, indent, line_offset, blank_finish = \
                  self.state_machine.get_first_known_indented(match.end())
            elements = self.block_quote(indented, line_offset)
            self.parent += elements
            if not blank_finish:
                self.parent += self.unindent_warning('Option list')
            return [], next_state, []
        self.parent += optionlist
        optionlist += listitem
        offset = self.state_machine.line_offset + 1   # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=optionlist, initial_state='OptionList',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
|
|
1432 |
|
|
1433 |
    def option_list_item(self, match):
        """
        Parse one option list item (option group + description).
        Return (option_list_item node, blank_finish flag).
        """
        offset = self.state_machine.abs_line_offset()
        options = self.parse_option_marker(match)
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        if not indented: # not an option list item
            # Rewind and reparse as plain text.
            self.goto_line(offset)
            raise statemachine.TransitionCorrection('text')
        option_group = nodes.option_group('', *options)
        description = nodes.description('\n'.join(indented))
        option_list_item = nodes.option_list_item('', option_group,
                                                  description)
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=description)
        return option_list_item, blank_finish
|
|
1449 |
|
|
1450 |
    def parse_option_marker(self, match):
        """
        Return a list of `node.option` and `node.option_argument` objects,
        parsed from an option marker match.

        :Exception: `MarkupError` for invalid option markers.
        """
        optlist = []
        # Multiple synonymous options are separated by ", ".
        optionstrings = match.group().rstrip().split(', ')
        for optionstring in optionstrings:
            tokens = optionstring.split()
            delimiter = ' '
            firstopt = tokens[0].split('=')
            if len(firstopt) > 1:
                # "--opt=value" form
                tokens[:1] = firstopt
                delimiter = '='
            elif (len(tokens[0]) > 2
                  and ((tokens[0].startswith('-')
                        and not tokens[0].startswith('--'))
                       or tokens[0].startswith('+'))):
                # "-ovalue" form
                tokens[:1] = [tokens[0][:2], tokens[0][2:]]
                delimiter = ''
            if len(tokens) > 1 and (tokens[1].startswith('<')
                                    and tokens[-1].endswith('>')):
                # "-o <value1 value2>" form; join all values into one token
                tokens[1:] = [' '.join(tokens[1:])]
            if 0 < len(tokens) <= 2:
                option = nodes.option(optionstring)
                option += nodes.option_string(tokens[0], tokens[0])
                if len(tokens) > 1:
                    option += nodes.option_argument(tokens[1], tokens[1],
                                                    delimiter=delimiter)
                optlist.append(option)
            else:
                raise MarkupError(
                    'wrong number of option tokens (=%s), should be 1 or 2: '
                    '"%s"' % (len(tokens), optionstring),
                    self.state_machine.abs_line_number() + 1)
        return optlist
|
|
1491 |
|
|
1492 |
def doctest(self, match, context, next_state):
|
|
1493 |
data = '\n'.join(self.state_machine.get_text_block())
|
|
1494 |
self.parent += nodes.doctest_block(data, data)
|
|
1495 |
return [], next_state, []
|
|
1496 |
|
|
1497 |
    def line_block(self, match, context, next_state):
        """First line of a line block."""
        block = nodes.line_block()
        self.parent += block
        lineno = self.state_machine.abs_line_number()
        line, messages, blank_finish = self.line_block_line(match, lineno)
        block += line
        self.parent += messages
        if not blank_finish:
            # Consume the rest of the line block with a nested parse.
            offset = self.state_machine.line_offset + 1   # next line
            new_line_offset, blank_finish = self.nested_list_parse(
                self.state_machine.input_lines[offset:],
                input_offset=self.state_machine.abs_line_offset() + 1,
                node=block, initial_state='LineBlock',
                blank_finish=0)
            self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.reporter.warning(
                'Line block ends without a blank line.',
                line=(self.state_machine.abs_line_number() + 1))
        if len(block):
            if block[0].indent is None:
                block[0].indent = 0
            # Convert the flat line list into nested line_blocks by indent.
            self.nest_line_block_lines(block)
        return [], next_state, []
|
|
1522 |
|
|
1523 |
def line_block_line(self, match, lineno):
    """Return one line element of a line_block."""
    indented, indent, line_offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end(),
                                                      until_blank=1)
    text = u'\n'.join(indented)
    text_nodes, messages = self.inline_text(text, lineno)
    line = nodes.line(text, '', *text_nodes)
    if match.string.rstrip() != '|': # not empty
        # Record the text's indent (width of whitespace after "|").
        line.indent = len(match.group(1)) - 1
    return line, messages, blank_finish
|
|
1534 |
|
|
1535 |
def nest_line_block_lines(self, block):
    """Fill in inherited indents for indent-less lines, then nest."""
    # A line without an explicit indent inherits the indent of the
    # line above it; mutation is in place, so a filled-in value
    # propagates down a run of indent-less lines.
    for previous, current in zip(block[:-1], block[1:]):
        if current.indent is None:
            current.indent = previous.indent
    self.nest_line_block_segment(block)
|
|
1540 |
|
|
1541 |
def nest_line_block_segment(self, block):
    """
    Recursively move runs of deeper-indented lines into nested
    line_block child nodes, turning relative indentation into tree
    structure.
    """
    indents = [item.indent for item in block]
    least = min(indents)
    new_items = []
    new_block = nodes.line_block()
    for item in block:
        if item.indent > least:
            # Deeper than the current level: collect into the
            # pending sub-block.
            new_block.append(item)
        else:
            if len(new_block):
                # A run of deeper lines just ended; nest it.
                self.nest_line_block_segment(new_block)
                new_items.append(new_block)
                new_block = nodes.line_block()
            new_items.append(item)
    if len(new_block):
        # Trailing run of deeper-indented lines.
        self.nest_line_block_segment(new_block)
        new_items.append(new_block)
    block[:] = new_items
|
|
1559 |
|
|
1560 |
def grid_table_top(self, match, context, next_state):
    """Handle the top border of a full (grid) table."""
    # Delegate to the generic handler with grid-specific isolation
    # and parsing strategies.
    return self.table_top(
        match, context, next_state,
        self.isolate_grid_table, tableparser.GridTableParser)
|
|
1565 |
|
|
1566 |
def simple_table_top(self, match, context, next_state):
    """Handle the top border of a simple table."""
    # Delegate to the generic handler with simple-table isolation
    # and parsing strategies.
    return self.table_top(
        match, context, next_state,
        self.isolate_simple_table, tableparser.SimpleTableParser)
|
|
1571 |
|
|
1572 |
def table_top(self, match, context, next_state,
              isolate_function, parser_class):
    """Handle the top border of a table of any type."""
    nodelist, blank_finish = self.table(isolate_function, parser_class)
    self.parent += nodelist
    if not blank_finish:
        # A table must be followed by a blank line.
        warning = self.reporter.warning(
            'Blank line required after table.',
            line=self.state_machine.abs_line_number() + 1)
        self.parent += warning
    return [], next_state, []
|
|
1583 |
|
|
1584 |
def table(self, isolate_function, parser_class):
|
|
1585 |
"""Parse a table."""
|
|
1586 |
block, messages, blank_finish = isolate_function()
|
|
1587 |
if block:
|
|
1588 |
try:
|
|
1589 |
parser = parser_class()
|
|
1590 |
tabledata = parser.parse(block)
|
|
1591 |
tableline = (self.state_machine.abs_line_number() - len(block)
|
|
1592 |
+ 1)
|
|
1593 |
table = self.build_table(tabledata, tableline)
|
|
1594 |
nodelist = [table] + messages
|
|
1595 |
except tableparser.TableMarkupError, detail:
|
|
1596 |
nodelist = self.malformed_table(
|
|
1597 |
block, ' '.join(detail.args)) + messages
|
|
1598 |
else:
|
|
1599 |
nodelist = messages
|
|
1600 |
return nodelist, blank_finish
|
|
1601 |
|
|
1602 |
def isolate_grid_table(self):
    """
    Extract the text block of a grid table.

    Return (block, messages, blank_finish); on failure the block is
    empty and `messages` describes the problem.
    """
    messages = []
    blank_finish = 1
    try:
        block = self.state_machine.get_text_block(flush_left=1)
    except statemachine.UnexpectedIndentationError, instance:
        block, source, lineno = instance.args
        messages.append(self.reporter.error('Unexpected indentation.',
                                            source=source, line=lineno))
        blank_finish = 0
    block.disconnect()
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    width = len(block[0].strip())
    for i in range(len(block)):
        block[i] = block[i].strip()
        if block[i][0] not in '+|': # check left edge
            # Left edge broken: table ends here; rewind the state
            # machine to just after the last good line.
            blank_finish = 0
            self.state_machine.previous_line(len(block) - i)
            del block[i:]
            break
    if not self.grid_table_top_pat.match(block[-1]): # find bottom
        blank_finish = 0
        # from second-last to third line of table:
        for i in range(len(block) - 2, 1, -1):
            if self.grid_table_top_pat.match(block[i]):
                self.state_machine.previous_line(len(block) - i + 1)
                del block[i+1:]
                break
        else:
            # No bottom border found anywhere: malformed table.
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    for i in range(len(block)): # check right edge
        if len(block[i]) != width or block[i][-1] not in '+|':
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    return block, messages, blank_finish
|
|
1639 |
|
|
1640 |
def isolate_simple_table(self):
    """
    Extract the text block of a simple table.

    Return (block, messages, blank_finish); on failure the block is
    empty and `messages` describes the problem.
    """
    start = self.state_machine.line_offset
    lines = self.state_machine.input_lines
    limit = len(lines) - 1
    toplen = len(lines[start].strip())
    pattern_match = self.simple_table_border_pat.match
    found = 0
    found_at = None
    i = start + 1
    # Scan forward for border lines; the second border (or a border
    # at end of input / before a blank line) ends the table.
    while i <= limit:
        line = lines[i]
        match = pattern_match(line)
        if match:
            if len(line.strip()) != toplen:
                # All borders must be exactly as wide as the top one.
                self.state_machine.next_line(i - start)
                messages = self.malformed_table(
                    lines[start:i+1], 'Bottom/header table border does '
                    'not match top border.')
                return [], messages, i == limit or not lines[i+1].strip()
            found += 1
            found_at = i
            if found == 2 or i == limit or not lines[i+1].strip():
                end = i
                break
        i += 1
    else: # reached end of input_lines
        if found:
            # Only a header border was found; treat it as the end.
            extra = ' or no blank line after table bottom'
            self.state_machine.next_line(found_at - start)
            block = lines[start:found_at+1]
        else:
            extra = ''
            self.state_machine.next_line(i - start - 1)
            block = lines[start:]
        messages = self.malformed_table(
            block, 'No bottom table border found%s.' % extra)
        return [], messages, not extra
    self.state_machine.next_line(end - start)
    block = lines[start:end+1]
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    return block, [], end == limit or not lines[end+1].strip()
|
|
1682 |
|
|
1683 |
def malformed_table(self, block, detail=''):
    """Return a list holding one error node describing a bad table."""
    # Remove the padding characters inserted for East Asian text so
    # the reported literal block matches the original source.
    block.replace(self.double_width_pad_char, '')
    data = '\n'.join(block)
    first_line = self.state_machine.abs_line_number() - len(block) + 1
    if detail:
        message = 'Malformed table.' + '\n' + detail
    else:
        message = 'Malformed table.'
    return [self.reporter.error(message,
                                nodes.literal_block(data, data),
                                line=first_line)]
|
|
1693 |
|
|
1694 |
def build_table(self, tabledata, tableline, stub_columns=0):
    """Construct and return a table node tree from parsed table data."""
    colwidths, headrows, bodyrows = tabledata
    table = nodes.table()
    tgroup = nodes.tgroup(cols=len(colwidths))
    table += tgroup
    remaining_stubs = stub_columns
    for width in colwidths:
        colspec = nodes.colspec(colwidth=width)
        # The first `stub_columns` columns are marked as row stubs.
        if remaining_stubs:
            colspec.attributes['stub'] = 1
            remaining_stubs -= 1
        tgroup += colspec
    if headrows:
        thead = nodes.thead()
        tgroup += thead
        for headrow in headrows:
            thead += self.build_table_row(headrow, tableline)
    tbody = nodes.tbody()
    tgroup += tbody
    for bodyrow in bodyrows:
        tbody += self.build_table_row(bodyrow, tableline)
    return table
|
|
1715 |
|
|
1716 |
def build_table_row(self, rowdata, tableline):
    """Build and return a row node from one row of parsed cell data."""
    row = nodes.row()
    for cell in rowdata:
        if cell is None:
            # Position covered by a spanning cell: nothing to emit.
            continue
        morerows, morecols, offset, cellblock = cell
        atts = {}
        if morerows:
            atts['morerows'] = morerows
        if morecols:
            atts['morecols'] = morecols
        entry = nodes.entry(**atts)
        row += entry
        # Only parse cells with actual content; empty cells stay empty.
        if ''.join(cellblock):
            self.nested_parse(cellblock,
                              input_offset=tableline + offset,
                              node=entry)
    return row
|
|
1733 |
|
|
1734 |
|
|
1735 |
explicit = Struct()
"""Patterns and constants used for explicit markup recognition."""

# Compiled patterns for the bodies of explicit markup constructs:
# hyperlink targets, hyperlink references, and substitution markers.
# All are built with the escape-aware fragments defined on `Inliner`.
explicit.patterns = Struct(
      target=re.compile(r"""
                        (
                          _               # anonymous target
                        |               # *OR*
                          (?!_)           # no underscore at the beginning
                          (?P<quote>`?)   # optional open quote
                          (?![ `])        # first char. not space or
                                          # backquote
                          (?P<name>       # reference name
                            .+?
                          )
                          %(non_whitespace_escape_before)s
                          (?P=quote)      # close quote if open quote used
                        )
                        (?<!(?<!\x00):) # no unescaped colon at end
                        %(non_whitespace_escape_before)s
                        [ ]?            # optional space
                        :               # end of reference name
                        ([ ]+|$)        # followed by whitespace
                        """ % vars(Inliner), re.VERBOSE),
      reference=re.compile(r"""
                           (
                             (?P<simple>%(simplename)s)_
                           |                  # *OR*
                             `                  # open backquote
                             (?![ ])            # not space
                             (?P<phrase>.+?)    # hyperlink phrase
                             %(non_whitespace_escape_before)s
                             `_                 # close backquote,
                                                # reference mark
                           )
                           $                  # end of string
                           """ % vars(Inliner), re.VERBOSE | re.UNICODE),
      substitution=re.compile(r"""
                              (
                                (?![ ])          # first char. not space
                                (?P<name>.+?)    # substitution text
                                %(non_whitespace_escape_before)s
                                \|               # close delimiter
                              )
                              ([ ]+|$)           # followed by whitespace
                              """ % vars(Inliner), re.VERBOSE),)
|
|
1781 |
|
|
1782 |
def footnote(self, match):
    """Parse a footnote; return ([footnote_node], blank_finish)."""
    lineno = self.state_machine.abs_line_number()
    indented, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end())
    label = match.group(1)
    name = normalize_name(label)
    footnote = nodes.footnote('\n'.join(indented))
    footnote.line = lineno
    if name[0] == '#': # auto-numbered
        name = name[1:] # autonumber label
        footnote['auto'] = 1
        if name:
            footnote['names'].append(name)
        self.document.note_autofootnote(footnote)
    elif name == '*': # auto-symbol
        name = ''
        footnote['auto'] = '*'
        self.document.note_symbol_footnote(footnote)
    else: # manually numbered
        footnote += nodes.label('', label)
        footnote['names'].append(name)
        self.document.note_footnote(footnote)
    if name:
        # Named footnotes double as explicit link targets.
        self.document.note_explicit_target(footnote, footnote)
    else:
        self.document.set_id(footnote, footnote)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=footnote)
    return [footnote], blank_finish
|
|
1811 |
|
|
1812 |
def citation(self, match):
    """Parse a citation; return ([citation_node], blank_finish)."""
    lineno = self.state_machine.abs_line_number()
    (indented, indent, offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         match.end())
    label = match.group(1)
    refname = normalize_name(label)
    node = nodes.citation('\n'.join(indented))
    node.line = lineno
    node += nodes.label('', label)
    node['names'].append(refname)
    # Register the citation and make it an explicit link target.
    self.document.note_citation(node)
    self.document.note_explicit_target(node, node)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=node)
    return [node], blank_finish
|
|
1827 |
|
|
1828 |
def hyperlink_target(self, match):
    """Parse an explicit hyperlink target; return ([target], blank_finish)."""
    pattern = self.explicit.patterns.target
    lineno = self.state_machine.abs_line_number()
    block, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(
          match.end(), until_blank=1, strip_indent=0)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    block = [escape2null(line) for line in block]
    escaped = block[0]
    blockindex = 0
    # The target name may wrap over several lines; keep appending
    # lines to `escaped` until the target pattern matches.
    while 1:
        targetmatch = pattern.match(escaped)
        if targetmatch:
            break
        blockindex += 1
        try:
            escaped += block[blockindex]
        except IndexError:
            raise MarkupError('malformed hyperlink target.', lineno)
    del block[:blockindex]
    # Strip the matched target prefix off the first remaining line.
    block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
    target = self.make_target(block, blocktext, lineno,
                              targetmatch.group('name'))
    return [target], blank_finish
|
|
1852 |
|
|
1853 |
def make_target(self, block, block_text, lineno, target_name):
    """Create, register, and return a target node of the right kind."""
    target_type, data = self.parse_target(block, block_text, lineno)
    if target_type == 'refname':
        # Indirect target: refers to another target by name.
        target = nodes.target(block_text, '',
                              refname=normalize_name(data))
        target.indirect_reference_name = data
        self.add_target(target_name, '', target, lineno)
        self.document.note_indirect_target(target)
        return target
    if target_type == 'refuri':
        # Direct target: refers to a URI.
        target = nodes.target(block_text, '')
        self.add_target(target_name, data, target, lineno)
        return target
    # Otherwise `data` is already a node describing the problem.
    return data
|
|
1867 |
|
|
1868 |
def parse_target(self, block, block_text, lineno):
    """
    Determine the type of reference of a target.

    :Return: A 2-tuple, one of:

        - 'refname' and the indirect reference name
        - 'refuri' and the URI
    """
    if block and block[-1].strip()[-1:] == '_':
        # Trailing underscore: possibly an indirect target, i.e. a
        # reference to another target.
        candidate = ' '.join([line.strip() for line in block])
        refname = self.is_reference(candidate)
        if refname:
            return 'refname', refname
    # Otherwise treat the block as a URI, with all whitespace removed.
    uri = ''.join([''.join(line.split()) for line in block])
    return 'refuri', unescape(uri)
|
|
1885 |
|
|
1886 |
def is_reference(self, reference):
    """Return the reference name if `reference` parses as one, else None."""
    match = self.explicit.patterns.reference.match(
        whitespace_normalize_name(reference))
    if match:
        # Either a simple name or a backquoted phrase matched.
        return unescape(match.group('simple') or match.group('phrase'))
    return None
|
|
1892 |
|
|
1893 |
def add_target(self, targetname, refuri, target, lineno):
    """Register `target` (named or anonymous) with the document."""
    target.line = lineno
    if not targetname:
        # anonymous target
        if refuri:
            target['refuri'] = refuri
        target['anonymous'] = 1
        self.document.note_anonymous_target(target)
        return
    name = normalize_name(unescape(targetname))
    target['names'].append(name)
    if refuri:
        uri = self.inliner.adjust_uri(refuri)
        if not uri:
            raise ApplicationError('problem with URI: %r' % refuri)
        target['refuri'] = uri
    self.document.note_explicit_target(target, self.parent)
|
|
1910 |
|
|
1911 |
def substitution_def(self, match):
    """Parse a substitution definition; return (nodes, blank_finish)."""
    pattern = self.explicit.patterns.substitution
    lineno = self.state_machine.abs_line_number()
    block, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end(),
                                                      strip_indent=0)
    blocktext = (match.string[:match.end()] + '\n'.join(block))
    block.disconnect()
    escaped = escape2null(block[0].rstrip())
    blockindex = 0
    # The substitution name may wrap; accumulate lines until the
    # substitution pattern matches.
    while 1:
        subdefmatch = pattern.match(escaped)
        if subdefmatch:
            break
        blockindex += 1
        try:
            escaped = escaped + ' ' + escape2null(block[blockindex].strip())
        except IndexError:
            raise MarkupError('malformed substitution definition.',
                              lineno)
    del block[:blockindex] # strip out the substitution marker
    # Strip the matched marker prefix off the first remaining line.
    block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
    if not block[0]:
        del block[0]
        offset += 1
    while block and not block[-1].strip():
        block.pop()
    subname = subdefmatch.group('name')
    substitution_node = nodes.substitution_definition(blocktext)
    substitution_node.line = lineno
    if not block:
        msg = self.reporter.warning(
            'Substitution definition "%s" missing contents.' % subname,
            nodes.literal_block(blocktext, blocktext), line=lineno)
        return [msg], blank_finish
    block[0] = block[0].strip()
    substitution_node['names'].append(
        nodes.whitespace_normalize_name(subname))
    new_abs_offset, blank_finish = self.nested_list_parse(
        block, input_offset=offset, node=substitution_node,
        initial_state='SubstitutionDef', blank_finish=blank_finish)
    i = 0
    # Move non-inline children (e.g. system messages) out of the
    # substitution definition and into the parent.
    for node in substitution_node[:]:
        if not (isinstance(node, nodes.Inline) or
                isinstance(node, nodes.Text)):
            self.parent += substitution_node[i]
            del substitution_node[i]
        else:
            i += 1
    for node in substitution_node.traverse(nodes.Element):
        if self.disallowed_inside_substitution_definitions(node):
            pformat = nodes.literal_block('', node.pformat().rstrip())
            msg = self.reporter.error(
                'Substitution definition contains illegal element:',
                pformat, nodes.literal_block(blocktext, blocktext),
                line=lineno)
            return [msg], blank_finish
    if len(substitution_node) == 0:
        msg = self.reporter.warning(
            'Substitution definition "%s" empty or invalid.'
            % subname,
            nodes.literal_block(blocktext, blocktext), line=lineno)
        return [msg], blank_finish
    self.document.note_substitution_def(
        substitution_node, subname, self.parent)
    return [substitution_node], blank_finish
|
|
1977 |
|
|
1978 |
def disallowed_inside_substitution_definitions(self, node):
    """Return true if `node` may not appear in a substitution definition."""
    # Elements with ids, anonymous references, and auto footnote
    # references would break when the definition is duplicated.
    if node['ids']:
        return 1
    if isinstance(node, nodes.reference) and node.get('anonymous'):
        return 1
    if isinstance(node, nodes.footnote_reference) and node.get('auto'):
        return 1
    return 0
|
|
1985 |
|
|
1986 |
def directive(self, match, **option_presets):
    """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
    name = match.group(1)
    # Look up the directive implementation for the current language.
    directive_class, messages = directives.directive(
        name, self.memo.language, self.document)
    self.parent += messages
    if not directive_class:
        return self.unknown_directive(name)
    return self.run_directive(directive_class, match, name,
                              option_presets)
|
|
1997 |
|
|
1998 |
def run_directive(self, directive, match, type_name, option_presets):
    """
    Parse a directive then run its directive function.

    Parameters:

    - `directive`: The class implementing the directive.  Must be
      a subclass of `rst.Directive`.

    - `match`: A regular expression match object which matched the first
      line of the directive.

    - `type_name`: The directive name, as used in the source text.

    - `option_presets`: A dictionary of preset options, defaults for the
      directive options.  Currently, only an "alt" option is passed by
      substitution definitions (value: the substitution name), which may
      be used by an embedded image directive.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    # Legacy function-style directives are wrapped into classes.
    if isinstance(directive, (FunctionType, MethodType)):
        from docutils.parsers.rst import convert_directive_function
        directive = convert_directive_function(directive)
    lineno = self.state_machine.abs_line_number()
    initial_line_offset = self.state_machine.line_offset
    indented, indent, line_offset, blank_finish \
              = self.state_machine.get_first_known_indented(match.end(),
                                                            strip_top=0)
    # Full source text of the directive, for error reporting.
    block_text = '\n'.join(self.state_machine.input_lines[
        initial_line_offset : self.state_machine.line_offset + 1])
    try:
        arguments, options, content, content_offset = (
            self.parse_directive_block(indented, line_offset,
                                       directive, option_presets))
    except MarkupError, detail:
        error = self.reporter.error(
            'Error in "%s" directive:\n%s.' % (type_name,
                                               ' '.join(detail.args)),
            nodes.literal_block(block_text, block_text), line=lineno)
        return [error], blank_finish
    directive_instance = directive(
        type_name, arguments, options, content, lineno,
        content_offset, block_text, self, self.state_machine)
    try:
        result = directive_instance.run()
    except docutils.parsers.rst.DirectiveError, directive_error:
        # A directive may signal failure with a DirectiveError; turn
        # it into a system message at the requested level.
        msg_node = self.reporter.system_message(directive_error.level,
                                                directive_error.message)
        msg_node += nodes.literal_block(block_text, block_text)
        msg_node['line'] = lineno
        result = [msg_node]
    assert isinstance(result, list), \
           'Directive "%s" must return a list of nodes.' % type_name
    for i in range(len(result)):
        assert isinstance(result[i], nodes.Node), \
               ('Directive "%s" returned non-Node object (index %s): %r'
                % (type_name, i, result[i]))
    return (result,
            blank_finish or self.state_machine.is_next_line_blank())
|
|
2058 |
|
|
2059 |
def parse_directive_block(self, indented, line_offset, directive,
                          option_presets):
    """
    Divide a directive's indented text into arguments, options, and
    content; return (arguments, options, content, content_offset).

    Raises `MarkupError` on structural problems.
    """
    option_spec = directive.option_spec
    has_content = directive.has_content
    if indented and not indented[0].strip():
        indented.trim_start()
        line_offset += 1
    while indented and not indented[-1].strip():
        indented.trim_end()
    if indented and (directive.required_arguments
                     or directive.optional_arguments
                     or option_spec):
        # The argument/option block ends at the first blank line.
        for i in range(len(indented)):
            if not indented[i].strip():
                break
        else:
            # No blank line: everything is the argument block.
            i += 1
        arg_block = indented[:i]
        content = indented[i+1:]
        content_offset = line_offset + i + 1
    else:
        content = indented
        content_offset = line_offset
        arg_block = []
    while content and not content[0].strip():
        content.trim_start()
        content_offset += 1
    if option_spec:
        options, arg_block = self.parse_directive_options(
            option_presets, option_spec, arg_block)
        if arg_block and not (directive.required_arguments
                              or directive.optional_arguments):
            raise MarkupError('no arguments permitted; blank line '
                              'required before content block')
    else:
        options = {}
    if directive.required_arguments or directive.optional_arguments:
        arguments = self.parse_directive_arguments(
            directive, arg_block)
    else:
        arguments = []
    if content and not has_content:
        raise MarkupError('no content permitted')
    return (arguments, options, content, content_offset)
|
|
2103 |
|
|
2104 |
def parse_directive_options(self, option_presets, option_spec, arg_block):
    """Split options off `arg_block`; return (options dict, arguments)."""
    options = option_presets.copy()
    opt_block = []
    # The option field list starts at the first line beginning with ':'.
    for index, line in enumerate(arg_block):
        if line[:1] == ':':
            opt_block = arg_block[index:]
            arg_block = arg_block[:index]
            break
    if opt_block:
        success, data = self.parse_extension_options(option_spec,
                                                     opt_block)
        if not success:
            # On failure `data` is an error string.
            raise MarkupError(data)
        options.update(data)
    return options, arg_block
|
|
2121 |
|
|
2122 |
def parse_directive_arguments(self, directive, arg_block):
    """Split `arg_block` into the directive's argument list."""
    required = directive.required_arguments
    optional = directive.optional_arguments
    arg_text = '\n'.join(arg_block)
    arguments = arg_text.split()
    count = len(arguments)
    if count < required:
        raise MarkupError('%s argument(s) required, %s supplied'
                          % (required, count))
    if count > required + optional:
        if directive.final_argument_whitespace:
            # The last argument absorbs any remaining whitespace-
            # separated text.
            arguments = arg_text.split(None, required + optional - 1)
        else:
            raise MarkupError(
                'maximum %s argument(s) allowed, %s supplied'
                % (required + optional, count))
    return arguments
|
|
2138 |
|
|
2139 |
def parse_extension_options(self, option_spec, datalines):
    """
    Parse `datalines` for a field list containing extension options
    matching `option_spec`.

    :Parameters:
        - `option_spec`: a mapping of option name to conversion
          function, which should raise an exception on bad input.
        - `datalines`: a list of input strings.

    :Return:
        - Success value, 1 or 0.
        - An option dictionary on success, an error string on failure.
    """
    node = nodes.field_list()
    newline_offset, blank_finish = self.nested_list_parse(
        datalines, 0, node, initial_state='ExtensionOptions',
        blank_finish=1)
    if newline_offset != len(datalines): # incomplete parse of block
        return 0, 'invalid option block'
    try:
        options = utils.extract_extension_options(node, option_spec)
    except KeyError, detail:
        return 0, ('unknown option: "%s"' % detail.args[0])
    except (ValueError, TypeError), detail:
        return 0, ('invalid option value: %s' % ' '.join(detail.args))
    except utils.ExtensionOptionError, detail:
        return 0, ('invalid option data: %s' % ' '.join(detail.args))
    if blank_finish:
        return 1, options
    else:
        return 0, 'option data incompletely parsed'
|
|
2171 |
|
|
2172 |
def unknown_directive(self, type_name):
    """Report an unknown directive; return ([error_node], blank_finish)."""
    lineno = self.state_machine.abs_line_number()
    (indented, indent, offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         0, strip_indent=0)
    text = '\n'.join(indented)
    error = self.reporter.error(
        'Unknown directive type "%s".' % type_name,
        nodes.literal_block(text, text), line=lineno)
    return [error], blank_finish
|
|
2181 |
|
|
2182 |
def comment(self, match):
    """Parse a comment; return ([comment_node], blank_finish)."""
    rest_of_line = match.string[match.end():]
    # An empty ".." followed by a blank line yields an empty comment
    # node -- "A tiny but practical wart."
    if not rest_of_line.strip() \
          and self.state_machine.is_next_line_blank():
        return [nodes.comment()], 1
    (indented, indent, offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         match.end())
    while indented and not indented[-1].strip():
        indented.trim_end()
    text = '\n'.join(indented)
    return [nodes.comment(text, text)], blank_finish
|
|
2192 |
|
|
2193 |
# Ordered list of (method, pattern) pairs: `explicit_construct` tries
# each pattern against the markup block and dispatches to the first
# matching method; anything unmatched falls back to a comment.
explicit.constructs = [
      (footnote,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[
                  (                 # footnote label:
                      [0-9]+          # manually numbered footnote
                    |               # *OR*
                      \#              # anonymous auto-numbered footnote
                    |               # *OR*
                      \#%s            # auto-number ed?) footnote label
                    |               # *OR*
                      \*              # auto-symbol footnote
                  )
                  \]
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
      (citation,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[(%s)\]          # citation label
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
      (hyperlink_target,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  _                 # target indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE)),
      (substitution_def,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \|                # substitution indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE)),
      (directive,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  (%s)              # directive name
                  [ ]?              # optional space
                  ::                # directive delimiter
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
|
|
2236 |
|
|
2237 |
def explicit_markup(self, match, context, next_state):
    """Footnotes, hyperlink targets, directives, comments."""
    parsed_nodes, blank_finish = self.explicit_construct(match)
    self.parent += parsed_nodes
    # Pick up any explicit constructs that follow immediately.
    self.explicit_list(blank_finish)
    return [], next_state, []
|
|
2243 |
|
|
2244 |
def explicit_construct(self, match):
    """Determine which explicit construct this is, parse & return it."""
    errors = []
    for method, pattern in self.explicit.constructs:
        expmatch = pattern.match(match.string)
        if expmatch:
            try:
                return method(self, expmatch)
            except MarkupError, (message, lineno): # never reached?
                errors.append(self.reporter.warning(message, line=lineno))
                break
    # No construct matched (or one failed): fall back to a comment.
    nodelist, blank_finish = self.comment(match)
    return nodelist + errors, blank_finish
|
|
2257 |
|
|
2258 |
def explicit_list(self, blank_finish):
    """
    Parse a series of explicit markup constructs (including anonymous
    hyperlink targets) with a nested state machine.
    """
    next_offset = self.state_machine.line_offset + 1
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[next_offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=self.parent, initial_state='Explicit',
        blank_finish=blank_finish,
        match_titles=self.state_machine.match_titles)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Explicit markup')
|
|
2273 |
|
|
2274 |
def anonymous(self, match, context, next_state):
    """Anonymous hyperlink targets."""
    target_nodes, blank_finish = self.anonymous_target(match)
    self.parent += target_nodes
    # Continue with any directly following explicit constructs.
    self.explicit_list(blank_finish)
    return [], next_state, []
|
|
2280 |
|
|
2281 |
def anonymous_target(self, match):
    """Parse an anonymous target; return ([target_node], blank_finish)."""
    lineno = self.state_machine.abs_line_number()
    (block, indent, offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         match.end(), until_blank=1)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    escaped_block = [escape2null(line) for line in block]
    # Anonymous targets have no name; pass an empty string.
    target = self.make_target(escaped_block, blocktext, lineno, '')
    return [target], blank_finish
|
|
2290 |
|
|
2291 |
def line(self, match, context, next_state):
    """Section title overline or transition marker."""
    if self.state_machine.match_titles:
        # Titles are allowed here; let the `Line` state decide whether
        # this is an overline or a transition.
        return [match.string], 'Line', []
    elif match.string.strip() == '::':
        # A lone "::" is a literal-block marker; reparse as text.
        raise statemachine.TransitionCorrection('text')
    elif len(match.string.strip()) < 4:
        # Too short to be taken seriously as a marker; reparse as text.
        msg = self.reporter.info(
            'Unexpected possible title overline or transition.\n'
            "Treating it as ordinary text because it's so short.",
            line=self.state_machine.abs_line_number())
        self.parent += msg
        raise statemachine.TransitionCorrection('text')
    else:
        # Titles forbidden in this context (e.g. nested parse).
        blocktext = self.state_machine.line
        msg = self.reporter.severe(
            'Unexpected section title or transition.',
            nodes.literal_block(blocktext, blocktext),
            line=self.state_machine.abs_line_number())
        self.parent += msg
        return [], next_state, []
|
|
2312 |
|
|
2313 |
def text(self, match, context, next_state):
    """Titles, definition lists, paragraphs."""
    # Classification is deferred to the `Text` state, which inspects the
    # *next* line (underline, indent, or blank).
    return [match.string], 'Text', []
|
|
2316 |
|
|
2317 |
|
|
2318 |
class RFC2822Body(Body):

    """
    RFC2822 headers are only valid as the first constructs in documents.  As
    soon as anything else appears, the `Body` state should take over.
    """

    patterns = Body.patterns.copy() # can't modify the original
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        fieldlist = nodes.field_list(classes=['rfc2822'])
        self.parent += fieldlist
        field, blank_finish = self.rfc2822_field(match)
        fieldlist += field
        offset = self.state_machine.line_offset + 1 # next line
        # Collect any further header fields via `RFC2822List`.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=fieldlist, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """Parse one "Name: value" header; return (field_node, blank_finish)."""
        name = match.string[:match.string.find(':')]
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end(),
                                                          until_blank=1)
        fieldnode = nodes.field()
        fieldnode += nodes.field_name(name, name)
        fieldbody = nodes.field_body('\n'.join(indented))
        fieldnode += fieldbody
        if indented:
            # Parse the field value as body elements.
            self.nested_parse(indented, input_offset=line_offset,
                              node=fieldbody)
        return fieldnode, blank_finish
|
|
2362 |
|
|
2363 |
|
|
2364 |
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.  Compound
    elements are lists and list-like constructs.

    All transition methods are disabled (redefined as `invalid_input`).
    Override individual methods in subclasses to re-enable.

    For example, once an initial bullet list item, say, is recognized, the
    `BulletList` subclass takes over, with a "bullet_list" node as its
    container.  Upon encountering the initial bullet list item, `Body.bullet`
    calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
    starts up a nested parsing session with `BulletList` as the initial state.
    Only the ``bullet`` transition method is enabled in `BulletList`; as long
    as only bullet list items are encountered, they are parsed and inserted
    into the container.  The first construct which is *not* a bullet list item
    triggers the `invalid_input` method, which ends the nested parse and
    closes the container.  `BulletList` needs to recognize input that is
    invalid in the context of a bullet list, which means everything *other
    than* bullet list items, so it inherits the transition list created in
    `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        self.state_machine.previous_line() # back up so parent SM can reassess
        raise EOFError

    # Disable every `Body` transition; subclasses selectively re-enable.
    indent = invalid_input
    bullet = invalid_input
    enumerator = invalid_input
    field_marker = invalid_input
    option_marker = invalid_input
    doctest = invalid_input
    line_block = invalid_input
    grid_table_top = invalid_input
    simple_table_top = invalid_input
    explicit_markup = invalid_input
    anonymous = invalid_input
    line = invalid_input
    text = invalid_input
|
|
2406 |
|
|
2407 |
|
|
2408 |
class BulletList(SpecializedBody):

    """Parses the second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Append one more item to the enclosing bullet list."""
        current_bullet = self.parent['bullet']
        if match.string[0] != current_bullet:
            # A different bullet character starts a new list; abort so the
            # parent state machine can reassess this line.
            self.invalid_input()
        item, ends_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = ends_blank
        return [], next_state, []
|
|
2421 |
|
|
2422 |
|
|
2423 |
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Definition lists."""
        # A text line here can only be a new term; the `Definition` state
        # then checks the next line for the indented definition.
        return [match.string], 'Definition', []
|
|
2430 |
|
|
2431 |
|
|
2432 |
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """Enumerated list item."""
        format, sequence, text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        # The item must match the list's format and sequence, and continue
        # the numbering ('#' auto-enumerators are accepted anywhere).
        if ( format != self.format
             or (sequence != '#' and (sequence != self.parent['enumtype']
                                      or self.auto
                                      or ordinal != (self.lastordinal + 1)))
             or not self.is_enumerated_list_item(ordinal, sequence, format)):
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            # From here on the list is auto-enumerated.
            self.auto = 1
        listitem, blank_finish = self.list_item(match.end())
        self.parent += listitem
        self.blank_finish = blank_finish
        self.lastordinal = ordinal
        return [], next_state, []
|
|
2454 |
|
|
2455 |
|
|
2456 |
class FieldList(SpecializedBody):

    """Parses the second and subsequent fields of a field_list."""

    def field_marker(self, match, context, next_state):
        """Append one more field to the enclosing field list."""
        field_node, ends_blank = self.field(match)
        self.parent += field_node
        self.blank_finish = ends_blank
        return [], next_state, []
|
|
2466 |
|
|
2467 |
|
|
2468 |
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """Option list item."""
        try:
            option_list_item, blank_finish = self.option_list_item(match)
        except MarkupError, (message, lineno):
            # A malformed option line ends the list; `invalid_input` raises
            # EOFError, so control never reaches the statements below.
            self.invalid_input()
        self.parent += option_list_item
        self.blank_finish = blank_finish
        return [], next_state, []
|
|
2481 |
|
|
2482 |
|
|
2483 |
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    # Reuse `RFC2822Body`'s transition table; `SpecializedBody` (first in
    # the MRO) disables every other transition method.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        field, blank_finish = self.rfc2822_field(match)
        self.parent += field
        self.blank_finish = blank_finish
        return [], 'RFC2822List', []

    # A blank line terminates the header block.
    blank = SpecializedBody.invalid_input
|
|
2498 |
|
|
2499 |
|
|
2500 |
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node):
        """Override `Body.parse_field_body`: group lines into plain paragraphs."""
        buffered = []
        # The appended '' sentinel guarantees the final group is flushed.
        for current in list(indented) + ['']:
            if not current.strip():
                if buffered:
                    text = '\n'.join(buffered)
                    node += nodes.paragraph(text, text)
                    buffered = []
            else:
                buffered.append(current)
|
|
2518 |
|
|
2519 |
|
|
2520 |
class LineBlock(SpecializedBody):

    """Parses the second and subsequent lines of a line_block."""

    # A blank line terminates the line block.
    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """Append one more line to the enclosing line block."""
        current_lineno = self.state_machine.abs_line_number()
        line_node, messages, ends_blank = self.line_block_line(
            match, current_lineno)
        self.parent += line_node
        # Any system messages belong outside the line_block container.
        self.parent.parent += messages
        self.blank_finish = ends_blank
        return [], next_state, []
|
|
2534 |
|
|
2535 |
|
|
2536 |
class Explicit(SpecializedBody):

    """Parses the second and subsequent explicit markup constructs."""

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, and comments."""
        produced, ends_blank = self.explicit_construct(match)
        self.parent += produced
        self.blank_finish = ends_blank
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets ("__ ...")."""
        produced, ends_blank = self.anonymous_target(match)
        self.parent += produced
        self.blank_finish = ends_blank
        return [], next_state, []

    # A blank line ends the series of explicit constructs.
    blank = SpecializedBody.invalid_input
|
|
2555 |
|
|
2556 |
|
|
2557 |
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
          'embedded_directive': re.compile(r'(%s)::( +|$)'
                                           % Inliner.simplename, re.UNICODE),
          'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """Directive embedded in a substitution definition; then finish."""
        nodelist, blank_finish = self.directive(match,
                                                alt=self.parent['names'][0])
        self.parent += nodelist
        if not self.state_machine.at_eof():
            self.blank_finish = blank_finish
        # One construct only: EOFError ends this nested state machine.
        raise EOFError

    def text(self, match, context, next_state):
        """Plain text: nothing more to parse here; finish immediately."""
        if not self.state_machine.at_eof():
            self.blank_finish = self.state_machine.is_next_line_blank()
        # EOFError ends this nested state machine (not an error condition).
        raise EOFError
|
|
2581 |
|
|
2582 |
|
|
2583 |
class Text(RSTState):

    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.
    """

    patterns = {'underline': Body.patterns['line'],
                'text': r''}
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]

    def blank(self, match, context, next_state):
        """End of paragraph."""
        paragraph, literalnext = self.paragraph(
            context, self.state_machine.abs_line_number() - 1)
        self.parent += paragraph
        if literalnext:
            # Paragraph ended with "::"; a literal block follows.
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        """Flush any pending paragraph at end of input."""
        if context:
            self.blank(None, context, None)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        definitionlist = nodes.definition_list()
        definitionlistitem, blank_finish = self.definition_list_item(context)
        definitionlist += definitionlistitem
        self.parent += definitionlist
        offset = self.state_machine.line_offset + 1 # next line
        # Collect further items via the `DefinitionList` state.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=definitionlist, initial_state='DefinitionList',
            blank_finish=blank_finish, blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []

    def underline(self, match, context, next_state):
        """Section title."""
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        underline = match.string.rstrip()
        source = title + '\n' + underline
        messages = []
        if column_width(title) > len(underline):
            if len(underline) < 4:
                # Short underline: treat as text, with an advisory note
                # only where titles are actually allowed.
                if self.state_machine.match_titles:
                    msg = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += msg
                raise statemachine.TransitionCorrection('text')
            else:
                blocktext = context[0] + '\n' + self.state_machine.line
                msg = self.reporter.warning(
                    'Title underline too short.',
                    nodes.literal_block(blocktext, blocktext), line=lineno)
                messages.append(msg)
        if not self.state_machine.match_titles:
            # Titles forbidden here (e.g. nested parse): report and move on.
            blocktext = context[0] + '\n' + self.state_machine.line
            msg = self.reporter.severe(
                'Unexpected section title.',
                nodes.literal_block(blocktext, blocktext), line=lineno)
            self.parent += messages
            self.parent += msg
            return [], next_state, []
        style = underline[0]
        context[:] = []
        self.section(title, source, style, lineno - 1, messages)
        return [], next_state, []

    def text(self, match, context, next_state):
        """Paragraph."""
        startline = self.state_machine.abs_line_number() - 1
        msg = None
        try:
            block = self.state_machine.get_text_block(flush_left=1)
        except statemachine.UnexpectedIndentationError, instance:
            # Keep the text gathered so far; report the bad indent.
            block, source, lineno = instance.args
            msg = self.reporter.error('Unexpected indentation.',
                                      source=source, line=lineno)
        lines = context + list(block)
        paragraph, literalnext = self.paragraph(lines, startline)
        self.parent += paragraph
        self.parent += msg
        if literalnext:
            # Paragraph ended with "::"; skip the blank line (if any) and
            # consume the literal block.
            try:
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []

    def literal_block(self):
        """Return a list of nodes."""
        indented, indent, offset, blank_finish = \
              self.state_machine.get_indented()
        # Strip trailing blank lines from the indented block.
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            # No indented block: try a quoted (unindented) literal block.
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        literal_block = nodes.literal_block(data, data)
        literal_block.line = offset + 1
        nodelist = [literal_block]
        if not blank_finish:
            nodelist.append(self.unindent_warning('Literal block'))
        return nodelist

    def quoted_literal_block(self):
        """Parse an unindented literal block quoted with punctuation chars."""
        abs_line_offset = self.state_machine.abs_line_offset()
        offset = self.state_machine.line_offset
        parent_node = nodes.Element()
        # Dedicated nested parse using only the `QuotedLiteralBlock` state.
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[offset:],
            input_offset=abs_line_offset, node=parent_node, match_titles=0,
            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
                                  'initial_state': 'QuotedLiteralBlock'})
        self.goto_line(new_abs_offset)
        return parent_node.children

    def definition_list_item(self, termline):
        """Parse one term + definition; return (list_item, blank_finish)."""
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_indented()
        definitionlistitem = nodes.definition_list_item(
            '\n'.join(termline + list(indented)))
        lineno = self.state_machine.abs_line_number() - 1
        definitionlistitem.line = lineno
        termlist, messages = self.term(termline, lineno)
        definitionlistitem += termlist
        definition = nodes.definition('', *messages)
        definitionlistitem += definition
        if termline[0][-2:] == '::':
            # Probably a botched literal block, not a definition list.
            definition += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.', line=line_offset+1)
        self.nested_parse(indented, input_offset=line_offset, node=definition)
        return definitionlistitem, blank_finish

    # Delimits the term from its " : classifier" suffixes.
    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """Return a definition_list's term and optional classifiers."""
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        term_node = nodes.term()
        node_list = [term_node]
        for i in range(len(text_nodes)):
            node = text_nodes[i]
            if isinstance(node, nodes.Text):
                # Split plain text on " : " to peel off classifiers.
                parts = self.classifier_delimiter.split(node.rawsource)
                if len(parts) == 1:
                    node_list[-1] += node
                else:

                    node_list[-1] += nodes.Text(parts[0].rstrip())
                    for part in parts[1:]:
                        classifier_node = nodes.classifier('', part)
                        node_list.append(classifier_node)
            else:
                # Inline markup nodes go to the most recent container.
                node_list[-1] += node
        return node_list, messages
|
|
2752 |
|
|
2753 |
|
|
2754 |
class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    # Disable every `Text` transition; subclasses selectively re-enable.
    blank = invalid_input
    indent = invalid_input
    underline = invalid_input
    text = invalid_input
|
|
2775 |
|
|
2776 |
|
|
2777 |
class Definition(SpecializedText):

    """Handles the second line of a potential definition_list_item."""

    def eof(self, context):
        """Not a definition after all."""
        # Rewind past the term line too, so the parent state machine can
        # reparse both lines as ordinary text.
        self.state_machine.previous_line(2)
        return []

    def indent(self, match, context, next_state):
        """An indented block here confirms a definition list item."""
        item, ends_blank = self.definition_list_item(context)
        self.parent += item
        self.blank_finish = ends_blank
        return [], 'DefinitionList', []
|
|
2792 |
|
|
2793 |
|
|
2794 |
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1 # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = 0
        elif len(marker) < 4:
            # Too short for a transition; back up and reparse as text.
            self.state_correction(context)
        if self.eofcheck: # ignore EOFError with sections
            lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.line = lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        lineno = self.state_machine.abs_line_number() - 1
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition; back up and reparse as text.
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.line = lineno
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            # Overline + title but no underline: incomplete.
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext), line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            # Third line is not a line of punctuation at all.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source), line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            # Overline and underline must be identical.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source), line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            # Title wider than its overline: warn (or reparse if short).
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source), line=lineno)
                messages.append(msg)
        style = (overline[0], underline[0])
        self.eofcheck = 0 # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text # indented title

    def underline(self, match, context, next_state):
        """Second line of punctuation directly after the overline: invalid."""
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext), line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        """Report a too-short overline and reparse it as ordinary text."""
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.", line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """Rewind `lines` lines and restart in `Body` at its text transition."""
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
|
|
2912 |
|
|
2913 |
|
|
2914 |
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=0):
        """Initialize per-parse state: collected messages & starting line."""
        RSTState.__init__(self, state_machine, debug)
        self.messages = []
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        """A blank line ends the block (once some content was collected)."""
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        """Emit the collected literal block (or a warning if empty)."""
        if context:
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.line = self.initial_lineno
            self.parent += literal_block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            # Back up so the parent state machine can reassess this line.
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        """Indented text inside a quoted literal block is an error."""
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote))
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        """A line with a different quote character ends the block."""
        if context:
            self.messages.append(
                self.reporter.error('Inconsistent literal block quoting.',
                                    line=self.state_machine.abs_line_number()))
            self.state_machine.previous_line()
        raise EOFError
|
|
2983 |
|
|
2984 |
|
|
2985 |
# `QuotedLiteralBlock` is deliberately excluded: it is special-purpose
# (see its docstring) and only used via `Text.quoted_literal_block`.
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""
|