|
1 # $Id: states.py 4824 2006-12-09 00:59:23Z goodger $ |
|
2 # Author: David Goodger <goodger@python.org> |
|
3 # Copyright: This module has been placed in the public domain. |
|
4 |
|
5 """ |
|
6 This is the ``docutils.parsers.restructuredtext.states`` module, the core of |
|
7 the reStructuredText parser. It defines the following: |
|
8 |
|
9 :Classes: |
|
10 - `RSTStateMachine`: reStructuredText parser's entry point. |
|
11 - `NestedStateMachine`: recursive StateMachine. |
|
12 - `RSTState`: reStructuredText State superclass. |
|
13 - `Inliner`: For parsing inline markup. |
|
14 - `Body`: Generic classifier of the first line of a block. |
|
15 - `SpecializedBody`: Superclass for compound element members. |
|
16 - `BulletList`: Second and subsequent bullet_list list_items |
|
17 - `DefinitionList`: Second+ definition_list_items. |
|
18 - `EnumeratedList`: Second+ enumerated_list list_items. |
|
19 - `FieldList`: Second+ fields. |
|
20 - `OptionList`: Second+ option_list_items. |
|
21 - `RFC2822List`: Second+ RFC2822-style fields. |
|
22 - `ExtensionOptions`: Parses directive option fields. |
|
23 - `Explicit`: Second+ explicit markup constructs. |
|
24 - `SubstitutionDef`: For embedded directives in substitution definitions. |
|
25 - `Text`: Classifier of second line of a text block. |
|
26 - `SpecializedText`: Superclass for continuation lines of Text-variants. |
|
27 - `Definition`: Second line of potential definition_list_item. |
|
28 - `Line`: Second line of overlined section title or transition marker. |
|
29 - `Struct`: An auxiliary collection class. |
|
30 |
|
31 :Exception classes: |
|
32 - `MarkupError` |
|
33 - `ParserError` |
|
34 - `MarkupMismatch` |
|
35 |
|
36 :Functions: |
|
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls. |
|
38 - `unescape()`: Return a string, nulls removed or restored to backslashes. |
|
39 |
|
40 :Attributes: |
|
41 - `state_classes`: set of State classes used with `RSTStateMachine`. |
|
42 |
|
43 Parser Overview |
|
44 =============== |
|
45 |
|
46 The reStructuredText parser is implemented as a recursive state machine, |
|
47 examining its input one line at a time. To understand how the parser works, |
|
48 please first become familiar with the `docutils.statemachine` module. In the |
|
49 description below, references are made to classes defined in this module; |
|
50 please see the individual classes for details. |
|
51 |
|
52 Parsing proceeds as follows: |
|
53 |
|
54 1. The state machine examines each line of input, checking each of the |
|
55 transition patterns of the state `Body`, in order, looking for a match. |
|
56 The implicit transitions (blank lines and indentation) are checked before |
|
57 any others. The 'text' transition is a catch-all (matches anything). |
|
58 |
|
59 2. The method associated with the matched transition pattern is called. |
|
60 |
|
61 A. Some transition methods are self-contained, appending elements to the |
|
62 document tree (`Body.doctest` parses a doctest block). The parser's |
|
63 current line index is advanced to the end of the element, and parsing |
|
64 continues with step 1. |
|
65 |
|
66 B. Other transition methods trigger the creation of a nested state machine, |
|
67 whose job is to parse a compound construct ('indent' does a block quote, |
|
68 'bullet' does a bullet list, 'overline' does a section [first checking |
|
69 for a valid section header], etc.). |
|
70 |
|
71 - In the case of lists and explicit markup, a one-off state machine is |
|
72 created and run to parse contents of the first item. |
|
73 |
|
74 - A new state machine is created and its initial state is set to the |
|
75 appropriate specialized state (`BulletList` in the case of the |
|
76 'bullet' transition; see `SpecializedBody` for more detail). This |
|
77 state machine is run to parse the compound element (or series of |
|
78 explicit markup elements), and returns as soon as a non-member element |
|
79 is encountered. For example, the `BulletList` state machine ends as |
|
80 soon as it encounters an element which is not a list item of that |
|
81 bullet list. The optional omission of inter-element blank lines is |
|
82 enabled by this nested state machine. |
|
83 |
|
84 - The current line index is advanced to the end of the elements parsed, |
|
85 and parsing continues with step 1. |
|
86 |
|
87 C. The result of the 'text' transition depends on the next line of text. |
|
88 The current state is changed to `Text`, under which the second line is |
|
89 examined. If the second line is: |
|
90 |
|
91 - Indented: The element is a definition list item, and parsing proceeds |
|
92 similarly to step 2.B, using the `DefinitionList` state. |
|
93 |
|
94 - A line of uniform punctuation characters: The element is a section |
|
95 header; again, parsing proceeds as in step 2.B, and `Body` is still |
|
96 used. |
|
97 |
|
98 - Anything else: The element is a paragraph, which is examined for |
|
99 inline markup and appended to the parent element. Processing |
|
100 continues with step 1. |
|
101 """ |
|
102 |
|
103 __docformat__ = 'reStructuredText' |
|
104 |
|
105 |
|
106 import sys |
|
107 import re |
|
108 import roman |
|
109 from types import TupleType, FunctionType, MethodType |
|
110 from docutils import nodes, statemachine, utils, urischemes |
|
111 from docutils import ApplicationError, DataError |
|
112 from docutils.statemachine import StateMachineWS, StateWS |
|
113 from docutils.nodes import fully_normalize_name as normalize_name |
|
114 from docutils.nodes import whitespace_normalize_name |
|
115 from docutils.utils import escape2null, unescape, column_width |
|
116 import docutils.parsers.rst |
|
117 from docutils.parsers.rst import directives, languages, tableparser, roles |
|
118 from docutils.parsers.rst.languages import en as _fallback_language_module |
|
119 |
|
120 |
|
class MarkupError(DataError):
    """`DataError` subclass; presumably signals invalid markup (raise
    sites are not visible in this part of the file)."""


class UnknownInterpretedRoleError(DataError):
    """`DataError` subclass; by its name, for an interpreted text role
    that is not known."""


class InterpretedRoleNotImplementedError(DataError):
    """`DataError` subclass; by its name, for an interpreted text role
    that has no implementation."""


class ParserError(ApplicationError):
    """`ApplicationError` subclass for parser-level failures."""


class MarkupMismatch(Exception):
    """Plain `Exception` subclass; presumably raised when a construct
    turns out not to be the markup it first appeared to be."""
|
126 |
|
127 |
|
class Struct:

    """Attribute bag: every keyword argument becomes an instance attribute."""

    def __init__(self, **keywordargs):
        # Copy the keywords straight into the instance namespace so that
        # they can be read back with dotted-attribute access.
        vars(self).update(keywordargs)
|
134 |
|
135 |
|
class RSTStateMachine(StateMachineWS):

    """
    The top-level StateMachine of the reStructuredText parser.

    Parsing starts by calling the `run()` method.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=1,
            inliner=None):
        """
        Parse `input_lines`, modifying the `document` node in place.

        Extends `StateMachineWS.run()`: the parse-global data (the `memo`
        structure shared with nested state machines and states) is set up
        first, then the base-class `run()` is invoked.

        If no `inliner` is supplied a fresh `Inliner` is created; either way
        its `init_customizations()` is called with the document settings.
        Nothing is returned; the base-class results are asserted to be empty.
        """
        settings = document.settings
        self.language = languages.get_language(settings.language_code)
        self.match_titles = match_titles
        if inliner is None:
            inliner = Inliner()
        inliner.init_customizations(settings)
        # Data shared by every state and nested state machine of this parse.
        memo = Struct(document=document,
                      reporter=document.reporter,
                      language=self.language,
                      title_styles=[],
                      section_level=0,
                      section_bubble_up_kludge=0,
                      inliner=inliner)
        self.memo = memo
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = memo.reporter
        self.node = document
        results = StateMachineWS.run(self, input_lines, input_offset,
                                     input_source=document['source'])
        assert results == [], 'RSTStateMachine.run() results should be empty!'
        # Drop references that are no longer needed once parsing is done.
        self.node = None
        self.memo = None
|
173 |
|
174 |
|
class NestedStateMachine(StateMachineWS):

    """
    A StateMachine started from inside another StateMachine's run, used for
    parsing nested document structures.
    """

    def run(self, input_lines, input_offset, memo, node, match_titles=1):
        """
        Parse `input_lines`, attaching the results to `node`.

        Extends `StateMachineWS.run()`: the document-wide data is taken from
        `memo` (document, reporter, language) before the base-class `run()`
        is invoked.  Returns the base-class results, which are asserted to
        be an empty list.
        """
        document = memo.document
        self.match_titles = match_titles
        self.memo = memo
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = memo.reporter
        self.language = memo.language
        self.node = node
        results = StateMachineWS.run(self, input_lines, input_offset)
        assert results == [], ('NestedStateMachine.run() results should be '
                               'empty!')
        return results
|
199 |
|
200 |
|
class RSTState(StateWS):

    """
    reStructuredText State superclass.

    Contains methods used by all State subclasses.
    """

    # State machine class instantiated by `nested_parse()` and
    # `nested_list_parse()` when the caller does not supply one.
    nested_sm = NestedStateMachine

    def __init__(self, state_machine, debug=0):
        # Default keyword arguments for nested state machines; used when the
        # caller of `nested_parse()`/`nested_list_parse()` supplies none.
        self.nested_sm_kwargs = {'state_classes': state_classes,
                                 'initial_state': 'Body'}
        StateWS.__init__(self, state_machine, debug)

    def runtime_init(self):
        """Extend `StateWS.runtime_init()`: cache parse-global data locally.

        Copies the reporter, inliner and document out of the state machine's
        `memo`, and records the state machine's current node as `parent`.
        """
        StateWS.runtime_init(self)
        memo = self.state_machine.memo
        self.memo = memo
        self.reporter = memo.reporter
        self.inliner = memo.inliner
        self.document = memo.document
        self.parent = self.state_machine.node

    def goto_line(self, abs_line_offset):
        """
        Jump to input line `abs_line_offset`, ignoring jumps past the end.
        """
        try:
            self.state_machine.goto_line(abs_line_offset)
        except EOFError:
            # Deliberately ignored: a jump past the end is not an error here.
            pass

    def no_match(self, context, transitions):
        """
        Override `StateWS.no_match` to generate a system message.

        This code should never be run.

        Returns `context` unchanged, no next state, and an empty result list.
        """
        self.reporter.severe(
            'Internal error: no transition pattern match.  State: "%s"; '
            'transitions: %s; context: %s; current line: %r.'
            % (self.__class__.__name__, transitions, context,
               self.state_machine.line),
            line=self.state_machine.abs_line_number())
        return context, None, []

    def bof(self, context):
        """Called at beginning of file.  Returns two empty lists."""
        return [], []

    def nested_parse(self, block, input_offset, node, match_titles=0,
                     state_machine_class=None, state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`.

        `state_machine_class` and `state_machine_kwargs` default to
        `self.nested_sm` and `self.nested_sm_kwargs`.  Returns the absolute
        line offset reached by the nested state machine.
        """
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
        block_length = len(block)
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        state_machine.unlink()
        new_offset = state_machine.abs_line_offset()
        # No `block.parent` implies disconnected -- lines aren't in sync:
        if block.parent and (len(block) - block_length) != 0:
            # Adjustment for block if modified in nested parse:
            self.state_machine.next_line(len(block) - block_length)
        return new_offset

    def nested_list_parse(self, block, input_offset, node, initial_state,
                          blank_finish,
                          blank_finish_state=None,
                          extra_settings=None,
                          match_titles=0,
                          state_machine_class=None,
                          state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`.  Also keep track of optional intermediate blank lines and the
        required final one.

        `initial_state` names the state the nested machine starts in;
        `blank_finish` is stored on the state named by `blank_finish_state`
        (default: `initial_state`) before the run and read back afterwards.
        Each key/value pair of `extra_settings` is set as an attribute on the
        initial state object.

        Returns a tuple: the absolute line offset reached by the nested
        state machine, and the final `blank_finish` value.
        """
        if extra_settings is None:
            extra_settings = {}
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
        # Always work on a private copy: 'initial_state' is overridden below
        # and neither the shared defaults nor a caller's dict may be mutated.
        state_machine_kwargs = dict(state_machine_kwargs)
        state_machine_kwargs['initial_state'] = initial_state
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
        if blank_finish_state is None:
            blank_finish_state = initial_state
        state_machine.states[blank_finish_state].blank_finish = blank_finish
        for key, value in extra_settings.items():
            setattr(state_machine.states[initial_state], key, value)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        blank_finish = state_machine.states[blank_finish_state].blank_finish
        state_machine.unlink()
        return state_machine.abs_line_offset(), blank_finish

    def section(self, title, source, style, lineno, messages):
        """Check for a valid subsection and create one if it checks out."""
        if self.check_subsection(source, style, lineno):
            self.new_subsection(title, lineno, messages)

    def check_subsection(self, source, style, lineno):
        """
        Check for a valid subsection header.  Return 1 (true) or None (false).

        When a new section is reached that isn't a subsection of the current
        section, back up the line count (via ``previous_line()``), then
        ``raise EOFError``.  The current StateMachine will finish, then the
        calling StateMachine can re-examine the title.  This will work its way
        back up the calling chain until the correct section level is reached.

        @@@ Alternative: Evaluate the title, store the title info & level, and
        back up the chain until that level is reached.  Store in memo? Or
        return in results?

        :Exception: `EOFError` when a sibling or supersection encountered.
        """
        memo = self.memo
        title_styles = memo.title_styles
        mylevel = memo.section_level
        try:                            # check for existing title style
            level = title_styles.index(style) + 1
        except ValueError:              # new title style
            if len(title_styles) == memo.section_level: # new subsection
                title_styles.append(style)
                return 1
            else:                       # not at lowest level
                self.parent += self.title_inconsistent(source, lineno)
                return None
        if level <= mylevel:            # sibling or supersection
            memo.section_level = level   # bubble up to parent section
            if len(style) == 2:
                memo.section_bubble_up_kludge = 1
            # back up 2 lines for underline title, 3 for overline title
            self.state_machine.previous_line(len(style) + 1)
            raise EOFError              # let parent section re-evaluate
        if level == mylevel + 1:        # immediate subsection
            return 1
        else:                           # invalid subsection
            self.parent += self.title_inconsistent(source, lineno)
            return None

    def title_inconsistent(self, sourcetext, lineno):
        """Return a severe system message quoting the offending title text."""
        error = self.reporter.severe(
            'Title level inconsistent:', nodes.literal_block('', sourcetext),
            line=lineno)
        return error

    def new_subsection(self, title, lineno, messages):
        """Append new subsection to document tree. On return, check level."""
        memo = self.memo
        mylevel = memo.section_level
        memo.section_level += 1
        section_node = nodes.section()
        self.parent += section_node
        textnodes, title_messages = self.inline_text(title, lineno)
        titlenode = nodes.title(title, '', *textnodes)
        name = normalize_name(titlenode.astext())
        section_node['names'].append(name)
        section_node += titlenode
        section_node += messages
        section_node += title_messages
        self.document.note_implicit_target(section_node, section_node)
        # Parse everything after the current line as the section's body.
        offset = self.state_machine.line_offset + 1
        absoffset = self.state_machine.abs_line_offset() + 1
        newabsoffset = self.nested_parse(
              self.state_machine.input_lines[offset:], input_offset=absoffset,
              node=section_node, match_titles=1)
        self.goto_line(newabsoffset)
        if memo.section_level <= mylevel: # can't handle next section?
            raise EOFError              # bubble up to supersection
        # reset section_level; next pass will detect it properly
        memo.section_level = mylevel

    def paragraph(self, lines, lineno):
        """
        Return a list (paragraph & messages) & a boolean: literal_block next?

        A text block ending in an unescaped "::" announces a literal block:
        the flag is 1, and the "::" is dropped or shortened to ":" in the
        paragraph text.
        """
        data = '\n'.join(lines).rstrip()
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            if len(data) == 2:
                # The block is just "::" -- no paragraph at all.
                return [], 1
            elif data[-3] in ' \n':
                # "text ::" -- drop the marker and the whitespace before it.
                text = data[:-3].rstrip()
            else:
                # "text::" -- keep a single colon.
                text = data[:-1]
            literalnext = 1
        else:
            text = data
            literalnext = 0
        textnodes, messages = self.inline_text(text, lineno)
        p = nodes.paragraph(data, '', *textnodes)
        p.line = lineno
        return [p] + messages, literalnext

    def inline_text(self, text, lineno):
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.
        """
        return self.inliner.parse(text, lineno, self.memo, self.parent)

    def unindent_warning(self, node_name):
        """Return a warning that `node_name` ended without a blank line.

        The warning is attributed to the line after the current one.
        """
        return self.reporter.warning(
            '%s ends without a blank line; unexpected unindent.' % node_name,
            line=(self.state_machine.abs_line_number() + 1))
|
414 |
|
415 |
|
def build_regexp(definition, compile=1):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.

    :Parameter: `compile`: if true (the default), return a pattern object
        compiled with ``re.UNICODE``; if false, return the pattern source
        string instead.  Nested definitions are always built as strings.
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        # Nested definitions are tuples (including tuple subclasses such as
        # named tuples); anything else is taken as a ready-made pattern.
        if isinstance(part, tuple):
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % {
        'prefix': prefix, 'name': name, 'or_group': or_group,
        'suffix': suffix}
    if compile:
        return re.compile(regexp, re.UNICODE)
    else:
        return regexp
|
437 |
|
438 |
|
439 class Inliner: |
|
440 |
|
441 """ |
|
442 Parse inline markup; call the `parse()` method. |
|
443 """ |
|
444 |
|
445 def __init__(self): |
|
446 self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),] |
|
447 """List of (pattern, bound method) tuples, used by |
|
448 `self.implicit_inline`.""" |
|
449 |
|
450 def init_customizations(self, settings): |
|
451 """Setting-based customizations; run when parsing begins.""" |
|
452 if settings.pep_references: |
|
453 self.implicit_dispatch.append((self.patterns.pep, |
|
454 self.pep_reference)) |
|
455 if settings.rfc_references: |
|
456 self.implicit_dispatch.append((self.patterns.rfc, |
|
457 self.rfc_reference)) |
|
458 |
|
459 def parse(self, text, lineno, memo, parent): |
|
460 # Needs to be refactored for nested inline markup. |
|
461 # Add nested_parse() method? |
|
462 """ |
|
463 Return 2 lists: nodes (text and inline elements), and system_messages. |
|
464 |
|
465 Using `self.patterns.initial`, a pattern which matches start-strings |
|
466 (emphasis, strong, interpreted, phrase reference, literal, |
|
467 substitution reference, and inline target) and complete constructs |
|
468 (simple reference, footnote reference), search for a candidate. When |
|
469 one is found, check for validity (e.g., not a quoted '*' character). |
|
470 If valid, search for the corresponding end string if applicable, and |
|
471 check it for validity. If not found or invalid, generate a warning |
|
472 and ignore the start-string. Implicit inline markup (e.g. standalone |
|
473 URIs) is found last. |
|
474 """ |
|
475 self.reporter = memo.reporter |
|
476 self.document = memo.document |
|
477 self.language = memo.language |
|
478 self.parent = parent |
|
479 pattern_search = self.patterns.initial.search |
|
480 dispatch = self.dispatch |
|
481 remaining = escape2null(text) |
|
482 processed = [] |
|
483 unprocessed = [] |
|
484 messages = [] |
|
485 while remaining: |
|
486 match = pattern_search(remaining) |
|
487 if match: |
|
488 groups = match.groupdict() |
|
489 method = dispatch[groups['start'] or groups['backquote'] |
|
490 or groups['refend'] or groups['fnend']] |
|
491 before, inlines, remaining, sysmessages = method(self, match, |
|
492 lineno) |
|
493 unprocessed.append(before) |
|
494 messages += sysmessages |
|
495 if inlines: |
|
496 processed += self.implicit_inline(''.join(unprocessed), |
|
497 lineno) |
|
498 processed += inlines |
|
499 unprocessed = [] |
|
500 else: |
|
501 break |
|
502 remaining = ''.join(unprocessed) + remaining |
|
503 if remaining: |
|
504 processed += self.implicit_inline(remaining, lineno) |
|
505 return processed, messages |
|
506 |
|
507 openers = '\'"([{<' |
|
508 closers = '\'")]}>' |
|
509 start_string_prefix = (r'((?<=^)|(?<=[-/: \n%s]))' % re.escape(openers)) |
|
510 end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n\x00%s]))' |
|
511 % re.escape(closers)) |
|
512 non_whitespace_before = r'(?<![ \n])' |
|
513 non_whitespace_escape_before = r'(?<![ \n\x00])' |
|
514 non_whitespace_after = r'(?![ \n])' |
|
515 # Alphanumerics with isolated internal [-._] chars (i.e. not 2 together): |
|
516 simplename = r'(?:(?!_)\w)+(?:[-._](?:(?!_)\w)+)*' |
|
517 # Valid URI characters (see RFC 2396 & RFC 2732); |
|
518 # final \x00 allows backslash escapes in URIs: |
|
519 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]""" |
|
520 # Delimiter indicating the end of a URI (not part of the URI): |
|
521 uri_end_delim = r"""[>]""" |
|
522 # Last URI character; same as uric but no punctuation: |
|
523 urilast = r"""[_~*/=+a-zA-Z0-9]""" |
|
524 # End of a URI (either 'urilast' or 'uric followed by a |
|
525 # uri_end_delim'): |
|
526 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals() |
|
527 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]""" |
|
528 email_pattern = r""" |
|
529 %(emailc)s+(?:\.%(emailc)s+)* # name |
|
530 (?<!\x00)@ # at |
|
531 %(emailc)s+(?:\.%(emailc)s*)* # host |
|
532 %(uri_end)s # final URI char |
|
533 """ |
|
534 parts = ('initial_inline', start_string_prefix, '', |
|
535 [('start', '', non_whitespace_after, # simple start-strings |
|
536 [r'\*\*', # strong |
|
537 r'\*(?!\*)', # emphasis but not strong |
|
538 r'``', # literal |
|
539 r'_`', # inline internal target |
|
540 r'\|(?!\|)'] # substitution reference |
|
541 ), |
|
542 ('whole', '', end_string_suffix, # whole constructs |
|
543 [# reference name & end-string |
|
544 r'(?P<refname>%s)(?P<refend>__?)' % simplename, |
|
545 ('footnotelabel', r'\[', r'(?P<fnend>\]_)', |
|
546 [r'[0-9]+', # manually numbered |
|
547 r'\#(%s)?' % simplename, # auto-numbered (w/ label?) |
|
548 r'\*', # auto-symbol |
|
549 r'(?P<citationlabel>%s)' % simplename] # citation reference |
|
550 ) |
|
551 ] |
|
552 ), |
|
553 ('backquote', # interpreted text or phrase reference |
|
554 '(?P<role>(:%s:)?)' % simplename, # optional role |
|
555 non_whitespace_after, |
|
556 ['`(?!`)'] # but not literal |
|
557 ) |
|
558 ] |
|
559 ) |
|
560 patterns = Struct( |
|
561 initial=build_regexp(parts), |
|
562 emphasis=re.compile(non_whitespace_escape_before |
|
563 + r'(\*)' + end_string_suffix), |
|
564 strong=re.compile(non_whitespace_escape_before |
|
565 + r'(\*\*)' + end_string_suffix), |
|
566 interpreted_or_phrase_ref=re.compile( |
|
567 r""" |
|
568 %(non_whitespace_escape_before)s |
|
569 ( |
|
570 ` |
|
571 (?P<suffix> |
|
572 (?P<role>:%(simplename)s:)? |
|
573 (?P<refend>__?)? |
|
574 ) |
|
575 ) |
|
576 %(end_string_suffix)s |
|
577 """ % locals(), re.VERBOSE | re.UNICODE), |
|
578 embedded_uri=re.compile( |
|
579 r""" |
|
580 ( |
|
581 (?:[ \n]+|^) # spaces or beginning of line/string |
|
582 < # open bracket |
|
583 %(non_whitespace_after)s |
|
584 ([^<>\x00]+) # anything but angle brackets & nulls |
|
585 %(non_whitespace_before)s |
|
586 > # close bracket w/o whitespace before |
|
587 ) |
|
588 $ # end of string |
|
589 """ % locals(), re.VERBOSE), |
|
590 literal=re.compile(non_whitespace_before + '(``)' |
|
591 + end_string_suffix), |
|
592 target=re.compile(non_whitespace_escape_before |
|
593 + r'(`)' + end_string_suffix), |
|
594 substitution_ref=re.compile(non_whitespace_escape_before |
|
595 + r'(\|_{0,2})' |
|
596 + end_string_suffix), |
|
597 email=re.compile(email_pattern % locals() + '$', re.VERBOSE), |
|
598 uri=re.compile( |
|
599 (r""" |
|
600 %(start_string_prefix)s |
|
601 (?P<whole> |
|
602 (?P<absolute> # absolute URI |
|
603 (?P<scheme> # scheme (http, ftp, mailto) |
|
604 [a-zA-Z][a-zA-Z0-9.+-]* |
|
605 ) |
|
606 : |
|
607 ( |
|
608 ( # either: |
|
609 (//?)? # hierarchical URI |
|
610 %(uric)s* # URI characters |
|
611 %(uri_end)s # final URI char |
|
612 ) |
|
613 ( # optional query |
|
614 \?%(uric)s* |
|
615 %(uri_end)s |
|
616 )? |
|
617 ( # optional fragment |
|
618 \#%(uric)s* |
|
619 %(uri_end)s |
|
620 )? |
|
621 ) |
|
622 ) |
|
623 | # *OR* |
|
624 (?P<email> # email address |
|
625 """ + email_pattern + r""" |
|
626 ) |
|
627 ) |
|
628 %(end_string_suffix)s |
|
629 """) % locals(), re.VERBOSE), |
|
630 pep=re.compile( |
|
631 r""" |
|
632 %(start_string_prefix)s |
|
633 ( |
|
634 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file |
|
635 | |
|
636 (PEP\s+(?P<pepnum2>\d+)) # reference by name |
|
637 ) |
|
638 %(end_string_suffix)s""" % locals(), re.VERBOSE), |
|
639 rfc=re.compile( |
|
640 r""" |
|
641 %(start_string_prefix)s |
|
642 (RFC(-|\s+)?(?P<rfcnum>\d+)) |
|
643 %(end_string_suffix)s""" % locals(), re.VERBOSE)) |
|
644 |
|
645 def quoted_start(self, match): |
|
646 """Return 1 if inline markup start-string is 'quoted', 0 if not.""" |
|
647 string = match.string |
|
648 start = match.start() |
|
649 end = match.end() |
|
650 if start == 0: # start-string at beginning of text |
|
651 return 0 |
|
652 prestart = string[start - 1] |
|
653 try: |
|
654 poststart = string[end] |
|
655 if self.openers.index(prestart) \ |
|
656 == self.closers.index(poststart): # quoted |
|
657 return 1 |
|
658 except IndexError: # start-string at end of text |
|
659 return 1 |
|
660 except ValueError: # not quoted |
|
661 pass |
|
662 return 0 |
|
663 |
|
664 def inline_obj(self, match, lineno, end_pattern, nodeclass, |
|
665 restore_backslashes=0): |
|
666 string = match.string |
|
667 matchstart = match.start('start') |
|
668 matchend = match.end('start') |
|
669 if self.quoted_start(match): |
|
670 return (string[:matchend], [], string[matchend:], [], '') |
|
671 endmatch = end_pattern.search(string[matchend:]) |
|
672 if endmatch and endmatch.start(1): # 1 or more chars |
|
673 text = unescape(endmatch.string[:endmatch.start(1)], |
|
674 restore_backslashes) |
|
675 textend = matchend + endmatch.end(1) |
|
676 rawsource = unescape(string[matchstart:textend], 1) |
|
677 return (string[:matchstart], [nodeclass(rawsource, text)], |
|
678 string[textend:], [], endmatch.group(1)) |
|
679 msg = self.reporter.warning( |
|
680 'Inline %s start-string without end-string.' |
|
681 % nodeclass.__name__, line=lineno) |
|
682 text = unescape(string[matchstart:matchend], 1) |
|
683 rawsource = unescape(string[matchstart:matchend], 1) |
|
684 prb = self.problematic(text, rawsource, msg) |
|
685 return string[:matchstart], [prb], string[matchend:], [msg], '' |
|
686 |
|
687 def problematic(self, text, rawsource, message): |
|
688 msgid = self.document.set_id(message, self.parent) |
|
689 problematic = nodes.problematic(rawsource, text, refid=msgid) |
|
690 prbid = self.document.set_id(problematic) |
|
691 message.add_backref(prbid) |
|
692 return problematic |
|
693 |
|
694 def emphasis(self, match, lineno): |
|
695 before, inlines, remaining, sysmessages, endstring = self.inline_obj( |
|
696 match, lineno, self.patterns.emphasis, nodes.emphasis) |
|
697 return before, inlines, remaining, sysmessages |
|
698 |
|
699 def strong(self, match, lineno): |
|
700 before, inlines, remaining, sysmessages, endstring = self.inline_obj( |
|
701 match, lineno, self.patterns.strong, nodes.strong) |
|
702 return before, inlines, remaining, sysmessages |
|
703 |
|
704 def interpreted_or_phrase_ref(self, match, lineno): |
|
705 end_pattern = self.patterns.interpreted_or_phrase_ref |
|
706 string = match.string |
|
707 matchstart = match.start('backquote') |
|
708 matchend = match.end('backquote') |
|
709 rolestart = match.start('role') |
|
710 role = match.group('role') |
|
711 position = '' |
|
712 if role: |
|
713 role = role[1:-1] |
|
714 position = 'prefix' |
|
715 elif self.quoted_start(match): |
|
716 return (string[:matchend], [], string[matchend:], []) |
|
717 endmatch = end_pattern.search(string[matchend:]) |
|
718 if endmatch and endmatch.start(1): # 1 or more chars |
|
719 textend = matchend + endmatch.end() |
|
720 if endmatch.group('role'): |
|
721 if role: |
|
722 msg = self.reporter.warning( |
|
723 'Multiple roles in interpreted text (both ' |
|
724 'prefix and suffix present; only one allowed).', |
|
725 line=lineno) |
|
726 text = unescape(string[rolestart:textend], 1) |
|
727 prb = self.problematic(text, text, msg) |
|
728 return string[:rolestart], [prb], string[textend:], [msg] |
|
729 role = endmatch.group('suffix')[1:-1] |
|
730 position = 'suffix' |
|
731 escaped = endmatch.string[:endmatch.start(1)] |
|
732 rawsource = unescape(string[matchstart:textend], 1) |
|
733 if rawsource[-1:] == '_': |
|
734 if role: |
|
735 msg = self.reporter.warning( |
|
736 'Mismatch: both interpreted text role %s and ' |
|
737 'reference suffix.' % position, line=lineno) |
|
738 text = unescape(string[rolestart:textend], 1) |
|
739 prb = self.problematic(text, text, msg) |
|
740 return string[:rolestart], [prb], string[textend:], [msg] |
|
741 return self.phrase_ref(string[:matchstart], string[textend:], |
|
742 rawsource, escaped, unescape(escaped)) |
|
743 else: |
|
744 rawsource = unescape(string[rolestart:textend], 1) |
|
745 nodelist, messages = self.interpreted(rawsource, escaped, role, |
|
746 lineno) |
|
747 return (string[:rolestart], nodelist, |
|
748 string[textend:], messages) |
|
749 msg = self.reporter.warning( |
|
750 'Inline interpreted text or phrase reference start-string ' |
|
751 'without end-string.', line=lineno) |
|
752 text = unescape(string[matchstart:matchend], 1) |
|
753 prb = self.problematic(text, text, msg) |
|
754 return string[:matchstart], [prb], string[matchend:], [msg] |
|
755 |
|
756 def phrase_ref(self, before, after, rawsource, escaped, text): |
|
757 match = self.patterns.embedded_uri.search(escaped) |
|
758 if match: |
|
759 text = unescape(escaped[:match.start(0)]) |
|
760 uri_text = match.group(2) |
|
761 uri = ''.join(uri_text.split()) |
|
762 uri = self.adjust_uri(uri) |
|
763 if uri: |
|
764 target = nodes.target(match.group(1), refuri=uri) |
|
765 else: |
|
766 raise ApplicationError('problem with URI: %r' % uri_text) |
|
767 if not text: |
|
768 text = uri |
|
769 else: |
|
770 target = None |
|
771 refname = normalize_name(text) |
|
772 reference = nodes.reference(rawsource, text, |
|
773 name=whitespace_normalize_name(text)) |
|
774 node_list = [reference] |
|
775 if rawsource[-2:] == '__': |
|
776 if target: |
|
777 reference['refuri'] = uri |
|
778 else: |
|
779 reference['anonymous'] = 1 |
|
780 else: |
|
781 if target: |
|
782 reference['refuri'] = uri |
|
783 target['names'].append(refname) |
|
784 self.document.note_explicit_target(target, self.parent) |
|
785 node_list.append(target) |
|
786 else: |
|
787 reference['refname'] = refname |
|
788 self.document.note_refname(reference) |
|
789 return before, node_list, after, [] |
|
790 |
|
791 def adjust_uri(self, uri): |
|
792 match = self.patterns.email.match(uri) |
|
793 if match: |
|
794 return 'mailto:' + uri |
|
795 else: |
|
796 return uri |
|
797 |
|
def interpreted(self, rawsource, text, role, lineno):
    """
    Process interpreted text by dispatching to the function for `role`.

    The role function is looked up with `roles.role()`.  Return a tuple
    of a node list and a list of system messages.  If no role function
    is found, the node list holds a single "problematic" node wrapping
    `rawsource`, and an error message is appended to the messages.
    """
    role_fn, messages = roles.role(role, self.language, lineno,
                                   self.reporter)
    if not role_fn:
        msg = self.reporter.error(
            'Unknown interpreted text role "%s".' % role,
            line=lineno)
        return ([self.problematic(rawsource, rawsource, msg)],
                messages + [msg])
    role_nodes, role_messages = role_fn(role, rawsource, text, lineno, self)
    return role_nodes, messages + role_messages
|
810 |
|
def literal(self, match, lineno):
    """
    Handle inline literal markup via `self.inline_obj()`.

    Return a tuple of the text before the markup, the new nodes, the
    remaining text, and a list of system messages.
    """
    (before, inlines, remaining, sysmessages,
     _endstring) = self.inline_obj(match, lineno, self.patterns.literal,
                                   nodes.literal, restore_backslashes=1)
    return before, inlines, remaining, sysmessages
|
816 |
|
def inline_internal_target(self, match, lineno):
    """
    Handle an inline internal target via `self.inline_obj()`.

    When a `nodes.target` is produced, its normalized text is added to
    its names and it is registered with the document as an explicit
    target.  Return a tuple of the text before the markup, the new
    nodes, the remaining text, and a list of system messages.
    """
    (before, inlines, remaining, sysmessages,
     _endstring) = self.inline_obj(match, lineno, self.patterns.target,
                                   nodes.target)
    if inlines and isinstance(inlines[0], nodes.target):
        assert len(inlines) == 1
        target = inlines[0]
        target['names'].append(normalize_name(target.astext()))
        self.document.note_explicit_target(target, self.parent)
    return before, inlines, remaining, sysmessages
|
827 |
|
def substitution_reference(self, match, lineno):
    """
    Handle a substitution reference via `self.inline_obj()`.

    A resulting `nodes.substitution_reference` is registered with the
    document.  If the end-string finishes with "_", the substitution
    reference is wrapped in a `nodes.reference` (anonymous if the
    end-string finishes with "__", named otherwise).

    Return a tuple of the text before the markup, the new nodes, the
    remaining text, and a list of system messages.
    """
    (before, inlines, remaining, sysmessages,
     endstring) = self.inline_obj(match, lineno,
                                  self.patterns.substitution_ref,
                                  nodes.substitution_reference)
    if len(inlines) != 1:
        return before, inlines, remaining, sysmessages
    subref_node = inlines[0]
    if not isinstance(subref_node, nodes.substitution_reference):
        return before, inlines, remaining, sysmessages
    subref_text = subref_node.astext()
    self.document.note_substitution_ref(subref_node, subref_text)
    if endstring[-1:] == '_':
        # The substitution doubles as a hyperlink reference.
        reference_node = nodes.reference(
            '|%s%s' % (subref_text, endstring), '')
        if endstring[-2:] == '__':
            reference_node['anonymous'] = 1
        else:
            reference_node['refname'] = normalize_name(subref_text)
            self.document.note_refname(reference_node)
        reference_node += subref_node
        inlines = [reference_node]
    return before, inlines, remaining, sysmessages
|
848 |
|
def footnote_reference(self, match, lineno):
    """
    Handles `nodes.footnote_reference` and `nodes.citation_reference`
    elements.

    The kind of reference is decided by the match's "citationlabel"
    group.  Footnote labels starting with "#" are auto-numbered, the
    label "*" is an auto-symbol footnote; any other label becomes the
    reference text.

    Return a tuple of the text before the reference, a one-element node
    list, the remaining text, and an empty list of system messages.
    """
    label = match.group('footnotelabel')
    refname = normalize_name(label)
    source = match.string
    before = source[:match.start('whole')]
    remaining = source[match.end('whole'):]
    rawsource = '[%s]_' % label
    if match.group('citationlabel'):
        refnode = nodes.citation_reference(rawsource, refname=refname)
        refnode += nodes.Text(label)
        self.document.note_citation_ref(refnode)
        return (before, [refnode], remaining, [])
    refnode = nodes.footnote_reference(rawsource)
    if refname[0] == '#':
        # auto-numbered footnote, optionally labeled
        refname = refname[1:]
        refnode['auto'] = 1
        self.document.note_autofootnote_ref(refnode)
    elif refname == '*':
        # auto-symbol footnote
        refname = ''
        refnode['auto'] = '*'
        self.document.note_symbol_footnote_ref(refnode)
    else:
        refnode += nodes.Text(label)
    if refname:
        refnode['refname'] = refname
        self.document.note_footnote_ref(refnode)
    if utils.get_trim_footnote_ref_space(self.document.settings):
        before = before.rstrip()
    return (before, [refnode], remaining, [])
|
883 |
|
def reference(self, match, lineno, anonymous=None):
    """
    Build a `nodes.reference` from a simple reference match.

    If `anonymous` is true the node is flagged as anonymous; otherwise
    its normalized reference name is set and registered with the
    document.

    Return a tuple of the text before the reference, a one-element node
    list, the remaining text, and an empty list of system messages.
    """
    referencename = match.group('refname')
    refname = normalize_name(referencename)
    rawsource = referencename + match.group('refend')
    referencenode = nodes.reference(
        rawsource, referencename,
        name=whitespace_normalize_name(referencename))
    if anonymous:
        referencenode['anonymous'] = 1
    else:
        referencenode['refname'] = refname
        self.document.note_refname(referencenode)
    source = match.string
    before = source[:match.start('whole')]
    remaining = source[match.end('whole'):]
    return (before, [referencenode], remaining, [])
|
899 |
|
def anonymous_reference(self, match, lineno):
    """Handle an anonymous reference: `self.reference()`, anonymous=1."""
    result = self.reference(match, lineno, anonymous=1)
    return result
|
902 |
|
def standalone_uri(self, match, lineno):
    """
    Build a reference node for a standalone URI or email address.

    The match is accepted if it has no "scheme" group, or if the
    lower-cased scheme is a key of `urischemes.schemes`.  Email
    addresses (the "email" group) get a "mailto:" prefix in their
    ``refuri``.

    Return a one-element list holding the `nodes.reference`.

    Raise `MarkupMismatch` if the scheme is not a known one.
    """
    scheme = match.group('scheme')
    # ``in`` replaces the deprecated ``dict.has_key()``.
    if scheme and scheme.lower() not in urischemes.schemes:
        raise MarkupMismatch            # not a valid scheme
    if match.group('email'):
        addscheme = 'mailto:'
    else:
        addscheme = ''
    text = match.group('whole')
    unescaped = unescape(text, 0)
    return [nodes.reference(unescape(text, 1), unescaped,
                            refuri=addscheme + unescaped)]
|
916 |
|
def pep_reference(self, match, lineno):
    """
    Build a reference node for a standalone PEP reference.

    Text starting with "pep-" takes its number from the "pepnum1"
    group, text starting with "PEP" from the "pepnum2" group.  The
    target URI is built from the ``pep_base_url`` and
    ``pep_file_url_template`` settings.

    Return a one-element list holding the `nodes.reference`.

    Raise `MarkupMismatch` if the text has neither prefix.
    """
    text = match.group(0)
    if text.startswith('pep-'):
        number_group = 'pepnum1'
    elif text.startswith('PEP'):
        number_group = 'pepnum2'
    else:
        raise MarkupMismatch
    pepnum = int(match.group(number_group))
    settings = self.document.settings
    ref = settings.pep_base_url + settings.pep_file_url_template % pepnum
    unescaped = unescape(text, 0)
    return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
|
929 |
|
# File name template for an RFC; combined with the ``rfc_base_url`` setting.
rfc_url = 'rfc%d.html'

def rfc_reference(self, match, lineno):
    """
    Build a reference node for a standalone RFC reference.

    The RFC number comes from the "rfcnum" group; the target URI is the
    ``rfc_base_url`` setting followed by `self.rfc_url` filled in with
    that number.

    Return a one-element list holding the `nodes.reference`.

    Raise `MarkupMismatch` if the matched text does not start with "RFC".
    """
    text = match.group(0)
    if not text.startswith('RFC'):
        raise MarkupMismatch
    rfcnum = int(match.group('rfcnum'))
    ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
    unescaped = unescape(text, 0)
    return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
|
941 |
|
def implicit_inline(self, text, lineno):
    """
    Look for implicit inline markup in `text`.

    Each (pattern, method) pair of `self.implicit_dispatch` is tried in
    turn.  On a match, the method handles the matched text and the text
    on either side of it is processed recursively.  A method may reject
    its match by raising `MarkupMismatch`, in which case the next
    pattern is tried.

    Return a list of `nodes.Text` and inline element nodes (empty for
    empty `text`).
    """
    if not text:
        return []
    for pattern, method in self.implicit_dispatch:
        match = pattern.search(text)
        if not match:
            continue
        try:
            # Must recurse on strings before *and* after the match;
            # there may be multiple patterns.
            leading = self.implicit_inline(text[:match.start()], lineno)
            matched = method(match, lineno)
            trailing = self.implicit_inline(text[match.end():], lineno)
        except MarkupMismatch:
            continue
        return leading + matched + trailing
    return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
|
963 |
|
# Maps inline markup delimiter strings to the functions (defined earlier
# in this class body) that handle the corresponding construct.  The values
# are plain functions here because the dict is built at class-definition
# time.
dispatch = {'*': emphasis,
            '**': strong,
            '`': interpreted_or_phrase_ref,
            '``': literal,
            '_`': inline_internal_target,
            ']_': footnote_reference,
            '|': substitution_reference,
            '_': reference,
            '__': anonymous_reference}
|
973 |
|
974 |
|
def _loweralpha_to_int(s, _zero=(ord('a')-1)):
    """Return the ordinal of the lowercase letter `s` ('a' -> 1)."""
    code = ord(s)
    return code - _zero
|
977 |
|
def _upperalpha_to_int(s, _zero=(ord('A')-1)):
    """Return the ordinal of the uppercase letter `s` ('A' -> 1)."""
    code = ord(s)
    return code - _zero
|
980 |
|
def _lowerroman_to_int(s):
    """Return the value of the lowercase Roman numeral `s` ('ii' -> 2)."""
    uppercased = s.upper()
    return roman.fromRoman(uppercased)
|
983 |
|
984 |
|
985 class Body(RSTState): |
|
986 |
|
987 """ |
|
988 Generic classifier of the first line of a block. |
|
989 """ |
|
990 |
|
double_width_pad_char = tableparser.TableParser.double_width_pad_char
"""Padding character for East Asian double-width text."""

enum = Struct()
"""Enumerated list parsing information."""

# Per-format delimiters.  `start` and `end` are the slice bounds that
# strip the prefix and suffix from a matched enumerator (see
# `parse_enumerator`).
enum.formatinfo = {
      'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
      'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
      'period': Struct(prefix='', suffix='.', start=0, end=-1)}
enum.formats = enum.formatinfo.keys()
# Sequences are tried in this order when an enumerator is ambiguous.
enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                  'lowerroman', 'upperroman'] # ORDERED!
# Regular expression fragment matching the enumerator text of each sequence.
enum.sequencepats = {'arabic': '[0-9]+',
                     'loweralpha': '[a-z]',
                     'upperalpha': '[A-Z]',
                     'lowerroman': '[ivxlcdm]+',
                     'upperroman': '[IVXLCDM]+',}
# Functions converting enumerator text to its ordinal value.
enum.converters = {'arabic': int,
                   'loweralpha': _loweralpha_to_int,
                   'upperalpha': _upperalpha_to_int,
                   'lowerroman': _lowerroman_to_int,
                   'upperroman': roman.fromRoman}

# Compiled, end-anchored version of each sequence pattern.
# NOTE: the loop variable `sequence` remains bound in the class namespace.
enum.sequenceregexps = {}
for sequence in enum.sequences:
    enum.sequenceregexps[sequence] = re.compile(
          enum.sequencepats[sequence] + '$')
|
1019 |
|
grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
"""Matches the top (and bottom) border of a full (grid) table."""

simple_table_top_pat = re.compile(r'=+( +=+)+ *$')
"""Matches the top border of a simple table."""

simple_table_border_pat = re.compile(r'=+[ =]*$')
"""Matches the bottom and header-bottom borders of a simple table."""
|
1028 |
|
pats = {}
"""Fragments of patterns used by transitions."""

# Basic character classes.
pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
pats['alpha'] = '[a-zA-Z]'
pats['alphanum'] = '[a-zA-Z0-9]'
pats['alphanumplus'] = '[a-zA-Z0-9_-]'
# Enumerator text: any one of the sequence patterns, or "#".
pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                '|%(upperroman)s|#)' % enum.sequencepats)
# Option list fragments; each is built from the entries defined before it.
pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
# @@@ Loosen up the pattern?  Allow Unicode?
pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

# One named group per enumerator format: escaped prefix, enumerator
# text, escaped suffix.
# NOTE: the loop variable `format` remains bound in the class namespace.
for format in enum.formats:
    pats[format] = '(?P<%s>%s%s%s)' % (
          format, re.escape(enum.formatinfo[format].prefix),
          pats['enum'], re.escape(enum.formatinfo[format].suffix))
|
1049 |
|
# Transition name -> pattern (source string or precompiled regular
# expression).  The names correspond to the methods of this class
# (`bullet`, `enumerator`, `field_marker`, ...).
patterns = {
      'bullet': ur'[-+*\u2022\u2023\u2043]( +|$)',
      'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
      'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
      'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
      'doctest': r'>>>( +|$)',
      'line_block': r'\|( +|$)',
      'grid_table_top': grid_table_top_pat,
      'simple_table_top': simple_table_top_pat,
      'explicit_markup': r'\.\.( +|$)',
      'anonymous': r'__( +|$)',
      'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
      'text': r''}
# Names of the transitions of this state; every name is a key of `patterns`.
initial_transitions = (
      'bullet',
      'enumerator',
      'field_marker',
      'option_marker',
      'doctest',
      'line_block',
      'grid_table_top',
      'simple_table_top',
      'explicit_markup',
      'anonymous',
      'line',
      'text')
|
1076 |
|
def indent(self, match, context, next_state):
    """Block quote."""
    (quoted_lines, _indent, offset,
     blank_finish) = self.state_machine.get_indented()
    quotes = self.block_quote(quoted_lines, offset)
    self.parent += quotes
    if not blank_finish:
        self.parent += self.unindent_warning('Block quote')
    return context, next_state, []
|
1086 |
|
def block_quote(self, indented, line_offset):
    """
    Parse `indented` into one or more block quotes.

    The lines are split at each attribution (see `split_attribution`);
    every part is parsed into its own `nodes.block_quote`, with the
    attribution (if any) appended to it.  Blank lines between parts are
    skipped.

    Return a list of the block quote nodes, interleaved with any system
    messages produced while parsing attributions.
    """
    elements = []
    while indented:
        (quote_lines, attribution_lines, attribution_offset,
         indented, next_offset) = self.split_attribution(indented,
                                                         line_offset)
        blockquote = nodes.block_quote()
        self.nested_parse(quote_lines, line_offset, blockquote)
        elements.append(blockquote)
        if attribution_lines:
            attribution, messages = self.parse_attribution(
                attribution_lines, attribution_offset)
            blockquote += attribution
            elements += messages
        line_offset = next_offset
        # Skip blank lines in front of the next block quote.
        while indented and not indented[0]:
            indented = indented[1:]
            line_offset += 1
    return elements
|
1108 |
|
# Matches the start of a block quote attribution: "--", "---" (not
# followed by another "-"), or an em-dash, then optional spaces, with a
# non-space character following.
# U+2014 is an em-dash:
attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])')
|
1111 |
|
def split_attribution(self, indented, line_offset):
    """
    Check for a block quote attribution and split it off:

    * First line after a blank line must begin with a dash ("--", "---",
      em-dash; matches `self.attribution_pattern`).
    * Every line after that must have consistent indentation.
    * Attributions must be preceded by block quote content.

    Return a tuple of: (block quote content lines, attribution lines,
    attribution offset, remaining indented lines, line offset of the
    remaining lines).  If no attribution is found, the first item is
    `indented` itself and all other items are ``None``.
    """
    last_blank = None
    content_seen = False
    for index, raw_line in enumerate(indented):
        line = raw_line.rstrip()
        if not line:
            last_blank = index
            continue
        # Candidate only if content came first and the previous line
        # was blank.
        if content_seen and last_blank == index - 1:
            match = self.attribution_pattern.match(line)
            if match:
                attribution_end, indent = self.check_attribution(
                    indented, index)
                if attribution_end:
                    a_lines = indented[index:attribution_end]
                    # Strip the dash from the first line and the
                    # common indent from the continuation lines.
                    a_lines.trim_left(match.end(), end=1)
                    a_lines.trim_left(indent, start=1)
                    return (indented[:index], a_lines,
                            index, indented[attribution_end:],
                            line_offset + attribution_end)
        content_seen = True
    return (indented, None, None, None, None)
|
1146 |
|
def check_attribution(self, indented, attribution_start):
    """
    Check attribution shape.

    All lines after the first, up to the next blank line or the end of
    `indented`, must share the same indentation.

    Return the index past the end of the attribution, and the indent
    (0 if the attribution has a single line); or ``(None, None)`` if
    the shape is bad.
    """
    indent = None
    end = attribution_start + 1
    for end in range(attribution_start + 1, len(indented)):
        line = indented[end].rstrip()
        if not line:
            # A blank line terminates the attribution.
            return end, (indent or 0)
        width = len(line) - len(line.lstrip())
        if indent is None:
            indent = width
        elif width != indent:
            return None, None       # bad shape; not an attribution
    # Ran off the end: return index of line after last attribution line.
    return end + 1, (indent or 0)
|
1166 |
|
def parse_attribution(self, indented, line_offset):
    """
    Build a `nodes.attribution` from the attribution lines `indented`.

    The lines are joined, parsed as inline text, and the node's line
    number is set to the state machine's current absolute line number
    plus `line_offset`.

    Return a tuple of the attribution node and a list of system messages.
    """
    lineno = self.state_machine.abs_line_number() + line_offset
    text = '\n'.join(indented).rstrip()
    textnodes, messages = self.inline_text(text, lineno)
    attribution = nodes.attribution(text, '', *textnodes)
    attribution.line = lineno
    return attribution, messages
|
1174 |
|
def bullet(self, match, context, next_state):
    """Bullet list item."""
    bulletlist = nodes.bullet_list()
    self.parent += bulletlist
    bulletlist['bullet'] = match.string[0]
    first_item, blank_finish = self.list_item(match.end())
    bulletlist += first_item
    # Parse the remaining items in the "BulletList" state, starting at
    # the next line.
    machine = self.state_machine
    following_lines = machine.input_lines[machine.line_offset + 1:]
    new_line_offset, blank_finish = self.nested_list_parse(
        following_lines,
        input_offset=machine.abs_line_offset() + 1,
        node=bulletlist, initial_state='BulletList',
        blank_finish=blank_finish)
    self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Bullet list')
    return [], next_state, []
|
1192 |
|
def list_item(self, indent):
    """
    Parse one list item whose text starts at column `indent`.

    If the current line has text at `indent`, the item's indentation is
    known; otherwise it is taken from the first indented line that
    follows.

    Return a tuple of the `nodes.list_item` and the "blank finish" flag
    reported by the state machine.
    """
    machine = self.state_machine
    if machine.line[indent:]:
        indented, line_offset, blank_finish = (
            machine.get_known_indented(indent))
    else:
        indented, indent, line_offset, blank_finish = (
            machine.get_first_known_indented(indent))
    listitem = nodes.list_item('\n'.join(indented))
    if indented:
        self.nested_parse(indented, input_offset=line_offset,
                          node=listitem)
    return listitem, blank_finish
|
1205 |
|
def enumerator(self, match, context, next_state):
    """
    Enumerated list item.

    Raise `statemachine.TransitionCorrection` (to "text") if the line
    turns out not to start an enumerated list.
    """
    fmt, sequence, text, ordinal = self.parse_enumerator(match)
    if not self.is_enumerated_list_item(ordinal, sequence, fmt):
        raise statemachine.TransitionCorrection('text')
    enumlist = nodes.enumerated_list()
    self.parent += enumlist
    is_auto = sequence == '#'
    if is_auto:
        # auto-enumerated lists are numbered with arabic numerals
        enumlist['enumtype'] = 'arabic'
    else:
        enumlist['enumtype'] = sequence
    enumlist['prefix'] = self.enum.formatinfo[fmt].prefix
    enumlist['suffix'] = self.enum.formatinfo[fmt].suffix
    if ordinal != 1:
        enumlist['start'] = ordinal
        msg = self.reporter.info(
            'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
            % (text, ordinal), line=self.state_machine.abs_line_number())
        self.parent += msg
    listitem, blank_finish = self.list_item(match.end())
    enumlist += listitem
    # Parse the remaining items in the "EnumeratedList" state, starting
    # at the next line.
    machine = self.state_machine
    following_lines = machine.input_lines[machine.line_offset + 1:]
    newline_offset, blank_finish = self.nested_list_parse(
        following_lines,
        input_offset=machine.abs_line_offset() + 1,
        node=enumlist, initial_state='EnumeratedList',
        blank_finish=blank_finish,
        extra_settings={'lastordinal': ordinal,
                        'format': fmt,
                        'auto': is_auto})
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Enumerated list')
    return [], next_state, []
|
1240 |
|
def parse_enumerator(self, match, expected_sequence=None):
    """
    Analyze an enumerator and return the results.

    :Return:
        - the enumerator format ('period', 'parens', or 'rparen'),
        - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.;
          '#' for an auto-enumerator),
        - the text of the enumerator, stripped of formatting, and
        - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
          ``None`` is returned for invalid enumerator text).

    :Exception: `ParserError` if no format or sequence matches, or if
        `expected_sequence` is not a known sequence name.

    The enumerator format has already been determined by the regular
    expression match. If `expected_sequence` is given, that sequence is
    tried first. If not, we check for Roman numeral 1. This way,
    single-character Roman numerals (which are also alphabetical) can be
    matched. If no sequence has been matched, all sequences are checked in
    order.
    """
    groupdict = match.groupdict()
    sequence = ''
    for format in self.enum.formats:
        if groupdict[format]:       # was this the format matched?
            break                   # yes; keep `format`
    else:                           # shouldn't happen
        raise ParserError('enumerator format not matched')
    formatinfo = self.enum.formatinfo[format]
    # Strip the format's prefix and suffix from the matched text.
    text = groupdict[format][formatinfo.start:formatinfo.end]
    if text == '#':
        sequence = '#'
    elif expected_sequence:
        try:
            if self.enum.sequenceregexps[expected_sequence].match(text):
                sequence = expected_sequence
        except KeyError:            # shouldn't happen
            # Report the name that was looked up; `sequence` is still
            # empty at this point.
            raise ParserError('unknown enumerator sequence: %s'
                              % expected_sequence)
    elif text == 'i':
        sequence = 'lowerroman'
    elif text == 'I':
        sequence = 'upperroman'
    if not sequence:
        for sequence in self.enum.sequences:
            if self.enum.sequenceregexps[sequence].match(text):
                break
        else:                       # shouldn't happen
            raise ParserError('enumerator sequence not matched')
    if sequence == '#':
        ordinal = 1
    else:
        try:
            ordinal = self.enum.converters[sequence](text)
        except roman.InvalidRomanNumeralError:
            ordinal = None
    return format, sequence, text, ordinal
|
1295 |
|
def is_enumerated_list_item(self, ordinal, sequence, format):
    """
    Check validity based on the ordinal value and the second line.

    Return true (1) iff the ordinal is valid and the second line is
    blank, indented, or starts with the next enumerator or an
    auto-enumerator; also when there is no second line.  Return ``None``
    otherwise.  The state machine is left on the line it started on.
    """
    if ordinal is None:
        return None
    machine = self.state_machine
    try:
        next_line = machine.next_line()
    except EOFError:              # end of input lines
        machine.previous_line()
        return 1
    machine.previous_line()       # we only peeked; step back
    if not next_line[:1].strip():   # blank or indented
        return 1
    candidates = self.make_enumerator(ordinal + 1, sequence, format)
    if not candidates:
        return None
    next_enumerator, auto_enumerator = candidates
    try:
        if (next_line.startswith(next_enumerator)
            or next_line.startswith(auto_enumerator)):
            return 1
    except TypeError:
        pass
    return None
|
1324 |
|
def make_enumerator(self, ordinal, sequence, format):
    """
    Construct and return the next enumerated list item marker, and an
    auto-enumerator ("#" instead of the regular enumerator).

    Both markers carry the prefix and suffix of `format` and a trailing
    space.

    Return ``None`` for invalid (out of range) ordinals.

    :Exception: `ParserError` for an unknown `sequence` name.
    """
    if sequence == '#':
        enumerator = '#'
    elif sequence == 'arabic':
        enumerator = str(ordinal)
    else:
        if sequence.endswith('alpha'):
            if ordinal > 26:
                return None
            enumerator = chr(ord('a') + ordinal - 1)
        elif sequence.endswith('roman'):
            try:
                enumerator = roman.toRoman(ordinal)
            except roman.RomanError:
                return None
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
        if sequence.startswith('lower'):
            enumerator = enumerator.lower()
        elif sequence.startswith('upper'):
            enumerator = enumerator.upper()
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
    info = self.enum.formatinfo[format]
    prefix, suffix = info.prefix, info.suffix
    return (prefix + enumerator + suffix + ' ',
            prefix + '#' + suffix + ' ')
|
1361 |
|
def field_marker(self, match, context, next_state):
    """Field list item."""
    field_list = nodes.field_list()
    self.parent += field_list
    first_field, blank_finish = self.field(match)
    field_list += first_field
    # Parse the remaining fields in the "FieldList" state, starting at
    # the next line.
    machine = self.state_machine
    following_lines = machine.input_lines[machine.line_offset + 1:]
    newline_offset, blank_finish = self.nested_list_parse(
        following_lines,
        input_offset=machine.abs_line_offset() + 1,
        node=field_list, initial_state='FieldList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Field list')
    return [], next_state, []
|
1378 |
|
def field(self, match):
    """
    Parse one field (name and body) starting at the field marker `match`.

    The field name is parsed as inline text; any system messages from
    that are placed at the start of the field body.

    Return a tuple of the `nodes.field` and the "blank finish" flag
    reported by the state machine.
    """
    name = self.parse_field_marker(match)
    lineno = self.state_machine.abs_line_number()
    (indented, _indent, line_offset,
     blank_finish) = self.state_machine.get_first_known_indented(match.end())
    field_node = nodes.field()
    field_node.line = lineno
    name_nodes, name_messages = self.inline_text(name, lineno)
    field_node += nodes.field_name(name, '', *name_nodes)
    body = nodes.field_body('\n'.join(indented), *name_messages)
    field_node += body
    if indented:
        self.parse_field_body(indented, line_offset, body)
    return field_node, blank_finish
|
1393 |
|
def parse_field_marker(self, match):
    """Extract & return field name from a field marker match."""
    without_leading_colon = match.group()[1:]
    # Drop the closing ':' and whatever follows it.
    closing_colon = without_leading_colon.rfind(':')
    return without_leading_colon[:closing_colon]
|
1399 |
|
def parse_field_body(self, indented, offset, node):
    """Parse the field body lines `indented` into `node`."""
    self.nested_parse(indented, node=node, input_offset=offset)
|
1402 |
|
1403 def option_marker(self, match, context, next_state): |
|
1404 """Option list item.""" |
|
1405 optionlist = nodes.option_list() |
|
1406 try: |
|
1407 listitem, blank_finish = self.option_list_item(match) |
|
1408 except MarkupError, (message, lineno): |
|
1409 # This shouldn't happen; pattern won't match. |
|
1410 msg = self.reporter.error( |
|
1411 'Invalid option list marker: %s' % message, line=lineno) |
|
1412 self.parent += msg |
|
1413 indented, indent, line_offset, blank_finish = \ |
|
1414 self.state_machine.get_first_known_indented(match.end()) |
|
1415 elements = self.block_quote(indented, line_offset) |
|
1416 self.parent += elements |
|
1417 if not blank_finish: |
|
1418 self.parent += self.unindent_warning('Option list') |
|
1419 return [], next_state, [] |
|
1420 self.parent += optionlist |
|
1421 optionlist += listitem |
|
1422 offset = self.state_machine.line_offset + 1 # next line |
|
1423 newline_offset, blank_finish = self.nested_list_parse( |
|
1424 self.state_machine.input_lines[offset:], |
|
1425 input_offset=self.state_machine.abs_line_offset() + 1, |
|
1426 node=optionlist, initial_state='OptionList', |
|
1427 blank_finish=blank_finish) |
|
1428 self.goto_line(newline_offset) |
|
1429 if not blank_finish: |
|
1430 self.parent += self.unindent_warning('Option list') |
|
1431 return [], next_state, [] |
|
1432 |
|
def option_list_item(self, match):
    """
    Parse one option list item starting at the option marker `match`.

    Return a tuple of the `nodes.option_list_item` and the "blank
    finish" flag reported by the state machine.

    Raise `statemachine.TransitionCorrection` (to "text") if the marker
    has no description; the state machine is first moved back to the
    line it was on when this method was called.
    """
    start_offset = self.state_machine.abs_line_offset()
    options = self.parse_option_marker(match)
    (indented, _indent, line_offset,
     blank_finish) = self.state_machine.get_first_known_indented(match.end())
    if not indented:                # not an option list item
        self.goto_line(start_offset)
        raise statemachine.TransitionCorrection('text')
    option_group = nodes.option_group('', *options)
    description = nodes.description('\n'.join(indented))
    item = nodes.option_list_item('', option_group, description)
    # `indented` is known to be non-empty here.
    self.nested_parse(indented, input_offset=line_offset,
                      node=description)
    return item, blank_finish
|
1449 |
|
def parse_option_marker(self, match):
    """
    Return a list of `nodes.option` objects, parsed from an option
    marker match.  Each option holds a `nodes.option_string` and, if the
    option takes an argument, a `nodes.option_argument` whose
    "delimiter" attribute records how the argument was attached
    (" ", "=", or "" for the "-ovalue" form).

    :Exception: `MarkupError` for invalid option markers.
    """
    optlist = []
    optionstrings = match.group().rstrip().split(', ')
    for optionstring in optionstrings:
        tokens = optionstring.split()
        delimiter = ' '
        # Split at the first "=" only, so that an argument which itself
        # contains "=" (e.g. "--opt=<a=b>") stays in one piece.
        firstopt = tokens[0].split('=', 1)
        if len(firstopt) > 1:
            # "--opt=value" form
            tokens[:1] = firstopt
            delimiter = '='
        elif (len(tokens[0]) > 2
              and ((tokens[0].startswith('-')
                    and not tokens[0].startswith('--'))
                   or tokens[0].startswith('+'))):
            # "-ovalue" form
            tokens[:1] = [tokens[0][:2], tokens[0][2:]]
            delimiter = ''
        if len(tokens) > 1 and (tokens[1].startswith('<')
                                and tokens[-1].endswith('>')):
            # "-o <value1 value2>" form; join all values into one token
            tokens[1:] = [' '.join(tokens[1:])]
        if 0 < len(tokens) <= 2:
            option = nodes.option(optionstring)
            option += nodes.option_string(tokens[0], tokens[0])
            if len(tokens) > 1:
                option += nodes.option_argument(tokens[1], tokens[1],
                                                delimiter=delimiter)
            optlist.append(option)
        else:
            raise MarkupError(
                'wrong number of option tokens (=%s), should be 1 or 2: '
                '"%s"' % (len(tokens), optionstring),
                self.state_machine.abs_line_number() + 1)
    return optlist
|
1491 |
|
def doctest(self, match, context, next_state):
    """Doctest block: the text block becomes a `nodes.doctest_block`."""
    lines = self.state_machine.get_text_block()
    data = '\n'.join(lines)
    self.parent += nodes.doctest_block(data, data)
    return [], next_state, []
|
1496 |
|
def line_block(self, match, context, next_state):
    """First line of a line block."""
    block = nodes.line_block()
    self.parent += block
    lineno = self.state_machine.abs_line_number()
    first_line, messages, blank_finish = self.line_block_line(match, lineno)
    block += first_line
    self.parent += messages
    if not blank_finish:
        # Parse the remaining lines in the "LineBlock" state, starting
        # at the next line.
        machine = self.state_machine
        following_lines = machine.input_lines[machine.line_offset + 1:]
        new_line_offset, blank_finish = self.nested_list_parse(
            following_lines,
            input_offset=machine.abs_line_offset() + 1,
            node=block, initial_state='LineBlock',
            blank_finish=0)
        self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.reporter.warning(
            'Line block ends without a blank line.',
            line=(self.state_machine.abs_line_number() + 1))
    if len(block):
        # An empty first line has no indent of its own; anchor it at 0
        # before the lines are nested by indentation.
        if block[0].indent is None:
            block[0].indent = 0
        self.nest_line_block_lines(block)
    return [], next_state, []
|
1522 |
|
def line_block_line(self, match, lineno):
    """
    Return one line element of a line_block.

    The result is a tuple of the `nodes.line`, a list of system
    messages from inline parsing, and the "blank finish" flag reported
    by the state machine.  Unless the source line consists of the "|"
    marker only, the line's ``indent`` attribute is set from the
    whitespace following the marker.
    """
    (indented, _indent, _line_offset,
     blank_finish) = self.state_machine.get_first_known_indented(
        match.end(), until_blank=1)
    text = u'\n'.join(indented)
    text_nodes, messages = self.inline_text(text, lineno)
    line = nodes.line(text, '', *text_nodes)
    is_empty = match.string.rstrip() == '|'
    if not is_empty:
        line.indent = len(match.group(1)) - 1
    return line, messages, blank_finish
|
1534 |
|
def nest_line_block_lines(self, block):
    """
    Give every line of `block` an indent, then nest the lines.

    A line (other than the first) whose ``indent`` is ``None`` inherits
    the indent of the line before it; the result is handed to
    `nest_line_block_segment`.
    """
    previous = None
    for item in block:
        if previous is not None and item.indent is None:
            item.indent = previous.indent
        previous = item
    self.nest_line_block_segment(block)
|
1540 |
|
def nest_line_block_segment(self, block):
    """
    Nest the lines of `block` according to their indentation (in place).

    Lines at the smallest indent stay direct children of `block`.  Each
    run of more deeply indented lines is moved into a new
    `nodes.line_block`, which is nested recursively and takes the run's
    place.
    """
    shallowest = min([member.indent for member in block])
    regrouped = []
    nested = nodes.line_block()
    for member in block:
        if member.indent > shallowest:
            nested.append(member)
            continue
        if len(nested):
            # A shallow line ends the current run of deeper lines.
            self.nest_line_block_segment(nested)
            regrouped.append(nested)
            nested = nodes.line_block()
        regrouped.append(member)
    if len(nested):
        self.nest_line_block_segment(nested)
        regrouped.append(nested)
    block[:] = regrouped
|
1559 |
|
def grid_table_top(self, match, context, next_state):
    """Top border of a full table."""
    isolate = self.isolate_grid_table
    parser_class = tableparser.GridTableParser
    return self.table_top(match, context, next_state, isolate,
                          parser_class)
|
1565 |
|
def simple_table_top(self, match, context, next_state):
    """Top border of a simple table."""
    isolate = self.isolate_simple_table
    parser_class = tableparser.SimpleTableParser
    return self.table_top(match, context, next_state, isolate,
                          parser_class)
|
1571 |
|
def table_top(self, match, context, next_state,
              isolate_function, parser_class):
    """
    Top border of a generic table.

    The nodes produced by `self.table()` are added to the parent; a
    warning is added as well if the table is not followed by a blank
    line.
    """
    nodelist, blank_finish = self.table(isolate_function, parser_class)
    self.parent += nodelist
    if blank_finish:
        return [], next_state, []
    msg = self.reporter.warning(
        'Blank line required after table.',
        line=self.state_machine.abs_line_number() + 1)
    self.parent += msg
    return [], next_state, []
|
1583 |
|
def table(self, isolate_function, parser_class):
    """
    Parse a table.

    :Parameters:
        - `isolate_function`: called without arguments; must return a
          3-tuple ``(block, messages, blank_finish)``.
        - `parser_class`: instantiated without arguments; its
          ``parse(block)`` result is passed on to `build_table`.

    :Return: a 2-tuple ``(nodelist, blank_finish)``.  `nodelist` is the
        table node followed by `messages`; the `malformed_table` error
        followed by `messages` if a ``tableparser.TableMarkupError`` is
        raised; or just `messages` when no block was isolated.
    """
    block, messages, blank_finish = isolate_function()
    if block:
        try:
            parser = parser_class()
            tabledata = parser.parse(block)
            # The block ends at the current line, so its first line is
            # len(block) - 1 lines earlier.
            tableline = (self.state_machine.abs_line_number() - len(block)
                         + 1)
            table = self.build_table(tabledata, tableline)
            nodelist = [table] + messages
        # "except X as name" works on Python 2.6+ and 3; the former
        # "except X, name" spelling is a syntax error on Python 3.
        except tableparser.TableMarkupError as detail:
            nodelist = self.malformed_table(
                block, ' '.join(detail.args)) + messages
    else:
        nodelist = messages
    return nodelist, blank_finish
|
1601 |
|
def isolate_grid_table(self):
    """
    Extract the lines of a grid table starting at the current line.

    The text block is fetched flush-left from the state machine, detached
    with ``disconnect()`` and padded for double-width characters.  Lines
    are stripped in place; the block is cut at the first line that does
    not begin with ``+`` or ``|`` and, if the last remaining line does not
    match ``grid_table_top_pat``, cut back to the closest earlier line
    that does.  Whenever lines are cut, the state machine is backed up
    with ``previous_line()``.

    :Return: a 3-tuple ``(block, messages, blank_finish)``.  `block` is an
        empty list if the table is malformed (no bottom border, a line
        whose length differs from the top border, or a line not ending in
        ``+`` or ``|``); `messages` then includes the `malformed_table`
        error.  `blank_finish` is 0 after unexpected indentation or when
        the block had to be cut, else 1.
    """
    messages = []
    blank_finish = 1
    try:
        block = self.state_machine.get_text_block(flush_left=1)
    # "except X as name" works on Python 2.6+ and 3; the former
    # "except X, name" spelling is a syntax error on Python 3.
    except statemachine.UnexpectedIndentationError as instance:
        # The exception carries the lines collected so far.
        block, source, lineno = instance.args
        messages.append(self.reporter.error('Unexpected indentation.',
                                            source=source, line=lineno))
        blank_finish = 0
    block.disconnect()
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    width = len(block[0].strip())
    for i in range(len(block)):
        block[i] = block[i].strip()
        if block[i][0] not in '+|':     # check left edge
            blank_finish = 0
            # Hand the cut-off lines back to the state machine.
            self.state_machine.previous_line(len(block) - i)
            del block[i:]
            break
    if not self.grid_table_top_pat.match(block[-1]):    # find bottom
        blank_finish = 0
        # from second-last to third line of table:
        for i in range(len(block) - 2, 1, -1):
            if self.grid_table_top_pat.match(block[i]):
                self.state_machine.previous_line(len(block) - i + 1)
                del block[i+1:]
                break
        else:
            # No border line found: the table has no bottom.
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    for i in range(len(block)):         # check right edge
        if len(block[i]) != width or block[i][-1] not in '+|':
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    return block, messages, blank_finish
|
1639 |
|
def isolate_simple_table(self):
    """
    Locate the end of a simple table whose top border is the current line.

    Scans the following input lines for lines matching
    ``simple_table_border_pat``.  The table ends at the second such
    border, or at the first one that is the last input line or is
    followed by a blank line.  The state machine is advanced with
    ``next_line()`` to the last line consumed.

    :Return: a 3-tuple ``(block, messages, blank_finish)``.  On success
        `block` holds the table lines (padded for double-width
        characters) and `messages` is empty.  If a border's stripped
        length differs from the top border's, or no bottom border is
        found, `block` is an empty list and `messages` is the result of
        `malformed_table`.
    """
    machine = self.state_machine
    start = machine.line_offset
    lines = machine.input_lines
    limit = len(lines) - 1
    top_width = len(lines[start].strip())
    is_border = self.simple_table_border_pat.match

    def blank_follows(index):
        # True at the end of the input or if the next line is blank.
        return index == limit or not lines[index+1].strip()

    borders_seen = 0
    last_border = None
    for i in range(start + 1, limit + 1):
        line = lines[i]
        if not is_border(line):
            continue
        if len(line.strip()) != top_width:
            machine.next_line(i - start)
            messages = self.malformed_table(
                lines[start:i+1], 'Bottom/header table border does '
                'not match top border.')
            return [], messages, blank_follows(i)
        borders_seen += 1
        last_border = i
        if borders_seen == 2 or blank_follows(i):
            end = i
            break
    else:                               # reached end of input_lines
        if borders_seen:
            extra = ' or no blank line after table bottom'
            machine.next_line(last_border - start)
            block = lines[start:last_border+1]
        else:
            extra = ''
            machine.next_line(limit - start)
            block = lines[start:]
        messages = self.malformed_table(
            block, 'No bottom table border found%s.' % extra)
        return [], messages, not extra
    machine.next_line(end - start)
    block = lines[start:end+1]
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    return block, [], blank_follows(end)
|
1682 |
|
def malformed_table(self, block, detail=''):
    """
    Build the error for a malformed table.

    The double-width pad character is removed from `block` (via its
    ``replace`` method) and the block text is attached to the error as a
    literal block.  `detail`, if given, is added to the message on a new
    line.  The reported line is the current absolute line number minus
    ``len(block) - 1``.

    :Return: a one-element list containing the error system message.
    """
    block.replace(self.double_width_pad_char, '')
    text = '\n'.join(block)
    first_line = self.state_machine.abs_line_number() - len(block) + 1
    message = 'Malformed table.'
    if detail:
        message = message + '\n' + detail
    literal = nodes.literal_block(text, text)
    return [self.reporter.error(message, literal, line=first_line)]
|
1693 |
|
def build_table(self, tabledata, tableline, stub_columns=0):
    """
    Assemble a ``table`` node from parsed table data.

    :Parameters:
        - `tabledata`: 3-tuple ``(colwidths, headrows, bodyrows)``.
        - `tableline`: line number passed through to `build_table_row`.
        - `stub_columns`: count of leading columns whose ``colspec``
          gets ``stub=1``.

    :Return: the ``table`` node, holding one ``tgroup`` with a
        ``colspec`` per column, a ``thead`` (only if there are head
        rows) and a ``tbody``.
    """
    colwidths, headrows, bodyrows = tabledata
    tgroup = nodes.tgroup(cols=len(colwidths))
    table = nodes.table()
    table += tgroup
    stubs_remaining = stub_columns
    for width in colwidths:
        colspec = nodes.colspec(colwidth=width)
        if stubs_remaining:
            colspec.attributes['stub'] = 1
            stubs_remaining -= 1
        tgroup += colspec
    # Head section first (when present), then the body section.
    sections = []
    if headrows:
        sections.append((nodes.thead(), headrows))
    sections.append((nodes.tbody(), bodyrows))
    for section, rows in sections:
        tgroup += section
        for rowdata in rows:
            section += self.build_table_row(rowdata, tableline)
    return table
|
1715 |
|
def build_table_row(self, rowdata, tableline):
    """
    Assemble a ``row`` node from one row of parsed table data.

    Each entry of `rowdata` is either None (skipped) or a 4-tuple
    ``(morerows, morecols, offset, cellblock)``.  Non-zero span values
    become attributes of the ``entry`` node; a cell with any text is
    parsed into its entry with the input offset ``tableline + offset``.
    """
    row = nodes.row()
    for cell in [item for item in rowdata if item is not None]:
        morerows, morecols, offset, cellblock = cell
        spans = {}
        for key, value in (('morerows', morerows), ('morecols', morecols)):
            if value:
                spans[key] = value
        entry = nodes.entry(**spans)
        row += entry
        if ''.join(cellblock):
            self.nested_parse(cellblock, input_offset=tableline+offset,
                              node=entry)
    return row
|
1733 |
|
1734 |
|
# Namespace object for the explicit-markup machinery.  The methods of this
# class read it as ``self.explicit.patterns`` (below) and
# ``self.explicit.constructs`` (assigned after the construct methods).
explicit = Struct()
"""Patterns and constants used for explicit markup recognition."""

# Compiled regular expressions, all applied with ``match()`` by the methods
# that use them.  The ``%(...)s`` placeholders are filled from the class
# attributes of `Inliner` (``non_whitespace_escape_before``, ``simplename``).
explicit.patterns = Struct(
      # Name part of a hyperlink target, up to and including the colon that
      # ends it; used by `hyperlink_target` on text already passed through
      # `escape2null` (hence the ``\x00`` look-behind for escaped colons).
      target=re.compile(r"""
                        (
                          _               # anonymous target
                        |               # *OR*
                          (?!_)           # no underscore at the beginning
                          (?P<quote>`?)   # optional open quote
                          (?![ `])        # first char. not space or
                                          # backquote
                          (?P<name>       # reference name
                            .+?
                          )
                          %(non_whitespace_escape_before)s
                          (?P=quote)      # close quote if open quote used
                        )
                        (?<!(?<!\x00):) # no unescaped colon at end
                        %(non_whitespace_escape_before)s
                        [ ]?            # optional space
                        :               # end of reference name
                        ([ ]+|$)        # followed by whitespace
                        """ % vars(Inliner), re.VERBOSE),
      # A whole string that is a single reference (``name_`` or
      # ```phrase`_``); used by `is_reference`.
      reference=re.compile(r"""
                           (
                             (?P<simple>%(simplename)s)_
                           |                  # *OR*
                             `                  # open backquote
                             (?![ ])            # not space
                             (?P<phrase>.+?)    # hyperlink phrase
                             %(non_whitespace_escape_before)s
                             `_                 # close backquote,
                                                # reference mark
                           )
                           $                  # end of string
                           """ % vars(Inliner), re.VERBOSE | re.UNICODE),
      # Substitution name up to and including the closing ``|``; used by
      # `substitution_def`.
      substitution=re.compile(r"""
                              (
                                (?![ ])          # first char. not space
                                (?P<name>.+?)    # substitution text
                                %(non_whitespace_escape_before)s
                                \|               # close delimiter
                              )
                              ([ ]+|$)           # followed by whitespace
                              """ % vars(Inliner), re.VERBOSE),)
|
1781 |
|
def footnote(self, match):
    """
    Build a ``footnote`` node from an explicit-markup footnote start.

    Group 1 of `match` is the footnote label.  A label starting with
    ``#`` marks an auto-numbered footnote (any text after the ``#``
    becomes its name), ``*`` an auto-symbol footnote, and anything else
    a manually numbered footnote that gets a ``label`` child.  The
    footnote is registered with the document accordingly, and the
    indented body after the marker, if any, is parsed into the node.

    :Return: a 2-tuple: ``[footnote_node]`` and the "blank finish" flag
        obtained from the state machine.
    """
    start_line = self.state_machine.abs_line_number()
    indented, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end()))
    label = match.group(1)
    name = normalize_name(label)
    node = nodes.footnote('\n'.join(indented))
    node.line = start_line
    if name == '*':                     # auto-symbol
        name = ''
        node['auto'] = '*'
        self.document.note_symbol_footnote(node)
    elif name[0] == '#':                # auto-numbered
        name = name[1:]                 # autonumber label
        node['auto'] = 1
        if name:
            node['names'].append(name)
        self.document.note_autofootnote(node)
    else:                               # manually numbered
        node += nodes.label('', label)
        node['names'].append(name)
        self.document.note_footnote(node)
    # Named footnotes become explicit targets; unnamed ones only get an ID.
    if name:
        self.document.note_explicit_target(node, node)
    else:
        self.document.set_id(node, node)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=node)
    return [node], blank_finish
|
1811 |
|
def citation(self, match):
    """
    Build a ``citation`` node from an explicit-markup citation start.

    Group 1 of `match` is the citation label; it becomes the node's
    ``label`` child and, normalized, its name.  The citation is
    registered with the document as a citation and as an explicit
    target, and the indented body, if any, is parsed into the node.

    :Return: a 2-tuple: ``[citation_node]`` and the "blank finish" flag
        obtained from the state machine.
    """
    start_line = self.state_machine.abs_line_number()
    indented, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end()))
    label = match.group(1)
    name = normalize_name(label)
    node = nodes.citation('\n'.join(indented))
    node.line = start_line
    node += nodes.label('', label)
    node['names'].append(name)
    document = self.document
    document.note_citation(node)
    document.note_explicit_target(node, node)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=node)
    return [node], blank_finish
|
1827 |
|
def hyperlink_target(self, match):
    """
    Parse an explicit hyperlink target.

    The block after the target indicator is fetched from the state
    machine (``until_blank=1, strip_indent=0``) and each line is passed
    through `escape2null`.  Lines are concatenated until
    ``explicit.patterns.target`` matches the accumulated text; whatever
    follows the match on the last line used, plus any remaining lines,
    is handed to `make_target` together with the matched name.

    Raises `MarkupError` (message and line number) if the lines run out
    before the pattern matches.

    :Return: a 2-tuple: ``[target]`` and the "blank finish" flag.
    """
    pattern = self.explicit.patterns.target
    lineno = self.state_machine.abs_line_number()
    block, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(
            match.end(), until_blank=1, strip_indent=0))
    blocktext = match.string[:match.end()] + '\n'.join(block)
    block = [escape2null(line) for line in block]
    escaped = block[0]
    last = 0                            # index of the last line consumed
    targetmatch = pattern.match(escaped)
    while not targetmatch:
        last += 1
        if last >= len(block):
            raise MarkupError('malformed hyperlink target.', lineno)
        escaped += block[last]
        targetmatch = pattern.match(escaped)
    del block[:last]
    # Negative offset, counted from the end of the last consumed line, of
    # the text following the match; the appended space keeps the slice
    # valid when the match ends exactly at the end of the line.
    tail_start = targetmatch.end() - len(escaped) - 1
    block[0] = (block[0] + ' ')[tail_start:].strip()
    target = self.make_target(block, blocktext, lineno,
                              targetmatch.group('name'))
    return [target], blank_finish
|
1852 |
|
def make_target(self, block, block_text, lineno, target_name):
    """
    Create the node for a hyperlink target.

    The kind of target is determined by `parse_target`:

    - 'refuri': a ``target`` node registered through `add_target` with
      the URI.
    - 'refname': a ``target`` node carrying the normalized ``refname``
      and the raw name as ``indirect_reference_name``, registered through
      `add_target` and noted as an indirect target on the document.
    - anything else: the data returned by `parse_target` is returned
      unchanged.
    """
    target_type, data = self.parse_target(block, block_text, lineno)
    if target_type == 'refuri':
        target = nodes.target(block_text, '')
        self.add_target(target_name, data, target, lineno)
        return target
    if target_type != 'refname':
        return data
    target = nodes.target(block_text, '', refname=normalize_name(data))
    target.indirect_reference_name = data
    self.add_target(target_name, '', target, lineno)
    self.document.note_indirect_target(target)
    return target
|
1867 |
|
def parse_target(self, block, block_text, lineno):
    """
    Determine the type of reference of a target.

    :Return: A 2-tuple, one of:

        - 'refname' and the indirect reference name, when the last line
          of `block` ends with an underscore and the lines, stripped and
          joined with single spaces, are recognized by `is_reference`;
        - 'refuri' and the URI: the lines of `block` with all whitespace
          removed, concatenated and passed through `unescape`.

    `block_text` and `lineno` are not used here.  No other kind of
    result is produced by this implementation.
    """
    if block and block[-1].strip().endswith('_'):
        # possible indirect target
        stripped_lines = [line.strip() for line in block]
        refname = self.is_reference(' '.join(stripped_lines))
        if refname:
            return 'refname', refname
    pieces = []
    for line in block:
        pieces.append(''.join(line.split()))
    return 'refuri', unescape(''.join(pieces))
|
1885 |
|
def is_reference(self, reference):
    """
    Return the name referred to by `reference`, or None.

    `reference` is whitespace-normalized and matched against
    ``explicit.patterns.reference``.  On a match, the simple name or the
    backquoted phrase is returned after `unescape`; otherwise None.
    """
    normalized = whitespace_normalize_name(reference)
    match = self.explicit.patterns.reference.match(normalized)
    if match:
        return unescape(match.group('simple') or match.group('phrase'))
    return None
|
1892 |
|
def add_target(self, targetname, refuri, target, lineno):
    """
    Fill in `target` and register it with the document.

    ``target.line`` is set to `lineno`.  With a non-empty `targetname`
    the normalized, unescaped name is appended to ``target['names']``,
    `refuri` (if any) is stored after ``self.inliner.adjust_uri``, and
    the target is noted as an explicit target.  With an empty
    `targetname` the target is marked anonymous, `refuri` (if any) is
    stored as given, and the target is noted as an anonymous target.

    Raises `ApplicationError` if ``adjust_uri`` returns a false value.
    """
    target.line = lineno
    if not targetname:                  # anonymous target
        if refuri:
            target['refuri'] = refuri
        target['anonymous'] = 1
        self.document.note_anonymous_target(target)
        return
    name = normalize_name(unescape(targetname))
    target['names'].append(name)
    if refuri:
        uri = self.inliner.adjust_uri(refuri)
        if not uri:
            raise ApplicationError('problem with URI: %r' % refuri)
        target['refuri'] = uri
    self.document.note_explicit_target(target, self.parent)
|
1910 |
|
def substitution_def(self, match):
    """
    Parse a substitution definition.

    The block after the substitution indicator is fetched from the state
    machine and lines are accumulated until
    ``explicit.patterns.substitution`` matches; the text after the match
    is parsed with 'SubstitutionDef' as the initial state.  Children of
    the result that are neither ``nodes.Inline`` nor ``nodes.Text`` are
    moved to ``self.parent``.

    Raises `MarkupError` (message and line number) if the lines run out
    before the pattern matches.

    :Return: a 2-tuple: a one-element list and the "blank finish" flag.
        The list holds the ``substitution_definition`` node, or a system
        message if the definition has no contents, contains a disallowed
        element, or ends up empty.
    """
    pattern = self.explicit.patterns.substitution
    lineno = self.state_machine.abs_line_number()
    block, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end(),
                                                      strip_indent=0)
    blocktext = (match.string[:match.end()] + '\n'.join(block))
    block.disconnect()
    escaped = escape2null(block[0].rstrip())
    blockindex = 0
    # Join lines (single-space separated) until the pattern matches.
    while 1:
        subdefmatch = pattern.match(escaped)
        if subdefmatch:
            break
        blockindex += 1
        try:
            escaped = escaped + ' ' + escape2null(block[blockindex].strip())
        except IndexError:
            raise MarkupError('malformed substitution definition.',
                              lineno)
    del block[:blockindex]          # strip out the substitution marker
    # Keep only what follows the match on the last consumed line; the
    # slice start is a negative offset from the end of that line.
    block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
    if not block[0]:
        # Nothing left on the marker line: content starts one line later.
        del block[0]
        offset += 1
    # Drop trailing blank lines.
    while block and not block[-1].strip():
        block.pop()
    subname = subdefmatch.group('name')
    substitution_node = nodes.substitution_definition(blocktext)
    substitution_node.line = lineno
    if not block:
        msg = self.reporter.warning(
            'Substitution definition "%s" missing contents.' % subname,
            nodes.literal_block(blocktext, blocktext), line=lineno)
        return [msg], blank_finish
    block[0] = block[0].strip()
    substitution_node['names'].append(
        nodes.whitespace_normalize_name(subname))
    new_abs_offset, blank_finish = self.nested_list_parse(
          block, input_offset=offset, node=substitution_node,
          initial_state='SubstitutionDef', blank_finish=blank_finish)
    # Walk a copy of the children; `i` tracks the position in the live
    # node, which only advances past children that are kept.
    i = 0
    for node in substitution_node[:]:
        if not (isinstance(node, nodes.Inline) or
                isinstance(node, nodes.Text)):
            self.parent += substitution_node[i]
            del substitution_node[i]
        else:
            i += 1
    for node in substitution_node.traverse(nodes.Element):
        if self.disallowed_inside_substitution_definitions(node):
            pformat = nodes.literal_block('', node.pformat().rstrip())
            msg = self.reporter.error(
              'Substitution definition contains illegal element:',
              pformat, nodes.literal_block(blocktext, blocktext),
              line=lineno)
            return [msg], blank_finish
    if len(substitution_node) == 0:
        msg = self.reporter.warning(
              'Substitution definition "%s" empty or invalid.'
              % subname,
              nodes.literal_block(blocktext, blocktext), line=lineno)
        return [msg], blank_finish
    self.document.note_substitution_def(
        substitution_node, subname, self.parent)
    return [substitution_node], blank_finish
|
1977 |
|
def disallowed_inside_substitution_definitions(self, node):
    """
    Return 1 if `node` may not appear in a substitution definition, else 0.

    Disallowed are nodes with a non-empty ``ids`` attribute, anonymous
    ``reference`` nodes, and ``footnote_reference`` nodes with an
    ``auto`` attribute.
    """
    if node['ids']:
        return 1
    if isinstance(node, nodes.reference) and node.get('anonymous'):
        return 1
    if isinstance(node, nodes.footnote_reference) and node.get('auto'):
        return 1
    return 0
|
1985 |
|
def directive(self, match, **option_presets):
    """
    Look up and run the directive named in group 1 of `match`.

    Messages produced by the lookup are appended to ``self.parent``.
    A known directive is handed to `run_directive`, an unknown one to
    `unknown_directive`.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    type_name = match.group(1)
    directive_class, messages = directives.directive(
        type_name, self.memo.language, self.document)
    self.parent += messages
    if not directive_class:
        return self.unknown_directive(type_name)
    return self.run_directive(
        directive_class, match, type_name, option_presets)
|
1997 |
|
def run_directive(self, directive, match, type_name, option_presets):
    """
    Parse a directive then run its directive function.

    Parameters:

    - `directive`: The class implementing the directive.  Must be
      a subclass of `rst.Directive`.  A plain function or method is
      first wrapped with ``convert_directive_function``.

    - `match`: A regular expression match object which matched the first
      line of the directive.

    - `type_name`: The directive name, as used in the source text.

    - `option_presets`: A dictionary of preset options, defaults for the
      directive options.  Currently, only an "alt" option is passed by
      substitution definitions (value: the substitution name), which may
      be used by an embedded image directive.

    A `MarkupError` raised while parsing the directive block is reported
    as an error system message, and a ``DirectiveError`` raised by the
    directive's ``run()`` as a system message of the level it carries;
    both include the directive's source text as a literal block.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    if isinstance(directive, (FunctionType, MethodType)):
        from docutils.parsers.rst import convert_directive_function
        directive = convert_directive_function(directive)
    lineno = self.state_machine.abs_line_number()
    initial_line_offset = self.state_machine.line_offset
    indented, indent, line_offset, blank_finish \
              = self.state_machine.get_first_known_indented(match.end(),
                                                            strip_top=0)
    # Source text from the directive's first line to the line the state
    # machine is on after fetching the indented block.
    block_text = '\n'.join(self.state_machine.input_lines[
        initial_line_offset : self.state_machine.line_offset + 1])
    # "except X as name" works on Python 2.6+ and 3; the former
    # "except X, name" spelling is a syntax error on Python 3.
    try:
        arguments, options, content, content_offset = (
            self.parse_directive_block(indented, line_offset,
                                       directive, option_presets))
    except MarkupError as detail:
        error = self.reporter.error(
            'Error in "%s" directive:\n%s.' % (type_name,
                                               ' '.join(detail.args)),
            nodes.literal_block(block_text, block_text), line=lineno)
        return [error], blank_finish
    directive_instance = directive(
        type_name, arguments, options, content, lineno,
        content_offset, block_text, self, self.state_machine)
    try:
        result = directive_instance.run()
    except docutils.parsers.rst.DirectiveError as directive_error:
        msg_node = self.reporter.system_message(directive_error.level,
                                                directive_error.message)
        msg_node += nodes.literal_block(block_text, block_text)
        msg_node['line'] = lineno
        result = [msg_node]
    assert isinstance(result, list), \
           'Directive "%s" must return a list of nodes.' % type_name
    for i, node in enumerate(result):
        assert isinstance(node, nodes.Node), \
               ('Directive "%s" returned non-Node object (index %s): %r'
                % (type_name, i, node))
    return (result,
            blank_finish or self.state_machine.is_next_line_blank())
|
2058 |
|
def parse_directive_block(self, indented, line_offset, directive,
                          option_presets):
    """
    Split a directive's block into arguments, options and content.

    A leading blank line and all trailing blank lines of `indented` are
    trimmed first.  If the directive takes arguments or options, the
    lines up to the first blank line form the argument/option block and
    the rest is content; otherwise the whole block is content.

    Raises `MarkupError` if arguments are supplied to a directive that
    takes options but no arguments, or content to a directive whose
    ``has_content`` is false.

    :Return: a 4-tuple ``(arguments, options, content, content_offset)``.
    """
    option_spec = directive.option_spec
    has_content = directive.has_content
    takes_arguments = (directive.required_arguments
                       or directive.optional_arguments)
    if indented and not indented[0].strip():
        indented.trim_start()
        line_offset += 1
    while indented and not indented[-1].strip():
        indented.trim_end()
    if indented and (takes_arguments or option_spec):
        # Index of the first blank line, or the block length if none.
        split = len(indented)
        for index in range(len(indented)):
            if not indented[index].strip():
                split = index
                break
        arg_block = indented[:split]
        content = indented[split+1:]
        content_offset = line_offset + split + 1
    else:
        arg_block = []
        content = indented
        content_offset = line_offset
    while content and not content[0].strip():
        content.trim_start()
        content_offset += 1
    options = {}
    if option_spec:
        options, arg_block = self.parse_directive_options(
            option_presets, option_spec, arg_block)
        if arg_block and not takes_arguments:
            raise MarkupError('no arguments permitted; blank line '
                              'required before content block')
    if takes_arguments:
        arguments = self.parse_directive_arguments(directive, arg_block)
    else:
        arguments = []
    if content and not has_content:
        raise MarkupError('no content permitted')
    return (arguments, options, content, content_offset)
|
2103 |
|
def parse_directive_options(self, option_presets, option_spec, arg_block):
    """
    Separate and parse the option lines of a directive's argument block.

    Everything from the first line of `arg_block` that starts with a
    colon onwards is treated as the option block and parsed with
    `parse_extension_options`; the lines before it remain arguments.

    Raises `MarkupError` carrying the error string if option parsing
    fails.

    :Return: a 2-tuple: a copy of `option_presets` updated with the
        parsed options, and the remaining argument lines.
    """
    options = option_presets.copy()
    option_lines = []
    for index, line in enumerate(arg_block):
        if line[:1] == ':':
            option_lines = arg_block[index:]
            arg_block = arg_block[:index]
            break
    if not option_lines:
        return options, arg_block
    success, data = self.parse_extension_options(option_spec, option_lines)
    if not success:                     # data is an error string
        raise MarkupError(data)
    options.update(data)                # data is a dict of options
    return options, arg_block
|
2121 |
|
def parse_directive_arguments(self, directive, arg_block):
    """
    Split the text of `arg_block` into the directive's arguments.

    The lines are joined and split on whitespace.  If more arguments
    than ``required_arguments + optional_arguments`` are found and the
    directive sets ``final_argument_whitespace``, the text is re-split
    so that the last allowed argument keeps the remaining text.

    Raises `MarkupError` if too few arguments are supplied, or too many
    and ``final_argument_whitespace`` is false.

    :Return: the list of argument strings.
    """
    required = directive.required_arguments
    maximum = required + directive.optional_arguments
    arg_text = '\n'.join(arg_block)
    arguments = arg_text.split()
    supplied = len(arguments)
    if supplied < required:
        raise MarkupError('%s argument(s) required, %s supplied'
                          % (required, supplied))
    if supplied > maximum:
        if not directive.final_argument_whitespace:
            raise MarkupError(
                'maximum %s argument(s) allowed, %s supplied'
                % (maximum, supplied))
        arguments = arg_text.split(None, maximum - 1)
    return arguments
|
2138 |
|
def parse_extension_options(self, option_spec, datalines):
    """
    Parse `datalines` for a field list containing extension options
    matching `option_spec`.

    :Parameters:
        - `option_spec`: a mapping of option name to conversion
          function, which should raise an exception on bad input.
        - `datalines`: a list of input strings.

    :Return:
        - Success value, 1 or 0.
        - An option dictionary on success, an error string on failure.
    """
    node = nodes.field_list()
    newline_offset, blank_finish = self.nested_list_parse(
          datalines, 0, node, initial_state='ExtensionOptions',
          blank_finish=1)
    if newline_offset != len(datalines):    # incomplete parse of block
        return 0, 'invalid option block'
    # "except X as name" works on Python 2.6+ and 3; the former
    # "except X, name" spelling is a syntax error on Python 3.
    try:
        options = utils.extract_extension_options(node, option_spec)
    except KeyError as detail:
        return 0, ('unknown option: "%s"' % detail.args[0])
    except (ValueError, TypeError) as detail:
        return 0, ('invalid option value: %s' % ' '.join(detail.args))
    except utils.ExtensionOptionError as detail:
        return 0, ('invalid option data: %s' % ' '.join(detail.args))
    if blank_finish:
        return 1, options
    else:
        return 0, 'option data incompletely parsed'
|
2171 |
|
def unknown_directive(self, type_name):
    """
    Report a directive whose type is not known.

    The directive's block is fetched from the state machine (and thereby
    consumed) and attached as a literal block to an error system message
    naming `type_name`.

    :Return: a 2-tuple: ``[error]`` and the "blank finish" flag.
    """
    lineno = self.state_machine.abs_line_number()
    indented, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(0, strip_indent=0))
    text = '\n'.join(indented)
    message = 'Unknown directive type "%s".' % type_name
    literal = nodes.literal_block(text, text)
    return [self.reporter.error(message, literal, line=lineno)], blank_finish
|
2181 |
|
def comment(self, match):
    """
    Build a ``comment`` node from an explicit markup start.

    If nothing but whitespace follows the match on its line and the next
    line is blank, an empty comment is returned with a true "blank
    finish".  Otherwise the indented block is fetched, trailing blank
    lines are trimmed, and the text becomes the comment's content.

    :Return: a 2-tuple: ``[comment_node]`` and the "blank finish" flag.
    """
    rest_of_line = match.string[match.end():]
    if not rest_of_line.strip() \
          and self.state_machine.is_next_line_blank():  # an empty comment?
        return [nodes.comment()], 1     # "A tiny but practical wart."
    indented, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end()))
    while indented and not indented[-1].strip():
        indented.trim_end()
    text = '\n'.join(indented)
    return [nodes.comment(text, text)], blank_finish
|
2192 |
|
# Dispatch table for `explicit_construct`: (method, pattern) pairs tried in
# order against the whole source line.  The first pattern that matches
# selects the method, which is called as ``method(self, match)``.  If no
# pattern matches, `explicit_construct` treats the line as a comment.
# ``%s`` in the patterns is filled with ``Inliner.simplename``.
explicit.constructs = [
      (footnote,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[
                  (                 # footnote label:
                      [0-9]+          # manually numbered footnote
                    |               # *OR*
                      \#              # anonymous auto-numbered footnote
                    |               # *OR*
                      \#%s            # auto-number ed?) footnote label
                    |               # *OR*
                      \*              # auto-symbol footnote
                  )
                  \]
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
      (citation,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[(%s)\]          # citation label
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
      (hyperlink_target,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  _                 # target indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE)),
      (substitution_def,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \|                # substitution indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE)),
      (directive,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  (%s)              # directive name
                  [ ]?              # optional space
                  ::                # directive delimiter
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
|
2236 |
|
def explicit_markup(self, match, context, next_state):
    """
    Footnotes, hyperlink targets, directives, comments.

    Parses the construct at the current line, appends the resulting
    nodes to ``self.parent`` and then parses any directly following
    explicit markup through `explicit_list`.
    """
    parsed = self.explicit_construct(match)
    nodelist, blank_finish = parsed
    self.parent += nodelist
    self.explicit_list(blank_finish)
    return [], next_state, []
|
2243 |
|
def explicit_construct(self, match):
    """
    Determine which explicit construct this is, parse & return it.

    The (method, pattern) pairs in ``explicit.constructs`` are tried in
    order against ``match.string``; the first matching pattern's method
    is called and its result returned.  If no pattern matches, or the
    method raises `MarkupError`, the text is parsed as a comment; in the
    latter case a warning built from the exception's message and line
    number is added after the comment nodes.

    :Return: a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    errors = []
    for method, pattern in self.explicit.constructs:
        expmatch = pattern.match(match.string)
        if expmatch:
            try:
                return method(self, expmatch)
            # Python 3 has neither "except X, name" nor tuple unpacking
            # in the except clause; unpack the (message, lineno) arguments
            # explicitly instead.
            except MarkupError as error:    # never reached?
                message, lineno = error.args
                errors.append(self.reporter.warning(message, line=lineno))
                break
    nodelist, blank_finish = self.comment(match)
    return nodelist + errors, blank_finish
|
2257 |
|
def explicit_list(self, blank_finish):
    """
    Create a nested state machine for a series of explicit markup
    constructs (including anonymous hyperlink targets).

    The input lines after the current one are parsed with 'Explicit' as
    the initial state and ``self.parent`` as the container; afterwards
    this state machine is moved to the offset the nested parse returned.
    If the series did not finish with a blank line, an unindent warning
    is appended to ``self.parent``.
    """
    machine = self.state_machine
    following = machine.line_offset + 1         # next line
    newline_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[following:],
        input_offset=machine.abs_line_offset() + 1,
        node=self.parent, initial_state='Explicit',
        blank_finish=blank_finish,
        match_titles=machine.match_titles)
    self.goto_line(newline_offset)
    if blank_finish:
        return
    self.parent += self.unindent_warning('Explicit markup')
|
2273 |
|
def anonymous(self, match, context, next_state):
    """
    Anonymous hyperlink targets.

    Parses the target at the current line, appends the resulting nodes
    to ``self.parent`` and then parses any directly following explicit
    markup through `explicit_list`.
    """
    parsed = self.anonymous_target(match)
    nodelist, blank_finish = parsed
    self.parent += nodelist
    self.explicit_list(blank_finish)
    return [], next_state, []
|
2280 |
|
def anonymous_target(self, match):
    """
    Parse an anonymous hyperlink target.

    The block after the match is fetched from the state machine
    (``until_blank=1``), each line is passed through `escape2null`, and
    the result is turned into a target by `make_target` with an empty
    target name.

    :Return: a 2-tuple: ``[target]`` and the "blank finish" flag.
    """
    lineno = self.state_machine.abs_line_number()
    block, indent, offset, blank_finish = (
        self.state_machine.get_first_known_indented(match.end(),
                                                    until_blank=1))
    blocktext = match.string[:match.end()] + '\n'.join(block)
    escaped_lines = [escape2null(line) for line in block]
    target = self.make_target(escaped_lines, blocktext, lineno, '')
    return [target], blank_finish
|
2290 |
|
def line(self, match, context, next_state):
    """
    Section title overline or transition marker.

    When the state machine matches titles, the line is passed on to the
    'Line' state.  Otherwise a bare ``::`` or a line shorter than four
    characters is re-tried as text (``TransitionCorrection``), the
    latter after an info message; anything else is reported as a severe
    error and consumed.
    """
    machine = self.state_machine
    if machine.match_titles:
        return [match.string], 'Line', []
    stripped = match.string.strip()
    if stripped == '::':
        raise statemachine.TransitionCorrection('text')
    if len(stripped) < 4:
        note = self.reporter.info(
            'Unexpected possible title overline or transition.\n'
            "Treating it as ordinary text because it's so short.",
            line=machine.abs_line_number())
        self.parent += note
        raise statemachine.TransitionCorrection('text')
    source_line = machine.line
    problem = self.reporter.severe(
        'Unexpected section title or transition.',
        nodes.literal_block(source_line, source_line),
        line=machine.abs_line_number())
    self.parent += problem
    return [], next_state, []
|
2312 |
|
def text(self, match, context, next_state):
    """
    Titles, definition lists, paragraphs.

    Passes the matched line as context to the 'Text' state, which
    classifies it further.
    """
    first_line = [match.string]
    return first_line, 'Text', []
|
2316 |
|
2317 |
|
class RFC2822Body(Body):

    """
    State for RFC2822-style headers at the very start of a document.

    RFC2822 headers are only valid as the first constructs in documents.
    As soon as anything else appears, the `Body` state should take over;
    every transition defined here therefore leads to 'Body'.
    """

    patterns = Body.patterns.copy()     # can't modify the original
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    # just before 'text':
    initial_transitions.insert(-1, ('rfc2822', 'Body'))

    def rfc2822(self, match, context, next_state):
        """
        RFC2822-style field list item.

        Starts a ``field_list`` (class "rfc2822") in ``self.parent``,
        adds the field at the current line and parses the following
        lines with 'RFC2822List' as the initial state.
        """
        fieldlist = nodes.field_list(classes=['rfc2822'])
        self.parent += fieldlist
        first_field, blank_finish = self.rfc2822_field(match)
        fieldlist += first_field
        machine = self.state_machine
        following = machine.line_offset + 1     # next line
        newline_offset, blank_finish = self.nested_list_parse(
            machine.input_lines[following:],
            input_offset=machine.abs_line_offset() + 1,
            node=fieldlist, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """
        Build a ``field`` node for the header matched by `match`.

        The field name is the text before the first colon of the line;
        the field body is the indented block after the match, parsed
        into a ``field_body`` node when non-empty.

        :Return: a 2-tuple: the field node and the "blank finish" flag.
        """
        colon = match.string.find(':')
        name = match.string[:colon]
        indented, indent, line_offset, blank_finish = (
            self.state_machine.get_first_known_indented(match.end(),
                                                        until_blank=1))
        fieldnode = nodes.field()
        fieldnode += nodes.field_name(name, name)
        fieldbody = nodes.field_body('\n'.join(indented))
        fieldnode += fieldbody
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=fieldbody)
        return fieldnode, blank_finish
|
2362 |
|
2363 |
|
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.
    Compound elements are lists and list-like constructs.

    Every transition method is disabled here, i.e. redefined as
    `invalid_input`; subclasses override individual methods to re-enable
    them.

    Example: once an initial bullet list item is recognized,
    `Body.bullet` calls its ``self.nested_list_parse``
    (`RSTState.nested_list_parse`), which starts a nested parsing
    session with `BulletList` as the initial state and a "bullet_list"
    node as its container.  `BulletList` enables only the ``bullet``
    transition method, so as long as bullet list items are encountered
    they are parsed and inserted into the container.  The first
    construct that is *not* a bullet list item triggers `invalid_input`,
    which ends the nested parse and closes the container.  Because
    `BulletList` must recognize everything *other than* bullet list
    items as invalid, it inherits the transition list created in `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        # back up so parent SM can reassess
        self.state_machine.previous_line()
        raise EOFError

    # Disable every transition inherited from `Body`.
    indent = bullet = enumerator = invalid_input
    field_marker = option_marker = invalid_input
    doctest = line_block = invalid_input
    grid_table_top = simple_table_top = invalid_input
    explicit_markup = anonymous = invalid_input
    line = text = invalid_input
|
2406 |
|
2407 |
|
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """
        Bullet list item.

        A line whose first character differs from the container's
        ``bullet`` attribute is handed to `invalid_input`.  Otherwise the
        parsed list item is added to the container and its `blank_finish`
        flag is recorded on the state.
        """
        marker = match.string[0]
        if marker != self.parent['bullet']:
            # A different bullet character starts a new list.
            self.invalid_input()
        item, finished_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = finished_blank
        return [], next_state, []
|
2421 |
|
2422 |
|
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """
        Definition lists.

        Put the matched line into a fresh context and switch to the
        'Definition' state.
        """
        new_context = [match.string]
        return new_context, 'Definition', []
|
2430 |
|
2431 |
|
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """
        Enumerated list item.

        Reads ``self.format``, ``self.auto`` and ``self.lastordinal``, which
        are not set in this class (presumably supplied by whoever starts
        the nested parse -- verify against the caller).  An item that does
        not continue the current enumeration is handed to `invalid_input`.
        """
        enumtype = self.parent['enumtype']
        fmt, sequence, text, ordinal = self.parse_enumerator(match, enumtype)
        auto_numbered = sequence == '#'
        if (fmt != self.format
            or (not auto_numbered
                and (sequence != enumtype
                     or self.auto
                     or ordinal != self.lastordinal + 1))
            or not self.is_enumerated_list_item(ordinal, sequence, fmt)):
            # A different enumeration starts a new list.
            self.invalid_input()
        if auto_numbered:
            self.auto = 1
        item, finished_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = finished_blank
        self.lastordinal = ordinal
        return [], next_state, []
|
2454 |
|
2455 |
|
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """
        Field list field.

        Add the field parsed by ``self.field`` to the container and record
        its `blank_finish` flag on the state.
        """
        new_field, finished_blank = self.field(match)
        self.parent += new_field
        self.blank_finish = finished_blank
        return [], next_state, []
|
2466 |
|
2467 |
|
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """
        Option list item.

        Add the item parsed by ``self.option_list_item`` to the container
        and record its `blank_finish` flag on the state.  If parsing raises
        `MarkupError`, the line is handed to `invalid_input` instead.
        """
        try:
            option_list_item, blank_finish = self.option_list_item(match)
        except MarkupError:
            # The exception's details are not needed, so it is not bound
            # (binding and unpacking it would fail for an exception that
            # does not carry exactly two arguments).
            # `SpecializedBody.invalid_input` raises EOFError, so the
            # statements below are not reached on this path.
            self.invalid_input()
        self.parent += option_list_item
        self.blank_finish = blank_finish
        return [], next_state, []
|
2481 |
|
2482 |
|
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    # Reuse the patterns and transitions declared by `RFC2822Body`.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """
        RFC2822-style field list item.

        Add the field built by ``self.rfc2822_field`` to the container,
        record its `blank_finish` flag, and stay in the 'RFC2822List' state.
        """
        new_field, finished_blank = self.rfc2822_field(match)
        self.parent += new_field
        self.blank_finish = finished_blank
        return [], 'RFC2822List', []

    # A blank line is treated like any other invalid input in this state.
    blank = SpecializedBody.invalid_input
|
2498 |
|
2499 |
|
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node):
        """
        Override `Body.parse_field_body` for simpler parsing.

        Each run of consecutive non-blank lines in `indented` becomes one
        ``paragraph`` node (lines joined with newlines) appended to `node`.
        `offset` is not used.
        """
        pending = []
        # The trailing '' acts as a sentinel that flushes the last run.
        for line in list(indented) + ['']:
            if line.strip():
                pending.append(line)
                continue
            if pending:
                text = '\n'.join(pending)
                node += nodes.paragraph(text, text)
                pending = []
|
2518 |
|
2519 |
|
class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    # A blank line is treated as invalid input in this state.
    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """
        New line of line block.

        The parsed line is added to the container; any messages returned
        with it are added to the container's parent.
        """
        current_lineno = self.state_machine.abs_line_number()
        new_line, messages, finished_blank = self.line_block_line(
            match, current_lineno)
        self.parent += new_line
        self.parent.parent += messages
        self.blank_finish = finished_blank
        return [], next_state, []
|
2534 |
|
2535 |
|
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """
        Footnotes, hyperlink targets, directives, comments.

        Add the nodes returned by ``self.explicit_construct`` to the
        container and record the `blank_finish` flag on the state.
        """
        new_nodes, finished_blank = self.explicit_construct(match)
        self.parent += new_nodes
        self.blank_finish = finished_blank
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """
        Anonymous hyperlink targets.

        Add the nodes returned by ``self.anonymous_target`` to the
        container and record the `blank_finish` flag on the state.
        """
        new_nodes, finished_blank = self.anonymous_target(match)
        self.parent += new_nodes
        self.blank_finish = finished_blank
        return [], next_state, []

    # A blank line is treated as invalid input in this state.
    blank = SpecializedBody.invalid_input
|
2555 |
|
2556 |
|
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
        'embedded_directive': re.compile(
            r'(%s)::( +|$)' % Inliner.simplename, re.UNICODE),
        'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """
        Parse a directive embedded in the substitution definition.

        The nodes returned by ``self.directive`` are added to the
        container; the first entry of the container's ``names`` attribute
        is passed to the directive as `alt`.  Always ends this state
        machine by raising EOFError.
        """
        alt_name = self.parent['names'][0]
        new_nodes, finished_blank = self.directive(match, alt=alt_name)
        self.parent += new_nodes
        if not self.state_machine.at_eof():
            self.blank_finish = finished_blank
        raise EOFError()

    def text(self, match, context, next_state):
        """
        Any other text ends this state machine (EOFError is raised).

        Unless the input is exhausted, `blank_finish` records whether the
        next line is blank.
        """
        state_machine = self.state_machine
        if not state_machine.at_eof():
            self.blank_finish = state_machine.is_next_line_blank()
        raise EOFError()
|
2581 |
|
2582 |
|
class Text(RSTState):

    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.  The first
    line of the block is received in `context`.
    """

    patterns = {'underline': Body.patterns['line'],
                'text': r''}
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]

    def blank(self, match, context, next_state):
        """End of paragraph."""
        paragraph, literalnext = self.paragraph(
            context, self.state_machine.abs_line_number() - 1)
        self.parent += paragraph
        if literalnext:
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        """End of input: finish a pending paragraph like a blank line."""
        if context:
            self.blank(None, context, None)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        definitionlist = nodes.definition_list()
        definitionlistitem, blank_finish = self.definition_list_item(context)
        definitionlist += definitionlistitem
        self.parent += definitionlist
        offset = self.state_machine.line_offset + 1   # next line
        # Parse any further items of the same list in a nested session.
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=definitionlist, initial_state='DefinitionList',
            blank_finish=blank_finish, blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []

    def underline(self, match, context, next_state):
        """
        Section title.

        `context[0]` is the title line and the matched line is its
        underline.  An underline shorter than the title is either treated
        as ordinary text (fewer than 4 characters) or accepted with a
        warning.  Where titles are not allowed
        (``self.state_machine.match_titles`` is false) a severe message is
        reported instead of creating a section.
        """
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        underline = match.string.rstrip()
        source = title + '\n' + underline
        # Unstripped title and underline lines, quoted in system messages.
        blocktext = context[0] + '\n' + self.state_machine.line
        messages = []
        if column_width(title) > len(underline):
            if len(underline) < 4:
                if self.state_machine.match_titles:
                    msg = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += msg
                raise statemachine.TransitionCorrection('text')
            messages.append(self.reporter.warning(
                'Title underline too short.',
                nodes.literal_block(blocktext, blocktext), line=lineno))
        if not self.state_machine.match_titles:
            msg = self.reporter.severe(
                'Unexpected section title.',
                nodes.literal_block(blocktext, blocktext), line=lineno)
            self.parent += messages
            self.parent += msg
            return [], next_state, []
        style = underline[0]
        context[:] = []
        self.section(title, source, style, lineno - 1, messages)
        return [], next_state, []

    def text(self, match, context, next_state):
        """Paragraph."""
        startline = self.state_machine.abs_line_number() - 1
        msg = None
        try:
            block = self.state_machine.get_text_block(flush_left=1)
        except statemachine.UnexpectedIndentationError:
            # The exception is fetched with sys.exc_info() rather than
            # bound in the ``except`` clause, because the binding syntax
            # differs between Python 2 and Python 3.
            import sys
            block, source, lineno = sys.exc_info()[1].args
            msg = self.reporter.error('Unexpected indentation.',
                                      source=source, line=lineno)
        lines = context + list(block)
        paragraph, literalnext = self.paragraph(lines, startline)
        self.parent += paragraph
        self.parent += msg              # `msg` is None if nothing went wrong
        if literalnext:
            try:
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []

    def literal_block(self):
        """Return a list of nodes."""
        indented, indent, offset, blank_finish = \
            self.state_machine.get_indented()
        # Drop trailing blank lines.
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            # No indented text: try a quoted (unindented) literal block.
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        literal_block = nodes.literal_block(data, data)
        literal_block.line = offset + 1
        nodelist = [literal_block]
        if not blank_finish:
            nodelist.append(self.unindent_warning('Literal block'))
        return nodelist

    def quoted_literal_block(self):
        """
        Parse a quoted literal block with a nested `QuotedLiteralBlock`
        state machine; return the list of nodes it produced.
        """
        abs_line_offset = self.state_machine.abs_line_offset()
        offset = self.state_machine.line_offset
        # Throw-away container that collects the nested parse's output.
        parent_node = nodes.Element()
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[offset:],
            input_offset=abs_line_offset, node=parent_node, match_titles=0,
            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
                                  'initial_state': 'QuotedLiteralBlock'})
        self.goto_line(new_abs_offset)
        return parent_node.children

    def definition_list_item(self, termline):
        """
        Build a definition_list_item from `termline` (a one-item list
        holding the term line) and the indented block that follows it.

        Return a ``(definition_list_item node, blank_finish)`` tuple.
        """
        indented, indent, line_offset, blank_finish = \
            self.state_machine.get_indented()
        definitionlistitem = nodes.definition_list_item(
            '\n'.join(termline + list(indented)))
        lineno = self.state_machine.abs_line_number() - 1
        definitionlistitem.line = lineno
        termlist, messages = self.term(termline, lineno)
        definitionlistitem += termlist
        definition = nodes.definition('', *messages)
        definitionlistitem += definition
        if termline[0][-2:] == '::':
            definition += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.', line=line_offset+1)
        self.nested_parse(indented, input_offset=line_offset, node=definition)
        return definitionlistitem, blank_finish

    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """
        Return a definition_list's term and optional classifiers.

        The result is ``(node_list, messages)``: `node_list` starts with
        the ``term`` node, followed by one ``classifier`` node for each
        part that follows a `classifier_delimiter` inside a text node.
        """
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        term_node = nodes.term()
        node_list = [term_node]
        for node in text_nodes:
            if not isinstance(node, nodes.Text):
                # Inline elements go to the latest term/classifier as is.
                node_list[-1] += node
                continue
            parts = self.classifier_delimiter.split(node.rawsource)
            if len(parts) == 1:
                node_list[-1] += node
            else:
                node_list[-1] += nodes.Text(parts[0].rstrip())
                for part in parts[1:]:
                    node_list.append(nodes.classifier('', part))
        return node_list, messages
|
2752 |
|
2753 |
|
class SpecializedText(Text):

    """
    Base class for states handling the second and later lines of
    Text-variants.

    Every transition method is replaced by `invalid_input`.  Subclasses
    override individual methods to re-enable them.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member.  Abort this state machine."""
        raise EOFError()

    # All transitions are disabled here; subclasses override selectively.
    blank = invalid_input
    indent = invalid_input
    underline = invalid_input
    text = invalid_input
|
2775 |
|
2776 |
|
class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        # Back up two lines so the parent state machine can reassess.
        self.state_machine.previous_line(2)
        return []

    def indent(self, match, context, next_state):
        """
        Definition list item.

        Add the item built from `context` and the following indented block
        to the container, record its `blank_finish` flag, and switch to the
        'DefinitionList' state.
        """
        item, finished_blank = self.definition_list_item(context)
        self.parent += item
        self.blank_finish = finished_blank
        return [], 'DefinitionList', []
|
2792 |
|
2793 |
|
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.

    The first line (the overline or transition marker) is received in
    `context`.
    """

    eofcheck = 1                        # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        memo = self.memo
        if memo.section_bubble_up_kludge:
            memo.section_bubble_up_kludge = 0
        elif len(marker) < 4:
            # Too short for a transition marker.
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            transition = nodes.transition(rawsource=context[0])
            transition.line = self.state_machine.abs_line_number() - 1
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        lineno = self.state_machine.abs_line_number() - 1
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition marker.
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.line = lineno
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """
        Potential over- & underlined title.

        `context[0]` is the overline, the matched line is the title, and
        the following input line is read as the underline.  A malformed
        title with an overline shorter than 4 characters goes to
        `short_overline`; with a longer overline a system message is
        reported.
        """
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        # Trailing whitespace never counts, so this holds for both the raw
        # and the stripped overline used below.
        overline_is_short = len(overline.rstrip()) < 4
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            blocktext = '\n'.join([overline, title])
            if overline_is_short:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                self.parent += self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext), line=lineno)
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        # Decide first what (if anything) is wrong with the underline.
        if not self.transitions['underline'][0].match(underline):
            problem = 'Missing matching underline for section title overline.'
        elif overline != underline:
            problem = 'Title overline & underline mismatch.'
        else:
            problem = None
        if problem is not None:
            blocktext = '\n'.join([overline, title, underline])
            if overline_is_short:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                self.parent += self.reporter.severe(
                    problem,
                    nodes.literal_block(source, source), line=lineno)
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = '\n'.join([overline, title, underline])
            if overline_is_short:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                messages.append(self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source), line=lineno))
        style = (overline[0], underline[0])
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text                       # indented title

    def underline(self, match, context, next_state):
        """
        A second line that matches the underline pattern directly after
        the overline: report an error, unless the overline is short enough
        to be treated as ordinary text.
        """
        overline = context[0]
        lineno = self.state_machine.abs_line_number() - 1
        blocktext = '\n'.join([overline, self.state_machine.line])
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        self.parent += self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext), line=lineno)
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        """
        Report an info-level message about a too-short overline, then
        apply `state_correction`.  `blocktext` is accepted but not used.
        """
        self.parent += self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.", line=lineno)
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """
        Back up `lines` input lines, empty `context` in place, and raise
        ``statemachine.StateCorrection('Body', 'text')``.
        """
        self.state_machine.previous_line(lines)
        del context[:]
        raise statemachine.StateCorrection('Body', 'text')
|
2912 |
|
2913 |
|
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=0):
        RSTState.__init__(self, state_machine, debug)
        # System messages collected during the parse; added to the parent
        # in `eof`.
        self.messages = []
        # Line number of the block's first line; set in `initial_quoted`.
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        """A blank line ends the block once any lines were collected."""
        if not context:
            return context, next_state, []
        raise EOFError()

    def eof(self, context):
        """
        Emit the collected lines as a literal_block, or a warning if there
        are none; then emit the collected system messages.
        """
        if context:
            text = '\n'.join(context)
            block = nodes.literal_block(text, text)
            block.line = self.initial_lineno
            self.parent += block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        """An indented line inside the block is an error; end the block."""
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        error = self.reporter.error(
            'Unexpected indentation.',
            line=self.state_machine.abs_line_number())
        self.messages.append(error)
        self.state_machine.previous_line()
        raise EOFError()

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote_char = match.string[0]
        same_quote = re.compile(re.escape(quote_char))
        # New transition matches consistent quotes only:
        self.add_transition(
            'quoted', (same_quote, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        """
        Any other line ends the block; after at least one quoted line it
        is also reported as inconsistent quoting.
        """
        if context:
            error = self.reporter.error(
                'Inconsistent literal block quoting.',
                line=self.state_machine.abs_line_number())
            self.messages.append(error)
            self.state_machine.previous_line()
        raise EOFError()
|
2983 |
|
2984 |
|
state_classes = (
    Body,
    BulletList,
    DefinitionList,
    EnumeratedList,
    FieldList,
    OptionList,
    LineBlock,
    ExtensionOptions,
    Explicit,
    Text,
    Definition,
    Line,
    SubstitutionDef,
    RFC2822Body,
    RFC2822List,
)
"""Standard set of State classes used to start `RSTStateMachine`."""