symbian-qemu-0.9.1-12/python-2.6.1/Lib/textwrap.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 """Text wrapping and filling.
       
     2 """
       
     3 
       
     4 # Copyright (C) 1999-2001 Gregory P. Ward.
       
     5 # Copyright (C) 2002, 2003 Python Software Foundation.
       
     6 # Written by Greg Ward <gward@python.net>
       
     7 
       
     8 __revision__ = "$Id: textwrap.py 65349 2008-08-01 01:34:05Z brett.cannon $"
       
     9 
       
    10 import string, re
       
    11 
       
    12 # Do the right thing with boolean values for all known Python versions
       
    13 # (so this module can be copied to projects that don't depend on Python
       
    14 # 2.3, e.g. Optik and Docutils) by uncommenting the block of code below.
       
    15 #try:
       
    16 #    True, False
       
    17 #except NameError:
       
    18 #    (True, False) = (1, 0)
       
    19 
       
    20 __all__ = ['TextWrapper', 'wrap', 'fill']
       
    21 
       
# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters (tab, newline, vertical tab, form feed,
# carriage return and space).  The main reason for doing this is that
# in ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
# that character winds up in string.whitespace.  Respecting
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
# same as any other whitespace char, which is clearly wrong (it's a
# *non-breaking* space), 2) possibly cause problems with Unicode,
# since 0xa0 is not in range(128).
_whitespace = '\t\n\x0b\x0c\r '
       
    31 
       
    32 class TextWrapper:
       
    33     """
       
    34     Object for wrapping/filling text.  The public interface consists of
       
    35     the wrap() and fill() methods; the other methods are just there for
       
    36     subclasses to override in order to tweak the default behaviour.
       
    37     If you want to completely replace the main wrapping algorithm,
       
    38     you'll probably have to override _wrap_chunks().
       
    39 
       
    40     Several instance attributes control various aspects of wrapping:
       
    41       width (default: 70)
       
    42         the maximum width of wrapped lines (unless break_long_words
       
    43         is false)
       
    44       initial_indent (default: "")
       
    45         string that will be prepended to the first line of wrapped
       
    46         output.  Counts towards the line's width.
       
    47       subsequent_indent (default: "")
       
    48         string that will be prepended to all lines save the first
       
    49         of wrapped output; also counts towards each line's width.
       
    50       expand_tabs (default: true)
       
    51         Expand tabs in input text to spaces before further processing.
       
    52         Each tab will become 1 .. 8 spaces, depending on its position in
       
    53         its line.  If false, each tab is treated as a single character.
       
    54       replace_whitespace (default: true)
       
    55         Replace all whitespace characters in the input text by spaces
       
    56         after tab expansion.  Note that if expand_tabs is false and
       
    57         replace_whitespace is true, every tab will be converted to a
       
    58         single space!
       
    59       fix_sentence_endings (default: false)
       
    60         Ensure that sentence-ending punctuation is always followed
       
    61         by two spaces.  Off by default because the algorithm is
       
    62         (unavoidably) imperfect.
       
    63       break_long_words (default: true)
       
    64         Break words longer than 'width'.  If false, those words will not
       
    65         be broken, and some lines might be longer than 'width'.
       
    66       break_on_hyphens (default: true)
       
    67         Allow breaking hyphenated words. If true, wrapping will occur
       
    68         preferably on whitespaces and right after hyphens part of
       
    69         compound words.
       
    70       drop_whitespace (default: true)
       
    71         Drop leading and trailing whitespace from lines.
       
    72     """
       
    73 
       
    74     whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))
       
    75 
       
    76     unicode_whitespace_trans = {}
       
    77     uspace = ord(u' ')
       
    78     for x in map(ord, _whitespace):
       
    79         unicode_whitespace_trans[x] = uspace
       
    80 
       
    81     # This funky little regex is just the trick for splitting
       
    82     # text up into word-wrappable chunks.  E.g.
       
    83     #   "Hello there -- you goof-ball, use the -b option!"
       
    84     # splits into
       
    85     #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
       
    86     # (after stripping out empty strings).
       
    87     wordsep_re = re.compile(
       
    88         r'(\s+|'                                  # any whitespace
       
    89         r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
       
    90         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
       
    91 
       
    92     # This less funky little regex just split on recognized spaces. E.g.
       
    93     #   "Hello there -- you goof-ball, use the -b option!"
       
    94     # splits into
       
    95     #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
       
    96     wordsep_simple_re = re.compile(r'(\s+)')
       
    97 
       
    98     # XXX this is not locale- or charset-aware -- string.lowercase
       
    99     # is US-ASCII only (and therefore English-only)
       
   100     sentence_end_re = re.compile(r'[%s]'              # lowercase letter
       
   101                                  r'[\.\!\?]'          # sentence-ending punct.
       
   102                                  r'[\"\']?'           # optional end-of-quote
       
   103                                  r'\Z'                # end of chunk
       
   104                                  % string.lowercase)
       
   105 
       
   106 
       
   107     def __init__(self,
       
   108                  width=70,
       
   109                  initial_indent="",
       
   110                  subsequent_indent="",
       
   111                  expand_tabs=True,
       
   112                  replace_whitespace=True,
       
   113                  fix_sentence_endings=False,
       
   114                  break_long_words=True,
       
   115                  drop_whitespace=True,
       
   116                  break_on_hyphens=True):
       
   117         self.width = width
       
   118         self.initial_indent = initial_indent
       
   119         self.subsequent_indent = subsequent_indent
       
   120         self.expand_tabs = expand_tabs
       
   121         self.replace_whitespace = replace_whitespace
       
   122         self.fix_sentence_endings = fix_sentence_endings
       
   123         self.break_long_words = break_long_words
       
   124         self.drop_whitespace = drop_whitespace
       
   125         self.break_on_hyphens = break_on_hyphens
       
   126 
       
   127 
       
   128     # -- Private methods -----------------------------------------------
       
   129     # (possibly useful for subclasses to override)
       
   130 
       
   131     def _munge_whitespace(self, text):
       
   132         """_munge_whitespace(text : string) -> string
       
   133 
       
   134         Munge whitespace in text: expand tabs and convert all other
       
   135         whitespace characters to spaces.  Eg. " foo\tbar\n\nbaz"
       
   136         becomes " foo    bar  baz".
       
   137         """
       
   138         if self.expand_tabs:
       
   139             text = text.expandtabs()
       
   140         if self.replace_whitespace:
       
   141             if isinstance(text, str):
       
   142                 text = text.translate(self.whitespace_trans)
       
   143             elif isinstance(text, unicode):
       
   144                 text = text.translate(self.unicode_whitespace_trans)
       
   145         return text
       
   146 
       
   147 
       
   148     def _split(self, text):
       
   149         """_split(text : string) -> [string]
       
   150 
       
   151         Split the text to wrap into indivisible chunks.  Chunks are
       
   152         not quite the same as words; see wrap_chunks() for full
       
   153         details.  As an example, the text
       
   154           Look, goof-ball -- use the -b option!
       
   155         breaks into the following chunks:
       
   156           'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
       
   157           'use', ' ', 'the', ' ', '-b', ' ', 'option!'
       
   158         if break_on_hyphens is True, or in:
       
   159           'Look,', ' ', 'goof-ball', ' ', '--', ' ',
       
   160           'use', ' ', 'the', ' ', '-b', ' ', option!'
       
   161         otherwise.
       
   162         """
       
   163         if self.break_on_hyphens is True:
       
   164             chunks = self.wordsep_re.split(text)
       
   165         else:
       
   166             chunks = self.wordsep_simple_re.split(text)
       
   167         chunks = filter(None, chunks)  # remove empty chunks
       
   168         return chunks
       
   169 
       
   170     def _fix_sentence_endings(self, chunks):
       
   171         """_fix_sentence_endings(chunks : [string])
       
   172 
       
   173         Correct for sentence endings buried in 'chunks'.  Eg. when the
       
   174         original text contains "... foo.\nBar ...", munge_whitespace()
       
   175         and split() will convert that to [..., "foo.", " ", "Bar", ...]
       
   176         which has one too few spaces; this method simply changes the one
       
   177         space to two.
       
   178         """
       
   179         i = 0
       
   180         pat = self.sentence_end_re
       
   181         while i < len(chunks)-1:
       
   182             if chunks[i+1] == " " and pat.search(chunks[i]):
       
   183                 chunks[i+1] = "  "
       
   184                 i += 2
       
   185             else:
       
   186                 i += 1
       
   187 
       
   188     def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
       
   189         """_handle_long_word(chunks : [string],
       
   190                              cur_line : [string],
       
   191                              cur_len : int, width : int)
       
   192 
       
   193         Handle a chunk of text (most likely a word, not whitespace) that
       
   194         is too long to fit in any line.
       
   195         """
       
   196         # Figure out when indent is larger than the specified width, and make
       
   197         # sure at least one character is stripped off on every pass
       
   198         if width < 1:
       
   199             space_left = 1
       
   200         else:
       
   201             space_left = width - cur_len
       
   202 
       
   203         # If we're allowed to break long words, then do so: put as much
       
   204         # of the next chunk onto the current line as will fit.
       
   205         if self.break_long_words:
       
   206             cur_line.append(reversed_chunks[-1][:space_left])
       
   207             reversed_chunks[-1] = reversed_chunks[-1][space_left:]
       
   208 
       
   209         # Otherwise, we have to preserve the long word intact.  Only add
       
   210         # it to the current line if there's nothing already there --
       
   211         # that minimizes how much we violate the width constraint.
       
   212         elif not cur_line:
       
   213             cur_line.append(reversed_chunks.pop())
       
   214 
       
   215         # If we're not allowed to break long words, and there's already
       
   216         # text on the current line, do nothing.  Next time through the
       
   217         # main loop of _wrap_chunks(), we'll wind up here again, but
       
   218         # cur_len will be zero, so the next line will be entirely
       
   219         # devoted to the long word that we can't handle right now.
       
   220 
       
   221     def _wrap_chunks(self, chunks):
       
   222         """_wrap_chunks(chunks : [string]) -> [string]
       
   223 
       
   224         Wrap a sequence of text chunks and return a list of lines of
       
   225         length 'self.width' or less.  (If 'break_long_words' is false,
       
   226         some lines may be longer than this.)  Chunks correspond roughly
       
   227         to words and the whitespace between them: each chunk is
       
   228         indivisible (modulo 'break_long_words'), but a line break can
       
   229         come between any two chunks.  Chunks should not have internal
       
   230         whitespace; ie. a chunk is either all whitespace or a "word".
       
   231         Whitespace chunks will be removed from the beginning and end of
       
   232         lines, but apart from that whitespace is preserved.
       
   233         """
       
   234         lines = []
       
   235         if self.width <= 0:
       
   236             raise ValueError("invalid width %r (must be > 0)" % self.width)
       
   237 
       
   238         # Arrange in reverse order so items can be efficiently popped
       
   239         # from a stack of chucks.
       
   240         chunks.reverse()
       
   241 
       
   242         while chunks:
       
   243 
       
   244             # Start the list of chunks that will make up the current line.
       
   245             # cur_len is just the length of all the chunks in cur_line.
       
   246             cur_line = []
       
   247             cur_len = 0
       
   248 
       
   249             # Figure out which static string will prefix this line.
       
   250             if lines:
       
   251                 indent = self.subsequent_indent
       
   252             else:
       
   253                 indent = self.initial_indent
       
   254 
       
   255             # Maximum width for this line.
       
   256             width = self.width - len(indent)
       
   257 
       
   258             # First chunk on line is whitespace -- drop it, unless this
       
   259             # is the very beginning of the text (ie. no lines started yet).
       
   260             if self.drop_whitespace and chunks[-1].strip() == '' and lines:
       
   261                 del chunks[-1]
       
   262 
       
   263             while chunks:
       
   264                 l = len(chunks[-1])
       
   265 
       
   266                 # Can at least squeeze this chunk onto the current line.
       
   267                 if cur_len + l <= width:
       
   268                     cur_line.append(chunks.pop())
       
   269                     cur_len += l
       
   270 
       
   271                 # Nope, this line is full.
       
   272                 else:
       
   273                     break
       
   274 
       
   275             # The current line is full, and the next chunk is too big to
       
   276             # fit on *any* line (not just this one).
       
   277             if chunks and len(chunks[-1]) > width:
       
   278                 self._handle_long_word(chunks, cur_line, cur_len, width)
       
   279 
       
   280             # If the last chunk on this line is all whitespace, drop it.
       
   281             if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
       
   282                 del cur_line[-1]
       
   283 
       
   284             # Convert current line back to a string and store it in list
       
   285             # of all lines (return value).
       
   286             if cur_line:
       
   287                 lines.append(indent + ''.join(cur_line))
       
   288 
       
   289         return lines
       
   290 
       
   291 
       
   292     # -- Public interface ----------------------------------------------
       
   293 
       
   294     def wrap(self, text):
       
   295         """wrap(text : string) -> [string]
       
   296 
       
   297         Reformat the single paragraph in 'text' so it fits in lines of
       
   298         no more than 'self.width' columns, and return a list of wrapped
       
   299         lines.  Tabs in 'text' are expanded with string.expandtabs(),
       
   300         and all other whitespace characters (including newline) are
       
   301         converted to space.
       
   302         """
       
   303         text = self._munge_whitespace(text)
       
   304         chunks = self._split(text)
       
   305         if self.fix_sentence_endings:
       
   306             self._fix_sentence_endings(chunks)
       
   307         return self._wrap_chunks(chunks)
       
   308 
       
   309     def fill(self, text):
       
   310         """fill(text : string) -> string
       
   311 
       
   312         Reformat the single paragraph in 'text' to fit in lines of no
       
   313         more than 'self.width' columns, and return a new string
       
   314         containing the entire wrapped paragraph.
       
   315         """
       
   316         return "\n".join(self.wrap(text))
       
   317 
       
   318 
       
   319 # -- Convenience interface ---------------------------------------------
       
   320 
       
   321 def wrap(text, width=70, **kwargs):
       
   322     """Wrap a single paragraph of text, returning a list of wrapped lines.
       
   323 
       
   324     Reformat the single paragraph in 'text' so it fits in lines of no
       
   325     more than 'width' columns, and return a list of wrapped lines.  By
       
   326     default, tabs in 'text' are expanded with string.expandtabs(), and
       
   327     all other whitespace characters (including newline) are converted to
       
   328     space.  See TextWrapper class for available keyword args to customize
       
   329     wrapping behaviour.
       
   330     """
       
   331     w = TextWrapper(width=width, **kwargs)
       
   332     return w.wrap(text)
       
   333 
       
   334 def fill(text, width=70, **kwargs):
       
   335     """Fill a single paragraph of text, returning a new string.
       
   336 
       
   337     Reformat the single paragraph in 'text' to fit in lines of no more
       
   338     than 'width' columns, and return a new string containing the entire
       
   339     wrapped paragraph.  As with wrap(), tabs are expanded and other
       
   340     whitespace characters converted to space.  See TextWrapper class for
       
   341     available keyword args to customize wrapping behaviour.
       
   342     """
       
   343     w = TextWrapper(width=width, **kwargs)
       
   344     return w.fill(text)
       
   345 
       
   346 
       
   347 # -- Loosely related functionality -------------------------------------
       
   348 
       
   349 _whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
       
   350 _leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
       
   351 
       
   352 def dedent(text):
       
   353     """Remove any common leading whitespace from every line in `text`.
       
   354 
       
   355     This can be used to make triple-quoted strings line up with the left
       
   356     edge of the display, while still presenting them in the source code
       
   357     in indented form.
       
   358 
       
   359     Note that tabs and spaces are both treated as whitespace, but they
       
   360     are not equal: the lines "  hello" and "\thello" are
       
   361     considered to have no common leading whitespace.  (This behaviour is
       
   362     new in Python 2.5; older versions of this module incorrectly
       
   363     expanded tabs before searching for common leading whitespace.)
       
   364     """
       
   365     # Look for the longest leading string of spaces and tabs common to
       
   366     # all lines.
       
   367     margin = None
       
   368     text = _whitespace_only_re.sub('', text)
       
   369     indents = _leading_whitespace_re.findall(text)
       
   370     for indent in indents:
       
   371         if margin is None:
       
   372             margin = indent
       
   373 
       
   374         # Current line more deeply indented than previous winner:
       
   375         # no change (previous winner is still on top).
       
   376         elif indent.startswith(margin):
       
   377             pass
       
   378 
       
   379         # Current line consistent with and no deeper than previous winner:
       
   380         # it's the new winner.
       
   381         elif margin.startswith(indent):
       
   382             margin = indent
       
   383 
       
   384         # Current line and previous winner have no common whitespace:
       
   385         # there is no margin.
       
   386         else:
       
   387             margin = ""
       
   388             break
       
   389 
       
   390     # sanity check (testing/debugging only)
       
   391     if 0 and margin:
       
   392         for line in text.split("\n"):
       
   393             assert not line or line.startswith(margin), \
       
   394                    "line = %r, margin = %r" % (line, margin)
       
   395 
       
   396     if margin:
       
   397         text = re.sub(r'(?m)^' + margin, '', text)
       
   398     return text
       
   399 
       
   400 if __name__ == "__main__":
       
   401     #print dedent("\tfoo\n\tbar")
       
   402     #print dedent("  \thello there\n  \t  how are you?")
       
   403     print dedent("Hello there.\n  This is indented.")