sbsv2/raptor/python/pyparsing.py
changeset 0 044383f39525
child 3 e1eecf4d390d
equal deleted inserted replaced
-1:000000000000 0:044383f39525
       
     1 # module pyparsing.py
       
     2 
       
     3 #
       
     4 
       
     5 # Copyright (c) 2003-2006  Paul T. McGuire
       
     6 
       
     7 #
       
     8 
       
     9 # Permission is hereby granted, free of charge, to any person obtaining
       
    10 
       
    11 # a copy of this software and associated documentation files (the
       
    12 
       
    13 # "Software"), to deal in the Software without restriction, including
       
    14 
       
    15 # without limitation the rights to use, copy, modify, merge, publish,
       
    16 
       
    17 # distribute, sublicense, and/or sell copies of the Software, and to
       
    18 
       
    19 # permit persons to whom the Software is furnished to do so, subject to
       
    20 
       
    21 # the following conditions:
       
    22 
       
    23 #
       
    24 
       
    25 # The above copyright notice and this permission notice shall be
       
    26 
       
    27 # included in all copies or substantial portions of the Software.
       
    28 
       
    29 #
       
    30 
       
    31 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
       
    32 
       
    33 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
       
    34 
       
    35 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
       
    36 
       
    37 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
       
    38 
       
    39 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
       
    40 
       
    41 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
       
    42 
       
    43 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
       
    44 
       
    45 #
       
    46 
       
    47 #from __future__ import generators
       
    48 
       
    49 
       
    50 
       
    51 __doc__ = \
       
    52 """
       
    53 
       
    54 pyparsing module - Classes and methods to define and execute parsing grammars
       
    55 
       
    56 
       
    57 
       
    58 The pyparsing module is an alternative approach to creating and executing simple grammars, 
       
    59 
       
    60 vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
       
    61 
       
    62 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
       
    63 
       
    64 provides a library of classes that you use to construct the grammar directly in Python.
       
    65 
       
    66 
       
    67 
       
    68 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
       
    69 
       
    70 
       
    71 
       
    72     from pyparsing import Word, alphas
       
    73 
       
    74     
       
    75 
       
    76     # define grammar of a greeting
       
    77 
       
    78     greet = Word( alphas ) + "," + Word( alphas ) + "!" 
       
    79 
       
    80     
       
    81 
       
    82     hello = "Hello, World!"
       
    83 
       
    84     print hello, "->", greet.parseString( hello )
       
    85 
       
    86 
       
    87 
       
    88 The program outputs the following::
       
    89 
       
    90 
       
    91 
       
    92     Hello, World! -> ['Hello', ',', 'World', '!']
       
    93 
       
    94 
       
    95 
       
    96 The Python representation of the grammar is quite readable, owing to the self-explanatory 
       
    97 
       
    98 class names, and the use of '+', '|' and '^' operators.
       
    99 
       
   100 
       
   101 
       
   102 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 
       
   103 
       
   104 object with named attributes.
       
   105 
       
   106 
       
   107 
       
   108 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
       
   109 
       
   110  - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
       
   111 
       
   112  - quoted strings
       
   113 
       
   114  - embedded comments
       
   115 
       
   116 """
       
   117 
       
   118 __version__ = "1.4.5"
       
   119 
       
   120 __versionTime__ = "16 December 2006 07:20"
       
   121 
       
   122 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
       
   123 
       
   124 
       
   125 
       
   126 import string
       
   127 
       
   128 import copy,sys
       
   129 
       
   130 import warnings
       
   131 
       
   132 import re
       
   133 
       
   134 import sre_constants
       
   135 
       
   136 import xml.sax.saxutils
       
   137 
       
   138 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
       
   139 
       
   140 
       
   141 
       
   142 def _ustr(obj):
       
   143 
       
   144     """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
       
   145 
       
   146        str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
       
   147 
       
   148        then < returns the unicode object | encodes it with the default encoding | ... >.
       
   149 
       
   150     """
       
   151 
       
   152     try:
       
   153 
       
   154         # If this works, then _ustr(obj) has the same behaviour as str(obj), so
       
   155 
       
   156         # it won't break any existing code.
       
   157 
       
   158         return str(obj)
       
   159 
       
   160         
       
   161 
       
   162     except UnicodeEncodeError, e:
       
   163 
       
   164         # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
       
   165 
       
   166         # state that "The return value must be a string object". However, does a
       
   167 
       
   168         # unicode object (being a subclass of basestring) count as a "string
       
   169 
       
   170         # object"?
       
   171 
       
   172         # If so, then return a unicode object:
       
   173 
       
   174         return unicode(obj)
       
   175 
       
   176         # Else encode it... but how? There are many choices... :)
       
   177 
       
   178         # Replace unprintables with escape codes?
       
   179 
       
   180         #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
       
   181 
       
   182         # Replace unprintables with question marks?
       
   183 
       
   184         #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
       
   185 
       
   186         # ...
       
   187 
       
   188 
       
   189 
       
   190 def _str2dict(strg):
       
   191 
       
   192     return dict( [(c,0) for c in strg] )
       
   193 
       
   194     #~ return set( [c for c in strg] )
       
   195 
       
   196 
       
   197 
       
   198 class _Constants(object):
       
   199 
       
   200     pass
       
   201 
       
   202     
       
   203 
       
# Character-set strings built from the standard ``string`` module.
# lower-case letters followed by upper-case letters
alphas     = string.lowercase + string.uppercase
# decimal digits
nums       = string.digits
# decimal digits plus the letters A-F in both cases
hexnums    = nums + "ABCDEFabcdef"
# every character of alphas followed by every character of nums
alphanums  = alphas + nums    
       
   211 
       
   212 
       
   213 
       
   214 class ParseBaseException(Exception):
       
   215 
       
   216     """base exception class for all parsing runtime exceptions"""
       
   217 
       
   218     __slots__ = ( "loc","msg","pstr","parserElement" )
       
   219 
       
   220     # Performance tuning: we construct a *lot* of these, so keep this
       
   221 
       
   222     # constructor as small and fast as possible        
       
   223 
       
   224     def __init__( self, pstr, loc, msg, elem=None ):
       
   225 
       
   226         self.loc = loc
       
   227 
       
   228         self.msg = msg
       
   229 
       
   230         self.pstr = pstr
       
   231 
       
   232         self.parserElement = elem
       
   233 
       
   234 
       
   235 
       
   236     def __getattr__( self, aname ):
       
   237 
       
   238         """supported attributes by name are:
       
   239 
       
   240             - lineno - returns the line number of the exception text
       
   241 
       
   242             - col - returns the column number of the exception text
       
   243 
       
   244             - line - returns the line containing the exception text
       
   245 
       
   246         """
       
   247 
       
   248         if( aname == "lineno" ):
       
   249 
       
   250             return lineno( self.loc, self.pstr )
       
   251 
       
   252         elif( aname in ("col", "column") ):
       
   253 
       
   254             return col( self.loc, self.pstr )
       
   255 
       
   256         elif( aname == "line" ):
       
   257 
       
   258             return line( self.loc, self.pstr )
       
   259 
       
   260         else:
       
   261 
       
   262             raise AttributeError, aname
       
   263 
       
   264 
       
   265 
       
   266     def __str__( self ):
       
   267 
       
   268         return "%s (at char %d), (line:%d, col:%d)" % ( self.msg, self.loc, self.lineno, self.column )
       
   269 
       
   270     def __repr__( self ):
       
   271 
       
   272         return _ustr(self)
       
   273 
       
   274     def markInputline( self, markerString = ">!<" ):
       
   275 
       
   276         """Extracts the exception line from the input string, and marks 
       
   277 
       
   278            the location of the exception with a special symbol.
       
   279 
       
   280         """
       
   281 
       
   282         line_str = self.line
       
   283 
       
   284         line_column = self.column - 1
       
   285 
       
   286         if markerString:
       
   287 
       
   288             line_str = "".join( [line_str[:line_column], markerString, line_str[line_column:]])
       
   289 
       
   290         return line_str.strip()
       
   291 
       
   292 
       
   293 
       
   294 class ParseException(ParseBaseException):
       
   295 
       
   296     """exception thrown when parse expressions don't match class"""
       
   297 
       
   298     """supported attributes by name are:
       
   299 
       
   300         - lineno - returns the line number of the exception text
       
   301 
       
   302         - col - returns the column number of the exception text
       
   303 
       
   304         - line - returns the line containing the exception text
       
   305 
       
   306     """
       
   307 
       
   308     pass
       
   309 
       
   310     
       
   311 
       
   312 class ParseFatalException(ParseBaseException):
       
   313 
       
   314     """user-throwable exception thrown when inconsistent parse content
       
   315 
       
   316        is found; stops all parsing immediately"""
       
   317 
       
   318     pass
       
   319 
       
   320 
       
   321 
       
   322 class ReparseException(ParseBaseException):
       
   323 
       
   324     def __init_( self, newstring, restartLoc ):
       
   325 
       
   326         self.newParseText = newstring
       
   327 
       
   328         self.reparseLoc = restartLoc
       
   329 
       
   330 
       
   331 
       
   332 
       
   333 
       
   334 class RecursiveGrammarException(Exception):
       
   335 
       
   336     """exception thrown by validate() if the grammar could be improperly recursive"""
       
   337 
       
   338     def __init__( self, parseElementList ):
       
   339 
       
   340         self.parseElementTrace = parseElementList
       
   341 
       
   342     
       
   343 
       
   344     def __str__( self ):
       
   345 
       
   346         return "RecursiveGrammarException: %s" % self.parseElementTrace
       
   347 
       
   348 
       
   349 
       
   350 class ParseResults(object):
       
   351 
       
   352     """Structured parse results, to provide multiple means of access to the parsed data:
       
   353 
       
   354        - as a list (len(results))
       
   355 
       
   356        - by list index (results[0], results[1], etc.)
       
   357 
       
   358        - by attribute (results.<resultsName>)
       
   359 
       
   360        """
       
   361 
       
   362     __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames" )
       
   363 
       
   364     def __new__(cls, toklist, name=None, asList=True, modal=True ):
       
   365 
       
   366         if isinstance(toklist, cls):
       
   367 
       
   368             return toklist
       
   369 
       
   370         retobj = object.__new__(cls)
       
   371 
       
   372         retobj.__doinit = True
       
   373 
       
   374         return retobj
       
   375 
       
   376         
       
   377 
       
   378     # Performance tuning: we construct a *lot* of these, so keep this
       
   379 
       
   380     # constructor as small and fast as possible
       
   381 
       
   382     def __init__( self, toklist, name=None, asList=True, modal=True ):
       
   383 
       
   384         if self.__doinit:
       
   385 
       
   386             self.__doinit = False
       
   387 
       
   388             self.__name = None
       
   389 
       
   390             self.__parent = None
       
   391 
       
   392             self.__accumNames = {}
       
   393 
       
   394             if isinstance(toklist, list):
       
   395 
       
   396                 self.__toklist = toklist[:]
       
   397 
       
   398             else:
       
   399 
       
   400                 self.__toklist = [toklist]
       
   401 
       
   402             self.__tokdict = dict()
       
   403 
       
   404 
       
   405 
       
   406         # this line is related to debugging the asXML bug
       
   407 
       
   408         #~ asList = False
       
   409 
       
   410         
       
   411 
       
   412         if name:
       
   413 
       
   414             if not modal:
       
   415 
       
   416                 self.__accumNames[name] = 0
       
   417 
       
   418             if isinstance(name,int):
       
   419 
       
   420                 name = _ustr(name) # will always return a str, but use _ustr for consistency
       
   421 
       
   422             self.__name = name
       
   423 
       
   424             if not toklist in (None,'',[]):
       
   425 
       
   426                 if isinstance(toklist,basestring): 
       
   427 
       
   428                     toklist = [ toklist ]
       
   429 
       
   430                 if asList:
       
   431 
       
   432                     if isinstance(toklist,ParseResults):
       
   433 
       
   434                         self[name] = (toklist.copy(),-1)
       
   435 
       
   436                     else:
       
   437 
       
   438                         self[name] = (ParseResults(toklist[0]),-1)
       
   439 
       
   440                     self[name].__name = name
       
   441 
       
   442                 else:
       
   443 
       
   444                     try:
       
   445 
       
   446                         self[name] = toklist[0]
       
   447 
       
   448                     except (KeyError,TypeError):
       
   449 
       
   450                         self[name] = toklist
       
   451 
       
   452 
       
   453 
       
   454     def __getitem__( self, i ):
       
   455 
       
   456         if isinstance( i, (int,slice) ):
       
   457 
       
   458             return self.__toklist[i]
       
   459 
       
   460         else:
       
   461 
       
   462             if i not in self.__accumNames:
       
   463 
       
   464                 return self.__tokdict[i][-1][0]
       
   465 
       
   466             else:
       
   467 
       
   468                 return ParseResults([ v[0] for v in self.__tokdict[i] ])
       
   469 
       
   470 
       
   471 
       
   472     def __setitem__( self, k, v ):
       
   473 
       
   474         if isinstance(v,tuple):
       
   475 
       
   476             self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
       
   477 
       
   478             sub = v[0]
       
   479 
       
   480         elif isinstance(k,int):
       
   481 
       
   482             self.__toklist[k] = v
       
   483 
       
   484             sub = v
       
   485 
       
   486         else:
       
   487 
       
   488             self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
       
   489 
       
   490             sub = v
       
   491 
       
   492         if isinstance(sub,ParseResults):
       
   493 
       
   494             sub.__parent = self
       
   495 
       
   496         
       
   497 
       
   498     def __delitem__( self, i ):
       
   499 
       
   500         if isinstance(i,(int,slice)):
       
   501 
       
   502             del self.__toklist[i]
       
   503 
       
   504         else:
       
   505 
       
   506             del self._tokdict[i]
       
   507 
       
   508 
       
   509 
       
   510     def __contains__( self, k ):
       
   511 
       
   512         return self.__tokdict.has_key(k)
       
   513 
       
   514         
       
   515 
       
   516     def __len__( self ): return len( self.__toklist )
       
   517 
       
   518     def __nonzero__( self ): return len( self.__toklist ) > 0
       
   519 
       
   520     def __iter__( self ): return iter( self.__toklist )
       
   521 
       
   522     def keys( self ): 
       
   523 
       
   524         """Returns all named result keys."""
       
   525 
       
   526         return self.__tokdict.keys()
       
   527 
       
   528     
       
   529 
       
   530     def items( self ): 
       
   531 
       
   532         """Returns all named result keys and values as a list of tuples."""
       
   533 
       
   534         return [(k,self[k]) for k in self.__tokdict.keys()]
       
   535 
       
   536     
       
   537 
       
   538     def values( self ): 
       
   539 
       
   540         """Returns all named result values."""
       
   541 
       
   542         return [ v[-1][0] for v in self.__tokdict.values() ]
       
   543 
       
   544 
       
   545 
       
   546     def __getattr__( self, name ):
       
   547 
       
   548         if name not in self.__slots__:
       
   549 
       
   550             if self.__tokdict.has_key( name ):
       
   551 
       
   552                 if name not in self.__accumNames:
       
   553 
       
   554                     return self.__tokdict[name][-1][0]
       
   555 
       
   556                 else:
       
   557 
       
   558                     return ParseResults([ v[0] for v in self.__tokdict[name] ])
       
   559 
       
   560             else:
       
   561 
       
   562                 return ""
       
   563 
       
   564         return None
       
   565 
       
   566 
       
   567 
       
   568     def __add__( self, other ):
       
   569 
       
   570         ret = self.copy()
       
   571 
       
   572         ret += other
       
   573 
       
   574         return ret
       
   575 
       
   576         
       
   577 
       
   578     def __iadd__( self, other ):
       
   579 
       
   580         if other.__tokdict:
       
   581 
       
   582             offset = len(self.__toklist)
       
   583 
       
   584             addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
       
   585 
       
   586             otheritems = other.__tokdict.items()
       
   587 
       
   588             otherdictitems = [(k,(v[0],addoffset(v[1])) ) for (k,vlist) in otheritems for v in vlist]
       
   589 
       
   590             for k,v in otherdictitems:
       
   591 
       
   592                 self[k] = v
       
   593 
       
   594                 if isinstance(v[0],ParseResults):
       
   595 
       
   596                     v[0].__parent = self
       
   597 
       
   598         self.__toklist += other.__toklist
       
   599 
       
   600         self.__accumNames.update( other.__accumNames )
       
   601 
       
   602         del other
       
   603 
       
   604         return self
       
   605 
       
   606        
       
   607 
       
   608     def __repr__( self ):
       
   609 
       
   610         return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
       
   611 
       
   612 
       
   613 
       
   614     def __str__( self ):
       
   615 
       
   616         out = "["
       
   617 
       
   618         sep = ""
       
   619 
       
   620         for i in self.__toklist:
       
   621 
       
   622             if isinstance(i, ParseResults):
       
   623 
       
   624                 out += sep + _ustr(i)
       
   625 
       
   626             else:
       
   627 
       
   628                 out += sep + repr(i)
       
   629 
       
   630             sep = ", "
       
   631 
       
   632         out += "]"
       
   633 
       
   634         return out
       
   635 
       
   636 
       
   637 
       
   638     def _asStringList( self, sep='' ):
       
   639 
       
   640         out = []
       
   641 
       
   642         for item in self.__toklist:
       
   643 
       
   644             if out and sep:
       
   645 
       
   646                 out.append(sep)
       
   647 
       
   648             if isinstance( item, ParseResults ):
       
   649 
       
   650                 out += item._asStringList()
       
   651 
       
   652             else:
       
   653 
       
   654                 out.append( _ustr(item) )
       
   655 
       
   656         return out
       
   657 
       
   658 
       
   659 
       
   660     def asList( self ):
       
   661 
       
   662         """Returns the parse results as a nested list of matching tokens, all converted to strings."""
       
   663 
       
   664         out = []
       
   665 
       
   666         for res in self.__toklist:
       
   667 
       
   668             if isinstance(res,ParseResults):
       
   669 
       
   670                 out.append( res.asList() )
       
   671 
       
   672             else:
       
   673 
       
   674                 out.append( res )
       
   675 
       
   676         return out
       
   677 
       
   678 
       
   679 
       
   680     def asDict( self ):
       
   681 
       
   682         """Returns the named parse results as dictionary."""
       
   683 
       
   684         return dict( self.items() )
       
   685 
       
   686 
       
   687 
       
   688     def copy( self ):
       
   689 
       
   690         """Returns a new copy of a ParseResults object."""
       
   691 
       
   692         ret = ParseResults( self.__toklist )
       
   693 
       
   694         ret.__tokdict = self.__tokdict.copy()
       
   695 
       
   696         ret.__parent = self.__parent
       
   697 
       
   698         ret.__accumNames.update( self.__accumNames )
       
   699 
       
   700         ret.__name = self.__name
       
   701 
       
   702         return ret
       
   703 
       
   704         
       
   705 
       
   706     def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
       
   707 
       
   708         """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
       
   709 
       
   710         nl = "\n"
       
   711 
       
   712         out = []
       
   713 
       
   714         namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] )
       
   715 
       
   716         nextLevelIndent = indent + "  "
       
   717 
       
   718         
       
   719 
       
   720         # collapse out indents if formatting is not desired
       
   721 
       
   722         if not formatted:
       
   723 
       
   724             indent = ""
       
   725 
       
   726             nextLevelIndent = ""
       
   727 
       
   728             nl = ""
       
   729 
       
   730             
       
   731 
       
   732         selfTag = None
       
   733 
       
   734         if doctag is not None:
       
   735 
       
   736             selfTag = doctag
       
   737 
       
   738         else:
       
   739 
       
   740             if self.__name:
       
   741 
       
   742                 selfTag = self.__name
       
   743 
       
   744         
       
   745 
       
   746         if not selfTag:
       
   747 
       
   748             if namedItemsOnly:
       
   749 
       
   750                 return ""
       
   751 
       
   752             else:
       
   753 
       
   754                 selfTag = "ITEM"
       
   755 
       
   756         
       
   757 
       
   758         out += [ nl, indent, "<", selfTag, ">" ]
       
   759 
       
   760         
       
   761 
       
   762         worklist = self.__toklist
       
   763 
       
   764         for i,res in enumerate(worklist):
       
   765 
       
   766             if isinstance(res,ParseResults):
       
   767 
       
   768                 if i in namedItems:
       
   769 
       
   770                     out += [ res.asXML(namedItems[i], namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
       
   771 
       
   772                 else:
       
   773 
       
   774                     out += [ res.asXML(None, namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
       
   775 
       
   776             else:
       
   777 
       
   778                 # individual token, see if there is a name for it
       
   779 
       
   780                 resTag = None
       
   781 
       
   782                 if i in namedItems:
       
   783 
       
   784                     resTag = namedItems[i]
       
   785 
       
   786                 if not resTag:
       
   787 
       
   788                     if namedItemsOnly:
       
   789 
       
   790                         continue
       
   791 
       
   792                     else:
       
   793 
       
   794                         resTag = "ITEM"
       
   795 
       
   796                 xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
       
   797 
       
   798                 out += [ nl, nextLevelIndent, "<", resTag, ">", xmlBodyText, "</", resTag, ">" ]
       
   799 
       
   800         
       
   801 
       
   802         out += [ nl, indent, "</", selfTag, ">" ]
       
   803 
       
   804         return "".join(out)
       
   805 
       
   806 
       
   807 
       
   808     def __lookup(self,sub):
       
   809 
       
   810         for k,vlist in self.__tokdict.items():
       
   811 
       
   812             for v,loc in vlist:
       
   813 
       
   814                 if sub is v:
       
   815 
       
   816                     return k
       
   817 
       
   818         return None
       
   819 
       
   820             
       
   821 
       
   822     def getName(self):
       
   823 
       
   824         """Returns the results name for this token expression."""
       
   825 
       
   826         if self.__name:
       
   827 
       
   828             return self.__name
       
   829 
       
   830         elif self.__parent:
       
   831 
       
   832             par = self.__parent
       
   833 
       
   834             if par:
       
   835 
       
   836                 return par.__lookup(self)
       
   837 
       
   838             else:
       
   839 
       
   840                 return None
       
   841 
       
   842         elif (len(self) == 1 and 
       
   843 
       
   844                len(self.__tokdict) == 1 and
       
   845 
       
   846                self.__tokdict.values()[0][0][1] in (0,-1)):
       
   847 
       
   848             return self.__tokdict.keys()[0]
       
   849 
       
   850         else:
       
   851 
       
   852             return None
       
   853 
       
   854             
       
   855 
       
   856     def dump(self,indent='',depth=0):
       
   857 
       
   858         """Diagnostic method for listing out the contents of a ParseResults.
       
   859 
       
   860            Accepts an optional indent argument so that this string can be embedded
       
   861 
       
   862            in a nested display of other data."""
       
   863 
       
   864         out = []
       
   865 
       
   866         out.append( indent+str(self.asList()) )
       
   867 
       
   868         keys = self.items()
       
   869 
       
   870         keys.sort()
       
   871 
       
   872         for k,v in keys:
       
   873 
       
   874             if out:
       
   875 
       
   876                 out.append('\n')
       
   877 
       
   878             out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
       
   879 
       
   880             if isinstance(v,ParseResults):
       
   881 
       
   882                 if v.keys():
       
   883 
       
   884                     #~ out.append('\n')
       
   885 
       
   886                     out.append( v.dump(indent,depth+1) )
       
   887 
       
   888                     #~ out.append('\n')
       
   889 
       
   890                 else:
       
   891 
       
   892                     out.append(str(v))
       
   893 
       
   894             else:
       
   895 
       
   896                 out.append(str(v))
       
   897 
       
   898         #~ out.append('\n')
       
   899 
       
   900         return "".join(out)
       
   901 
       
   902 
       
   903 
       
   904     # add support for pickle protocol
       
   905 
       
   906     def __getstate__(self):
       
   907 
       
   908         return ( self.__toklist,
       
   909 
       
   910                  ( self.__tokdict.copy(),
       
   911 
       
   912                    self.__parent,
       
   913 
       
   914                    self.__accumNames,
       
   915 
       
   916                    self.__name ) )
       
   917 
       
   918     
       
   919 
       
   920     def __setstate__(self,state):
       
   921 
       
   922         self.__toklist = state[0]
       
   923 
       
   924         self.__tokdict, \
       
   925 
       
   926         self.__parent, \
       
   927 
       
   928         inAccumNames, \
       
   929 
       
   930         self.__name = state[1]
       
   931 
       
   932         self.__accumNames = {}
       
   933 
       
   934         self.__accumNames.update(inAccumNames)
       
   935 
       
   936 
       
   937 
       
   938 
       
   939 
       
   940 def col (loc,strg):
       
   941 
       
   942     """Returns current column within a string, counting newlines as line separators.
       
   943 
       
   944    The first column is number 1.
       
   945 
       
   946    """
       
   947 
       
   948     return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
       
   949 
       
   950 
       
   951 
       
   952 def lineno(loc,strg):
       
   953 
       
   954     """Returns current line number within a string, counting newlines as line separators.
       
   955 
       
   956    The first line is number 1.
       
   957 
       
   958    """
       
   959 
       
   960     return strg.count("\n",0,loc) + 1
       
   961 
       
   962 
       
   963 
       
   964 def line( loc, strg ):
       
   965 
       
   966     """Returns the line of text containing loc within a string, counting newlines as line separators.
       
   967 
       
   968        """
       
   969 
       
   970     lastCR = strg.rfind("\n", 0, loc)
       
   971 
       
   972     nextCR = strg.find("\n", loc)
       
   973 
       
   974     if nextCR > 0:
       
   975 
       
   976         return strg[lastCR+1:nextCR]
       
   977 
       
   978     else:
       
   979 
       
   980         return strg[lastCR+1:]
       
   981 
       
   982 
       
   983 
       
   984 def _defaultStartDebugAction( instring, loc, expr ):
       
   985 
       
   986     print "Match",expr,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
       
   987 
       
   988 
       
   989 
       
   990 def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
       
   991 
       
   992     print "Matched",expr,"->",toks.asList()
       
   993 
       
   994     
       
   995 
       
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print exc to standard output, prefixed with "Exception raised:".

    instring, loc and expr are accepted but not used.
    """
    print "Exception raised:", exc
       
   999 
       
  1000 
       
  1001 
       
  1002 def nullDebugAction(*args):
       
  1003 
       
  1004     """'Do-nothing' debug action, to suppress debugging output during parsing."""
       
  1005 
       
  1006     pass
       
  1007 
       
  1008 
       
  1009 
       
  1010 class ParserElement(object):
       
  1011 
       
  1012     """Abstract base level parser element class."""
       
  1013 
       
  1014     DEFAULT_WHITE_CHARS = " \n\t\r"
       
  1015 
       
  1016     
       
  1017 
       
  1018     def setDefaultWhitespaceChars( chars ):
       
  1019 
       
  1020         """Overrides the default whitespace chars
       
  1021 
       
  1022         """
       
  1023 
       
  1024         ParserElement.DEFAULT_WHITE_CHARS = chars
       
  1025 
       
  1026     setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
       
  1027 
       
  1028     
       
  1029 
       
  1030     def __init__( self, savelist=False ):
       
  1031 
       
  1032         self.parseAction = list()
       
  1033 
       
  1034         self.failAction = None
       
  1035 
       
  1036         #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
       
  1037 
       
  1038         self.strRepr = None
       
  1039 
       
  1040         self.resultsName = None
       
  1041 
       
  1042         self.saveAsList = savelist
       
  1043 
       
  1044         self.skipWhitespace = True
       
  1045 
       
  1046         self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
       
  1047 
       
  1048         self.copyDefaultWhiteChars = True
       
  1049 
       
  1050         self.mayReturnEmpty = False
       
  1051 
       
  1052         self.keepTabs = False
       
  1053 
       
  1054         self.ignoreExprs = list()
       
  1055 
       
  1056         self.debug = False
       
  1057 
       
  1058         self.streamlined = False
       
  1059 
       
  1060         self.mayIndexError = True
       
  1061 
       
  1062         self.errmsg = ""
       
  1063 
       
  1064         self.modalResults = True
       
  1065 
       
  1066         self.debugActions = ( None, None, None )
       
  1067 
       
  1068         self.re = None
       
  1069 
       
  1070 
       
  1071 
       
  1072     def copy( self ):
       
  1073 
       
  1074         """Make a copy of this ParserElement.  Useful for defining different parse actions
       
  1075 
       
  1076            for the same parsing pattern, using copies of the original parse element."""
       
  1077 
       
  1078         cpy = copy.copy( self )
       
  1079 
       
  1080         cpy.parseAction = self.parseAction[:]
       
  1081 
       
  1082         cpy.ignoreExprs = self.ignoreExprs[:]
       
  1083 
       
  1084         if self.copyDefaultWhiteChars:
       
  1085 
       
  1086             cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
       
  1087 
       
  1088         return cpy
       
  1089 
       
  1090 
       
  1091 
       
  1092     def setName( self, name ):
       
  1093 
       
  1094         """Define name for this expression, for use in debugging."""
       
  1095 
       
  1096         self.name = name
       
  1097 
       
  1098         self.errmsg = "Expected " + self.name
       
  1099 
       
  1100         return self
       
  1101 
       
  1102 
       
  1103 
       
  1104     def setResultsName( self, name, listAllMatches=False ):
       
  1105 
       
  1106         """Define name for referencing matching tokens as a nested attribute 
       
  1107 
       
  1108            of the returned parse results.
       
  1109 
       
  1110            NOTE: this returns a *copy* of the original ParserElement object;
       
  1111 
       
  1112            this is so that the client can define a basic element, such as an
       
  1113 
       
  1114            integer, and reference it in multiple places with different names.
       
  1115 
       
  1116         """
       
  1117 
       
  1118         newself = self.copy()
       
  1119 
       
  1120         newself.resultsName = name
       
  1121 
       
  1122         newself.modalResults = not listAllMatches
       
  1123 
       
  1124         return newself
       
  1125 
       
  1126 
       
  1127 
       
    def normalizeParseActionArgs( f ):
        """Internal method used to decorate parse actions that take fewer than 3 arguments,
           so that all parse actions can be called as f(s,l,t).

           The argument count is read from the callable's code object.  A
           callable accepting *args, or exactly 3 arguments, is returned as is;
           otherwise a wrapper taking (s,l,t) and forwarding the trailing 2, 1
           or 0 arguments is returned."""
        # co_flags bit that is set on code objects of functions taking *args
        STAR_ARGS = 4

        try:
            restore = None
            if isinstance(f,type):
                # a class used as a parse action: inspect its __init__, and
                # put the class itself back in f once the count is known
                restore = f
                f = f.__init__
            if f.func_code.co_flags & STAR_ARGS:
                # NOTE(review): when f was a class, this returns its __init__
                # rather than the class itself - looks unintended, confirm
                return f
            numargs = f.func_code.co_argcount
            if hasattr(f,"im_self"):
                # method object: do not count the self argument
                numargs -= 1
            if restore:
                f = restore
        except AttributeError:
            # NOTE(review): if the AttributeError came from the __init__ of a
            # class, f is still bound to that __init__ here, not to the class
            # - confirm this is acceptable
            try:
                # not a function, must be a callable object, get info from the
                # im_func binding of its bound __call__ method
                if f.__call__.im_func.func_code.co_flags & STAR_ARGS:
                    return f
                numargs = f.__call__.im_func.func_code.co_argcount
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            except AttributeError:
                # not a bound method, get info directly from __call__ method
                if f.__call__.func_code.co_flags & STAR_ARGS:
                    return f
                numargs = f.__call__.func_code.co_argcount
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1

        #~ print "adding function %s with %d args" % (f.func_name,numargs)
        if numargs == 3:
            return f
        else:
            # adapt the shorter signatures to the (s,l,t) calling convention
            if numargs == 2:
                def tmp(s,l,t):
                    return f(l,t)
            elif numargs == 1:
                def tmp(s,l,t):
                    return f(t)
            else: #~ numargs == 0:
                # any count other than 1, 2 or 3 also ends up here
                def tmp(s,l,t):
                    return f()
            return tmp
    normalizeParseActionArgs = staticmethod(normalizeParseActionArgs)
       
  1225 
       
  1226             
       
  1227 
       
  1228     def setParseAction( self, *fns ):
       
  1229 
       
  1230         """Define action to perform when successfully matching parse element definition.
       
  1231 
       
  1232            Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
       
  1233 
       
  1234            fn(loc,toks), fn(toks), or just fn(), where:
       
  1235 
       
  1236             - s   = the original string being parsed
       
  1237 
       
  1238             - loc = the location of the matching substring
       
  1239 
       
  1240             - toks = a list of the matched tokens, packaged as a ParseResults object
       
  1241 
       
  1242            If the functions in fns modify the tokens, they can return them as the return
       
  1243 
       
  1244            value from fn, and the modified list of tokens will replace the original.
       
  1245 
       
  1246            Otherwise, fn does not need to return any value."""
       
  1247 
       
  1248         self.parseAction = map(self.normalizeParseActionArgs, list(fns))
       
  1249 
       
  1250         return self
       
  1251 
       
  1252 
       
  1253 
       
  1254     def addParseAction( self, *fns ):
       
  1255 
       
  1256         """Add parse action to expression's list of parse actions. See setParseAction_."""
       
  1257 
       
  1258         self.parseAction += map(self.normalizeParseActionArgs, list(fns))
       
  1259 
       
  1260         return self
       
  1261 
       
  1262 
       
  1263 
       
  1264     def setFailAction( self, fn ):
       
  1265 
       
  1266         """Define action to perform if parsing fails at this expression. 
       
  1267 
       
  1268            Fail acton fn is a callable function that takes the arguments 
       
  1269 
       
  1270            fn(s,loc,expr,err) where:
       
  1271 
       
  1272             - s = string being parsed
       
  1273 
       
  1274             - loc = location where expression match was attempted and failed
       
  1275 
       
  1276             - expr = the parse expression that failed
       
  1277 
       
  1278             - err = the exception thrown
       
  1279 
       
  1280            The function returns no value.  It may throw ParseFatalException
       
  1281 
       
  1282            if it is desired to stop parsing immediately."""
       
  1283 
       
  1284         self.failAction = fn
       
  1285 
       
  1286         return self
       
  1287 
       
  1288         
       
  1289 
       
  1290     def skipIgnorables( self, instring, loc ):
       
  1291 
       
  1292         exprsFound = True
       
  1293 
       
  1294         while exprsFound:
       
  1295 
       
  1296             exprsFound = False
       
  1297 
       
  1298             for e in self.ignoreExprs:
       
  1299 
       
  1300                 try:
       
  1301 
       
  1302                     while 1:
       
  1303 
       
  1304                         loc,dummy = e._parse( instring, loc )
       
  1305 
       
  1306                         exprsFound = True
       
  1307 
       
  1308                 except ParseException:
       
  1309 
       
  1310                     pass
       
  1311 
       
  1312         return loc
       
  1313 
       
  1314 
       
  1315 
       
  1316     def preParse( self, instring, loc ):
       
  1317 
       
  1318         if self.ignoreExprs:
       
  1319 
       
  1320             loc = self.skipIgnorables( instring, loc )
       
  1321 
       
  1322         
       
  1323 
       
  1324         if self.skipWhitespace:
       
  1325 
       
  1326             wt = self.whiteChars
       
  1327 
       
  1328             instrlen = len(instring)
       
  1329 
       
  1330             while loc < instrlen and instring[loc] in wt:
       
  1331 
       
  1332                 loc += 1
       
  1333 
       
  1334                 
       
  1335 
       
  1336         return loc
       
  1337 
       
  1338 
       
  1339 
       
  1340     def parseImpl( self, instring, loc, doActions=True ):
       
  1341 
       
  1342         return loc, []
       
  1343 
       
  1344 
       
  1345 
       
  1346     def postParse( self, instring, loc, tokenlist ):
       
  1347 
       
  1348         return tokenlist
       
  1349 
       
  1350 
       
  1351 
       
  1352     #~ @profile
       
  1353 
       
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Match this element against instring at loc, without any caching.

           Runs preParse (unless callPreParse is false), parseImpl and
           postParse, wraps the tokens in a ParseResults, and, when doActions
           is true, runs the parse actions.  Returns the tuple
           ( location after the match, ParseResults ).  An IndexError escaping
           from parseImpl is turned into a ParseException located at the end
           of instring.  The debug hooks and the fail action are only
           consulted when self.debug or self.failAction is set."""
        debugging = ( self.debug ) #and doActions )

        if debugging or self.failAction:
            # slower path: debug hooks and/or a fail action have to be notified
            #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            # NOTE(review): this is the location *before* preParse, so parse
            # actions and hooks receive a position that may precede skipped
            # whitespace - confirm that this is intended
            tokensStart = loc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            #~ except ReparseException, retryEx:
                #~ pass
            except ParseException, err:
                #~ print "Exception raised:", err
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            # plain path: no hooks to call
            if callPreParse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = loc
            # the IndexError guard is only set up when the element says it may
            # raise one, or when loc is already at or past the end of the string
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and doActions:
            # a parse action returning something other than None replaces the
            # results; the two loops below differ only in the exception hook
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseException, err:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )

        if debugging:
            #~ print "Matched",self,"->",retTokens.asList()
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
       
  1499 
       
  1500 
       
  1501 
       
  1502     def tryParse( self, instring, loc ):
       
  1503 
       
  1504         return self._parse( instring, loc, doActions=False )[0]
       
  1505 
       
  1506     
       
  1507 
       
  1508     # this method gets repeatedly called during backtracking with the same arguments -
       
  1509 
       
  1510     # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
       
  1511 
       
  1512     def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
       
  1513 
       
  1514         if doActions and self.parseAction:
       
  1515 
       
  1516             return self._parseNoCache( instring, loc, doActions, callPreParse )
       
  1517 
       
  1518         lookup = (self,instring,loc,callPreParse)
       
  1519 
       
  1520         if lookup in ParserElement._exprArgCache:
       
  1521 
       
  1522             value = ParserElement._exprArgCache[ lookup ]
       
  1523 
       
  1524             if isinstance(value,Exception):
       
  1525 
       
  1526                 if isinstance(value,ParseBaseException):
       
  1527 
       
  1528                     value.loc = loc
       
  1529 
       
  1530                 raise value
       
  1531 
       
  1532             return value
       
  1533 
       
  1534         else:
       
  1535 
       
  1536             try:
       
  1537 
       
  1538                 ParserElement._exprArgCache[ lookup ] = \
       
  1539                     value = self._parseNoCache( instring, loc, doActions, callPreParse )
       
  1540 
       
  1541                 return value
       
  1542 
       
  1543             except ParseBaseException, pe:
       
  1544 
       
  1545                 ParserElement._exprArgCache[ lookup ] = pe
       
  1546 
       
  1547                 raise
       
  1548 
       
  1549 
       
  1550 
       
  1551     _parse = _parseNoCache
       
  1552 
       
  1553 
       
  1554 
       
  1555     # argument cache for optimizing repeated calls when backtracking through recursive expressions
       
  1556 
       
  1557     _exprArgCache = {}
       
  1558 
       
  1559     def resetCache():
       
  1560 
       
  1561         ParserElement._exprArgCache.clear()
       
  1562 
       
  1563     resetCache = staticmethod(resetCache)
       
  1564 
       
  1565     
       
  1566 
       
  1567     _packratEnabled = False
       
  1568 
       
  1569     def enablePackrat():
       
  1570 
       
  1571         """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       
  1572 
       
  1573            Repeated parse attempts at the same string location (which happens 
       
  1574 
       
  1575            often in many complex grammars) can immediately return a cached value, 
       
  1576 
       
  1577            instead of re-executing parsing/validating code.  Memoizing is done of
       
  1578 
       
  1579            both valid results and parsing exceptions.
       
  1580 
       
  1581             
       
  1582 
       
  1583            This speedup may break existing programs that use parse actions that 
       
  1584 
       
  1585            have side-effects.  For this reason, packrat parsing is disabled when
       
  1586 
       
  1587            you first import pyparsing.  To activate the packrat feature, your
       
  1588 
       
  1589            program must call the class method ParserElement.enablePackrat().  If
       
  1590 
       
  1591            your program uses psyco to "compile as you go", you must call 
       
  1592 
       
  1593            enablePackrat before calling psyco.full().  If you do not do this,
       
  1594 
       
  1595            Python will crash.  For best results, call enablePackrat() immediately
       
  1596 
       
  1597            after importing pyparsing.
       
  1598 
       
  1599         """
       
  1600 
       
  1601         if not ParserElement._packratEnabled:
       
  1602 
       
  1603             ParserElement._packratEnabled = True
       
  1604 
       
  1605             ParserElement._parse = ParserElement._parseCache
       
  1606 
       
  1607     enablePackrat = staticmethod(enablePackrat)
       
  1608 
       
  1609 
       
  1610 
       
  1611     def parseString( self, instring ):
       
  1612 
       
  1613         """Execute the parse expression with the given string.
       
  1614 
       
  1615            This is the main interface to the client code, once the complete 
       
  1616 
       
  1617            expression has been built.
       
  1618 
       
  1619         """
       
  1620 
       
  1621         ParserElement.resetCache()
       
  1622 
       
  1623         if not self.streamlined:
       
  1624 
       
  1625             self.streamline()
       
  1626 
       
  1627             #~ self.saveAsList = True
       
  1628 
       
  1629         for e in self.ignoreExprs:
       
  1630 
       
  1631             e.streamline()
       
  1632 
       
  1633         if self.keepTabs:
       
  1634 
       
  1635             loc, tokens = self._parse( instring, 0 )
       
  1636 
       
  1637         else:
       
  1638 
       
  1639             loc, tokens = self._parse( instring.expandtabs(), 0 )
       
  1640 
       
  1641         return tokens
       
  1642 
       
  1643 
       
  1644 
       
  1645     def scanString( self, instring, maxMatches=sys.maxint ):
       
  1646 
       
  1647         """Scan the input string for expression matches.  Each match will return the 
       
  1648 
       
  1649            matching tokens, start location, and end location.  May be called with optional
       
  1650 
       
  1651            maxMatches argument, to clip scanning after 'n' matches are found."""
       
  1652 
       
  1653         if not self.streamlined:
       
  1654 
       
  1655             self.streamline()
       
  1656 
       
  1657         for e in self.ignoreExprs:
       
  1658 
       
  1659             e.streamline()
       
  1660 
       
  1661         
       
  1662 
       
  1663         if not self.keepTabs:
       
  1664 
       
  1665             instring = instring.expandtabs()
       
  1666 
       
  1667         instrlen = len(instring)
       
  1668 
       
  1669         loc = 0
       
  1670 
       
  1671         preparseFn = self.preParse
       
  1672 
       
  1673         parseFn = self._parse
       
  1674 
       
  1675         ParserElement.resetCache()
       
  1676 
       
  1677         matches = 0
       
  1678 
       
  1679         while loc <= instrlen and matches < maxMatches:
       
  1680 
       
  1681             try:
       
  1682 
       
  1683                 preloc = preparseFn( instring, loc )
       
  1684 
       
  1685                 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
       
  1686 
       
  1687             except ParseException:
       
  1688 
       
  1689                 loc = preloc+1
       
  1690 
       
  1691             else:
       
  1692 
       
  1693                 matches += 1
       
  1694 
       
  1695                 yield tokens, preloc, nextLoc
       
  1696 
       
  1697                 loc = nextLoc
       
  1698 
       
  1699         
       
  1700 
       
  1701     def transformString( self, instring ):
       
  1702 
       
  1703         """Extension to scanString, to modify matching text with modified tokens that may
       
  1704 
       
  1705            be returned from a parse action.  To use transformString, define a grammar and 
       
  1706 
       
  1707            attach a parse action to it that modifies the returned token list.  
       
  1708 
       
  1709            Invoking transformString() on a target string will then scan for matches, 
       
  1710 
       
  1711            and replace the matched text patterns according to the logic in the parse 
       
  1712 
       
  1713            action.  transformString() returns the resulting transformed string."""
       
  1714 
       
  1715         out = []
       
  1716 
       
  1717         lastE = 0
       
  1718 
       
  1719         # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
       
  1720 
       
  1721         # keep string locs straight between transformString and scanString
       
  1722 
       
  1723         self.keepTabs = True
       
  1724 
       
  1725         for t,s,e in self.scanString( instring ):
       
  1726 
       
  1727             out.append( instring[lastE:s] )
       
  1728 
       
  1729             if t:
       
  1730 
       
  1731                 if isinstance(t,ParseResults):
       
  1732 
       
  1733                     out += t.asList()
       
  1734 
       
  1735                 elif isinstance(t,list):
       
  1736 
       
  1737                     out += t
       
  1738 
       
  1739                 else:
       
  1740 
       
  1741                     out.append(t)
       
  1742 
       
  1743             lastE = e
       
  1744 
       
  1745         out.append(instring[lastE:])
       
  1746 
       
  1747         return "".join(out)
       
  1748 
       
  1749 
       
  1750 
       
  1751     def searchString( self, instring, maxMatches=sys.maxint ):
       
  1752 
       
  1753         """Another extension to scanString, simplifying the access to the tokens found
       
  1754 
       
  1755            to match the given parse expression.  May be called with optional
       
  1756 
       
  1757            maxMatches argument, to clip searching after 'n' matches are found.
       
  1758 
       
  1759         """
       
  1760 
       
  1761         return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
       
  1762 
       
  1763             
       
  1764 
       
  1765     def __add__(self, other ):
       
  1766 
       
  1767         """Implementation of + operator - returns And"""
       
  1768 
       
  1769         if isinstance( other, basestring ):
       
  1770 
       
  1771             other = Literal( other )
       
  1772 
       
  1773         if not isinstance( other, ParserElement ):
       
  1774 
       
  1775             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
       
  1776 
       
  1777                     SyntaxWarning, stacklevel=2)
       
  1778 
       
  1779         return And( [ self, other ] )
       
  1780 
       
  1781 
       
  1782 
       
  1783     def __radd__(self, other ):
       
  1784 
       
  1785         """Implementation of += operator"""
       
  1786 
       
  1787         if isinstance( other, basestring ):
       
  1788 
       
  1789             other = Literal( other )
       
  1790 
       
  1791         if not isinstance( other, ParserElement ):
       
  1792 
       
  1793             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
       
  1794 
       
  1795                     SyntaxWarning, stacklevel=2)
       
  1796 
       
  1797         return other + self
       
  1798 
       
  1799 
       
  1800 
       
  1801     def __or__(self, other ):
       
  1802 
       
  1803         """Implementation of | operator - returns MatchFirst"""
       
  1804 
       
  1805         if isinstance( other, basestring ):
       
  1806 
       
  1807             other = Literal( other )
       
  1808 
       
  1809         if not isinstance( other, ParserElement ):
       
  1810 
       
  1811             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
       
  1812 
       
  1813                     SyntaxWarning, stacklevel=2)
       
  1814 
       
  1815         return MatchFirst( [ self, other ] )
       
  1816 
       
  1817 
       
  1818 
       
  1819     def __ror__(self, other ):
       
  1820 
       
  1821         """Implementation of |= operator"""
       
  1822 
       
  1823         if isinstance( other, basestring ):
       
  1824 
       
  1825             other = Literal( other )
       
  1826 
       
  1827         if not isinstance( other, ParserElement ):
       
  1828 
       
  1829             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
       
  1830 
       
  1831                     SyntaxWarning, stacklevel=2)
       
  1832 
       
  1833         return other | self
       
  1834 
       
  1835 
       
  1836 
       
  1837     def __xor__(self, other ):
       
  1838 
       
  1839         """Implementation of ^ operator - returns Or"""
       
  1840 
       
  1841         if isinstance( other, basestring ):
       
  1842 
       
  1843             other = Literal( other )
       
  1844 
       
  1845         if not isinstance( other, ParserElement ):
       
  1846 
       
  1847             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
       
  1848 
       
  1849                     SyntaxWarning, stacklevel=2)
       
  1850 
       
  1851         return Or( [ self, other ] )
       
  1852 
       
  1853 
       
  1854 
       
  1855     def __rxor__(self, other ):
       
  1856 
       
  1857         """Implementation of ^= operator"""
       
  1858 
       
  1859         if isinstance( other, basestring ):
       
  1860 
       
  1861             other = Literal( other )
       
  1862 
       
  1863         if not isinstance( other, ParserElement ):
       
  1864 
       
  1865             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
       
  1866 
       
  1867                     SyntaxWarning, stacklevel=2)
       
  1868 
       
  1869         return other ^ self
       
  1870 
       
  1871 
       
  1872 
       
  1873     def __and__(self, other ):
       
  1874 
       
  1875         """Implementation of & operator - returns Each"""
       
  1876 
       
  1877         if isinstance( other, basestring ):
       
  1878 
       
  1879             other = Literal( other )
       
  1880 
       
  1881         if not isinstance( other, ParserElement ):
       
  1882 
       
  1883             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
       
  1884 
       
  1885                     SyntaxWarning, stacklevel=2)
       
  1886 
       
  1887         return Each( [ self, other ] )
       
  1888 
       
  1889 
       
  1890 
       
  1891     def __rand__(self, other ):
       
  1892 
       
  1893         """Implementation of right-& operator"""
       
  1894 
       
  1895         if isinstance( other, basestring ):
       
  1896 
       
  1897             other = Literal( other )
       
  1898 
       
  1899         if not isinstance( other, ParserElement ):
       
  1900 
       
  1901             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
       
  1902 
       
  1903                     SyntaxWarning, stacklevel=2)
       
  1904 
       
  1905         return other & self
       
  1906 
       
  1907 
       
  1908 
       
  1909     def __invert__( self ):
       
  1910 
       
  1911         """Implementation of ~ operator - returns NotAny"""
       
  1912 
       
  1913         return NotAny( self )
       
  1914 
       
  1915 
       
  1916 
       
  1917     def suppress( self ):
       
  1918 
       
  1919         """Suppresses the output of this ParserElement; useful to keep punctuation from
       
  1920 
       
  1921            cluttering up returned output.
       
  1922 
       
  1923         """
       
  1924 
       
  1925         return Suppress( self )
       
  1926 
       
  1927 
       
  1928 
       
  1929     def leaveWhitespace( self ):
       
  1930 
       
  1931         """Disables the skipping of whitespace before matching the characters in the 
       
  1932 
       
  1933            ParserElement's defined pattern.  This is normally only used internally by
       
  1934 
       
  1935            the pyparsing module, but may be needed in some whitespace-sensitive grammars.
       
  1936 
       
  1937         """
       
  1938 
       
  1939         self.skipWhitespace = False
       
  1940 
       
  1941         return self
       
  1942 
       
  1943 
       
  1944 
       
  1945     def setWhitespaceChars( self, chars ):
       
  1946 
       
  1947         """Overrides the default whitespace chars
       
  1948 
       
  1949         """
       
  1950 
       
  1951         self.skipWhitespace = True
       
  1952 
       
  1953         self.whiteChars = chars
       
  1954 
       
  1955         self.copyDefaultWhiteChars = False
       
  1956 
       
  1957         return self
       
  1958 
       
  1959         
       
  1960 
       
  1961     def parseWithTabs( self ):
       
  1962 
       
  1963         """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
       
  1964 
       
  1965            Must be called before parseString when the input grammar contains elements that 
       
  1966 
       
  1967            match <TAB> characters."""
       
  1968 
       
  1969         self.keepTabs = True
       
  1970 
       
  1971         return self
       
  1972 
       
  1973         
       
  1974 
       
  1975     def ignore( self, other ):
       
  1976 
       
  1977         """Define expression to be ignored (e.g., comments) while doing pattern 
       
  1978 
       
  1979            matching; may be called repeatedly, to define multiple comment or other
       
  1980 
       
  1981            ignorable patterns.
       
  1982 
       
  1983         """
       
  1984 
       
  1985         if isinstance( other, Suppress ):
       
  1986 
       
  1987             if other not in self.ignoreExprs:
       
  1988 
       
  1989                 self.ignoreExprs.append( other )
       
  1990 
       
  1991         else:
       
  1992 
       
  1993             self.ignoreExprs.append( Suppress( other ) )
       
  1994 
       
  1995         return self
       
  1996 
       
  1997 
       
  1998 
       
  1999     def setDebugActions( self, startAction, successAction, exceptionAction ):
       
  2000 
       
  2001         """Enable display of debugging messages while doing pattern matching."""
       
  2002 
       
  2003         self.debugActions = (startAction or _defaultStartDebugAction, 
       
  2004 
       
  2005                              successAction or _defaultSuccessDebugAction, 
       
  2006 
       
  2007                              exceptionAction or _defaultExceptionDebugAction)
       
  2008 
       
  2009         self.debug = True
       
  2010 
       
  2011         return self
       
  2012 
       
  2013 
       
  2014 
       
  2015     def setDebug( self, flag=True ):
       
  2016 
       
  2017         """Enable display of debugging messages while doing pattern matching."""
       
  2018 
       
  2019         if flag:
       
  2020 
       
  2021             self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
       
  2022 
       
  2023         else:
       
  2024 
       
  2025             self.debug = False
       
  2026 
       
  2027         return self
       
  2028 
       
  2029 
       
  2030 
       
  2031     def __str__( self ):
       
  2032 
       
  2033         return self.name
       
  2034 
       
  2035 
       
  2036 
       
  2037     def __repr__( self ):
       
  2038 
       
  2039         return _ustr(self)
       
  2040 
       
  2041         
       
  2042 
       
  2043     def streamline( self ):
       
  2044 
       
  2045         self.streamlined = True
       
  2046 
       
  2047         self.strRepr = None
       
  2048 
       
  2049         return self
       
  2050 
       
  2051         
       
  2052 
       
  2053     def checkRecursion( self, parseElementList ):
       
  2054 
       
  2055         pass
       
  2056 
       
  2057         
       
  2058 
       
  2059     def validate( self, validateTrace=[] ):
       
  2060 
       
  2061         """Check defined expressions for valid structure, check for infinite recursive definitions."""
       
  2062 
       
  2063         self.checkRecursion( [] )
       
  2064 
       
  2065 
       
  2066 
       
  2067     def parseFile( self, file_or_filename ):
       
  2068 
       
  2069         """Execute the parse expression on the given file or filename.
       
  2070 
       
  2071            If a filename is specified (instead of a file object),
       
  2072 
       
  2073            the entire file is opened, read, and closed before parsing.
       
  2074 
       
  2075         """
       
  2076 
       
  2077         try:
       
  2078 
       
  2079             file_contents = file_or_filename.read()
       
  2080 
       
  2081         except AttributeError:
       
  2082 
       
  2083             f = open(file_or_filename, "rb")
       
  2084 
       
  2085             file_contents = f.read()
       
  2086 
       
  2087             f.close()
       
  2088 
       
  2089         return self.parseString(file_contents)
       
  2090 
       
  2091 
       
  2092 
       
  2093 
       
  2094 
       
  2095 class Token(ParserElement):
       
  2096 
       
  2097     """Abstract ParserElement subclass, for defining atomic matching patterns."""
       
  2098 
       
  2099     def __init__( self ):
       
  2100 
       
  2101         super(Token,self).__init__( savelist=False )
       
  2102 
       
  2103         self.myException = ParseException("",0,"",self)
       
  2104 
       
  2105 
       
  2106 
       
  2107     def setName(self, name):
       
  2108 
       
  2109         s = super(Token,self).setName(name)
       
  2110 
       
  2111         self.errmsg = "Expected " + self.name
       
  2112 
       
  2113         s.myException.msg = self.errmsg
       
  2114 
       
  2115         return s
       
  2116 
       
  2117 
       
  2118 
       
  2119 
       
  2120 
       
  2121 class Empty(Token):
       
  2122 
       
  2123     """An empty token, will always match."""
       
  2124 
       
  2125     def __init__( self ):
       
  2126 
       
  2127         super(Empty,self).__init__()
       
  2128 
       
  2129         self.name = "Empty"
       
  2130 
       
  2131         self.mayReturnEmpty = True
       
  2132 
       
  2133         self.mayIndexError = False
       
  2134 
       
  2135 
       
  2136 
       
  2137 
       
  2138 
       
  2139 class NoMatch(Token):
       
  2140 
       
  2141     """A token that will never match."""
       
  2142 
       
  2143     def __init__( self ):
       
  2144 
       
  2145         super(NoMatch,self).__init__()
       
  2146 
       
  2147         self.name = "NoMatch"
       
  2148 
       
  2149         self.mayReturnEmpty = True
       
  2150 
       
  2151         self.mayIndexError = False
       
  2152 
       
  2153         self.errmsg = "Unmatchable token"
       
  2154 
       
  2155         self.myException.msg = self.errmsg
       
  2156 
       
  2157         
       
  2158 
       
  2159     def parseImpl( self, instring, loc, doActions=True ):
       
  2160 
       
  2161         exc = self.myException
       
  2162 
       
  2163         exc.loc = loc
       
  2164 
       
  2165         exc.pstr = instring
       
  2166 
       
  2167         raise exc
       
  2168 
       
  2169 
       
  2170 
       
  2171 
       
  2172 
       
  2173 class Literal(Token):
       
  2174 
       
  2175     """Token to exactly match a specified string."""
       
  2176 
       
  2177     def __init__( self, matchString ):
       
  2178 
       
  2179         super(Literal,self).__init__()
       
  2180 
       
  2181         self.match = matchString
       
  2182 
       
  2183         self.matchLen = len(matchString)
       
  2184 
       
  2185         try:
       
  2186 
       
  2187             self.firstMatchChar = matchString[0]
       
  2188 
       
  2189         except IndexError:
       
  2190 
       
  2191             warnings.warn("null string passed to Literal; use Empty() instead", 
       
  2192 
       
  2193                             SyntaxWarning, stacklevel=2)
       
  2194 
       
  2195             self.__class__ = Empty
       
  2196 
       
  2197         self.name = '"%s"' % self.match
       
  2198 
       
  2199         self.errmsg = "Expected " + self.name
       
  2200 
       
  2201         self.mayReturnEmpty = False
       
  2202 
       
  2203         self.myException.msg = self.errmsg
       
  2204 
       
  2205         self.mayIndexError = False
       
  2206 
       
  2207 
       
  2208 
       
  2209     # Performance tuning: this routine gets called a *lot*
       
  2210 
       
  2211     # if this is a single character match string  and the first character matches,
       
  2212 
       
  2213     # short-circuit as quickly as possible, and avoid calling startswith
       
  2214 
       
  2215     #~ @profile
       
  2216 
       
  2217     def parseImpl( self, instring, loc, doActions=True ):
       
  2218 
       
  2219         if (instring[loc] == self.firstMatchChar and
       
  2220 
       
  2221             (self.matchLen==1 or instring.startswith(self.match,loc)) ):
       
  2222 
       
  2223             return loc+self.matchLen, self.match
       
  2224 
       
  2225         #~ raise ParseException( instring, loc, self.errmsg )
       
  2226 
       
  2227         exc = self.myException
       
  2228 
       
  2229         exc.loc = loc
       
  2230 
       
  2231         exc.pstr = instring
       
  2232 
       
  2233         raise exc
       
  2234 
       
  2235 
       
  2236 
       
  2237 class Keyword(Token):
       
  2238 
       
  2239     """Token to exactly match a specified string as a keyword, that is, it must be 
       
  2240 
       
  2241        immediately followed by a non-keyword character.  Compare with Literal::
       
  2242 
       
  2243          Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
       
  2244 
       
  2245          Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
       
  2246 
       
  2247        Accepts two optional constructor arguments in addition to the keyword string:
       
  2248 
       
  2249        identChars is a string of characters that would be valid identifier characters,
       
  2250 
       
  2251        defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       
  2252 
       
  2253        matching, default is False.
       
  2254 
       
  2255     """
       
  2256 
       
  2257     DEFAULT_KEYWORD_CHARS = alphanums+"_$"
       
  2258 
       
  2259     
       
  2260 
       
  2261     def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
       
  2262 
       
  2263         super(Keyword,self).__init__()
       
  2264 
       
  2265         self.match = matchString
       
  2266 
       
  2267         self.matchLen = len(matchString)
       
  2268 
       
  2269         try:
       
  2270 
       
  2271             self.firstMatchChar = matchString[0]
       
  2272 
       
  2273         except IndexError:
       
  2274 
       
  2275             warnings.warn("null string passed to Keyword; use Empty() instead", 
       
  2276 
       
  2277                             SyntaxWarning, stacklevel=2)
       
  2278 
       
  2279         self.name = '"%s"' % self.match
       
  2280 
       
  2281         self.errmsg = "Expected " + self.name
       
  2282 
       
  2283         self.mayReturnEmpty = False
       
  2284 
       
  2285         self.myException.msg = self.errmsg
       
  2286 
       
  2287         self.mayIndexError = False
       
  2288 
       
  2289         self.caseless = caseless
       
  2290 
       
  2291         if caseless:
       
  2292 
       
  2293             self.caselessmatch = matchString.upper()
       
  2294 
       
  2295             identChars = identChars.upper()
       
  2296 
       
  2297         self.identChars = _str2dict(identChars)
       
  2298 
       
  2299 
       
  2300 
       
  2301     def parseImpl( self, instring, loc, doActions=True ):
       
  2302 
       
  2303         if self.caseless:
       
  2304 
       
  2305             if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
       
  2306 
       
  2307                  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
       
  2308 
       
  2309                  (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
       
  2310 
       
  2311                 return loc+self.matchLen, self.match
       
  2312 
       
  2313         else:
       
  2314 
       
  2315             if (instring[loc] == self.firstMatchChar and
       
  2316 
       
  2317                 (self.matchLen==1 or instring.startswith(self.match,loc)) and
       
  2318 
       
  2319                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
       
  2320 
       
  2321                 (loc == 0 or instring[loc-1] not in self.identChars) ):
       
  2322 
       
  2323                 return loc+self.matchLen, self.match
       
  2324 
       
  2325         #~ raise ParseException( instring, loc, self.errmsg )
       
  2326 
       
  2327         exc = self.myException
       
  2328 
       
  2329         exc.loc = loc
       
  2330 
       
  2331         exc.pstr = instring
       
  2332 
       
  2333         raise exc
       
  2334 
       
  2335         
       
  2336 
       
  2337     def copy(self):
       
  2338 
       
  2339         c = super(Keyword,self).copy()
       
  2340 
       
  2341         c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
       
  2342 
       
  2343         return c
       
  2344 
       
  2345         
       
  2346 
       
  2347     def setDefaultKeywordChars( chars ):
       
  2348 
       
  2349         """Overrides the default Keyword chars
       
  2350 
       
  2351         """
       
  2352 
       
  2353         Keyword.DEFAULT_KEYWORD_CHARS = chars
       
  2354 
       
  2355     setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)        
       
  2356 
       
  2357 
       
  2358 
       
  2359 
       
  2360 
       
  2361 class CaselessLiteral(Literal):
       
  2362 
       
  2363     """Token to match a specified string, ignoring case of letters.
       
  2364 
       
  2365        Note: the matched results will always be in the case of the given
       
  2366 
       
  2367        match string, NOT the case of the input text.
       
  2368 
       
  2369     """
       
  2370 
       
  2371     def __init__( self, matchString ):
       
  2372 
       
  2373         super(CaselessLiteral,self).__init__( matchString.upper() )
       
  2374 
       
  2375         # Preserve the defining literal.
       
  2376 
       
  2377         self.returnString = matchString
       
  2378 
       
  2379         self.name = "'%s'" % self.returnString
       
  2380 
       
  2381         self.errmsg = "Expected " + self.name
       
  2382 
       
  2383         self.myException.msg = self.errmsg
       
  2384 
       
  2385 
       
  2386 
       
  2387     def parseImpl( self, instring, loc, doActions=True ):
       
  2388 
       
  2389         if instring[ loc:loc+self.matchLen ].upper() == self.match:
       
  2390 
       
  2391             return loc+self.matchLen, self.returnString
       
  2392 
       
  2393         #~ raise ParseException( instring, loc, self.errmsg )
       
  2394 
       
  2395         exc = self.myException
       
  2396 
       
  2397         exc.loc = loc
       
  2398 
       
  2399         exc.pstr = instring
       
  2400 
       
  2401         raise exc
       
  2402 
       
  2403 
       
  2404 
       
  2405 class CaselessKeyword(Keyword):
       
  2406 
       
  2407     def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
       
  2408 
       
  2409         super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
       
  2410 
       
  2411 
       
  2412 
       
  2413     def parseImpl( self, instring, loc, doActions=True ):
       
  2414 
       
  2415         if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
       
  2416 
       
  2417              (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
       
  2418 
       
  2419             return loc+self.matchLen, self.match
       
  2420 
       
  2421         #~ raise ParseException( instring, loc, self.errmsg )
       
  2422 
       
  2423         exc = self.myException
       
  2424 
       
  2425         exc.loc = loc
       
  2426 
       
  2427         exc.pstr = instring
       
  2428 
       
  2429         raise exc
       
  2430 
       
  2431 
       
  2432 
       
  2433 class Word(Token):
       
  2434 
       
  2435     """Token for matching words composed of allowed character sets.
       
  2436 
       
  2437        Defined with string containing all allowed initial characters,
       
  2438 
       
  2439        an optional string containing allowed body characters (if omitted,
       
  2440 
       
  2441        defaults to the initial character set), and an optional minimum,
       
  2442 
       
  2443        maximum, and/or exact length.
       
  2444 
       
  2445     """
       
  2446 
       
  2447     def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ):
       
  2448 
       
  2449         super(Word,self).__init__()
       
  2450 
       
  2451         self.initCharsOrig = initChars
       
  2452 
       
  2453         self.initChars = _str2dict(initChars)
       
  2454 
       
  2455         if bodyChars :
       
  2456 
       
  2457             self.bodyCharsOrig = bodyChars
       
  2458 
       
  2459             self.bodyChars = _str2dict(bodyChars)
       
  2460 
       
  2461         else:
       
  2462 
       
  2463             self.bodyCharsOrig = initChars
       
  2464 
       
  2465             self.bodyChars = _str2dict(initChars)
       
  2466 
       
  2467             
       
  2468 
       
  2469         self.maxSpecified = max > 0
       
  2470 
       
  2471 
       
  2472 
       
  2473         self.minLen = min
       
  2474 
       
  2475 
       
  2476 
       
  2477         if max > 0:
       
  2478 
       
  2479             self.maxLen = max
       
  2480 
       
  2481         else:
       
  2482 
       
  2483             self.maxLen = sys.maxint
       
  2484 
       
  2485 
       
  2486 
       
  2487         if exact > 0:
       
  2488 
       
  2489             self.maxLen = exact
       
  2490 
       
  2491             self.minLen = exact
       
  2492 
       
  2493 
       
  2494 
       
  2495         self.name = _ustr(self)
       
  2496 
       
  2497         self.errmsg = "Expected " + self.name
       
  2498 
       
  2499         self.myException.msg = self.errmsg
       
  2500 
       
  2501         self.mayIndexError = False
       
  2502 
       
  2503         
       
  2504 
       
  2505         if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
       
  2506 
       
  2507             if self.bodyCharsOrig == self.initCharsOrig:
       
  2508 
       
  2509                 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
       
  2510 
       
  2511             elif len(self.bodyCharsOrig) == 1:
       
  2512 
       
  2513                 self.reString = "%s[%s]*" % \
       
  2514                                       (re.escape(self.initCharsOrig),
       
  2515 
       
  2516                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
       
  2517 
       
  2518             else:
       
  2519 
       
  2520                 self.reString = "[%s][%s]*" % \
       
  2521                                       (_escapeRegexRangeChars(self.initCharsOrig),
       
  2522 
       
  2523                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
       
  2524 
       
  2525             try:
       
  2526 
       
  2527                 self.re = re.compile( self.reString )
       
  2528 
       
  2529             except:
       
  2530 
       
  2531                 self.re = None
       
  2532 
       
  2533         
       
  2534 
       
  2535     def parseImpl( self, instring, loc, doActions=True ):
       
  2536 
       
  2537         if self.re:
       
  2538 
       
  2539             result = self.re.match(instring,loc)
       
  2540 
       
  2541             if not result:
       
  2542 
       
  2543                 exc = self.myException
       
  2544 
       
  2545                 exc.loc = loc
       
  2546 
       
  2547                 exc.pstr = instring
       
  2548 
       
  2549                 raise exc
       
  2550 
       
  2551             
       
  2552 
       
  2553             loc = result.end()
       
  2554 
       
  2555             return loc,result.group()
       
  2556 
       
  2557         
       
  2558 
       
  2559         if not(instring[ loc ] in self.initChars):
       
  2560 
       
  2561             #~ raise ParseException( instring, loc, self.errmsg )
       
  2562 
       
  2563             exc = self.myException
       
  2564 
       
  2565             exc.loc = loc
       
  2566 
       
  2567             exc.pstr = instring
       
  2568 
       
  2569             raise exc
       
  2570 
       
  2571         start = loc
       
  2572 
       
  2573         loc += 1
       
  2574 
       
  2575         instrlen = len(instring)
       
  2576 
       
  2577         bodychars = self.bodyChars
       
  2578 
       
  2579         maxloc = start + self.maxLen
       
  2580 
       
  2581         maxloc = min( maxloc, instrlen )
       
  2582 
       
  2583         while loc < maxloc and instring[loc] in bodychars:
       
  2584 
       
  2585             loc += 1
       
  2586 
       
  2587             
       
  2588 
       
  2589         throwException = False
       
  2590 
       
  2591         if loc - start < self.minLen:
       
  2592 
       
  2593             throwException = True
       
  2594 
       
  2595         if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
       
  2596 
       
  2597             throwException = True
       
  2598 
       
  2599 
       
  2600 
       
  2601         if throwException:
       
  2602 
       
  2603             #~ raise ParseException( instring, loc, self.errmsg )
       
  2604 
       
  2605             exc = self.myException
       
  2606 
       
  2607             exc.loc = loc
       
  2608 
       
  2609             exc.pstr = instring
       
  2610 
       
  2611             raise exc
       
  2612 
       
  2613 
       
  2614 
       
  2615         return loc, instring[start:loc]
       
  2616 
       
  2617 
       
  2618 
       
  2619     def __str__( self ):
       
  2620 
       
  2621         try:
       
  2622 
       
  2623             return super(Word,self).__str__()
       
  2624 
       
  2625         except:
       
  2626 
       
  2627             pass
       
  2628 
       
  2629 
       
  2630 
       
  2631             
       
  2632 
       
  2633         if self.strRepr is None:
       
  2634 
       
  2635             
       
  2636 
       
  2637             def charsAsStr(s):
       
  2638 
       
  2639                 if len(s)>4:
       
  2640 
       
  2641                     return s[:4]+"..."
       
  2642 
       
  2643                 else:
       
  2644 
       
  2645                     return s
       
  2646 
       
  2647             
       
  2648 
       
  2649             if ( self.initCharsOrig != self.bodyCharsOrig ):
       
  2650 
       
  2651                 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
       
  2652 
       
  2653             else:
       
  2654 
       
  2655                 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
       
  2656 
       
  2657 
       
  2658 
       
  2659         return self.strRepr
       
  2660 
       
  2661 
       
  2662 
       
  2663 
       
  2664 
       
  2665 class Regex(Token):
       
  2666 
       
  2667     """Token for matching strings that match a given regular expression.
       
  2668 
       
  2669        Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
       
  2670 
       
  2671     """
       
  2672 
       
  2673     def __init__( self, pattern, flags=0):
       
  2674 
       
  2675         """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
       
  2676 
       
  2677         super(Regex,self).__init__()
       
  2678 
       
  2679         
       
  2680 
       
  2681         if len(pattern) == 0:
       
  2682 
       
  2683             warnings.warn("null string passed to Regex; use Empty() instead", 
       
  2684 
       
  2685                     SyntaxWarning, stacklevel=2)
       
  2686 
       
  2687     
       
  2688 
       
  2689         self.pattern = pattern
       
  2690 
       
  2691         self.flags = flags
       
  2692 
       
  2693         
       
  2694 
       
  2695         try:
       
  2696 
       
  2697             self.re = re.compile(self.pattern, self.flags)
       
  2698 
       
  2699             self.reString = self.pattern
       
  2700 
       
  2701         except sre_constants.error,e:
       
  2702 
       
  2703             warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 
       
  2704 
       
  2705                 SyntaxWarning, stacklevel=2)
       
  2706 
       
  2707             raise
       
  2708 
       
  2709 
       
  2710 
       
  2711         self.name = _ustr(self)
       
  2712 
       
  2713         self.errmsg = "Expected " + self.name
       
  2714 
       
  2715         self.myException.msg = self.errmsg
       
  2716 
       
  2717         self.mayIndexError = False
       
  2718 
       
  2719         self.mayReturnEmpty = True
       
  2720 
       
  2721     
       
  2722 
       
  2723     def parseImpl( self, instring, loc, doActions=True ):
       
  2724 
       
  2725         result = self.re.match(instring,loc)
       
  2726 
       
  2727         if not result:
       
  2728 
       
  2729             exc = self.myException
       
  2730 
       
  2731             exc.loc = loc
       
  2732 
       
  2733             exc.pstr = instring
       
  2734 
       
  2735             raise exc
       
  2736 
       
  2737         
       
  2738 
       
  2739         loc = result.end()
       
  2740 
       
  2741         d = result.groupdict()
       
  2742 
       
  2743         ret = ParseResults(result.group())
       
  2744 
       
  2745         if d:
       
  2746 
       
  2747             for k in d.keys():
       
  2748 
       
  2749                 ret[k] = d[k]
       
  2750 
       
  2751         return loc,ret
       
  2752 
       
  2753     
       
  2754 
       
  2755     def __str__( self ):
       
  2756 
       
  2757         try:
       
  2758 
       
  2759             return super(Regex,self).__str__()
       
  2760 
       
  2761         except:
       
  2762 
       
  2763             pass
       
  2764 
       
  2765         
       
  2766 
       
  2767         if self.strRepr is None:
       
  2768 
       
  2769             self.strRepr = "Re:(%s)" % repr(self.pattern)
       
  2770 
       
  2771         
       
  2772 
       
  2773         return self.strRepr
       
  2774 
       
  2775 
       
  2776 
       
  2777 
       
  2778 
       
  2779 class QuotedString(Token):
       
  2780 
       
  2781     """Token for matching strings that are delimited by quoting characters.
       
  2782 
       
  2783     """
       
  2784 
       
  2785     def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
       
  2786 
       
  2787         """
       
  2788 
       
  2789            Defined with the following parameters:
       
  2790 
       
  2791            - quoteChar - string of one or more characters defining the quote delimiting string
       
  2792 
       
  2793            - escChar - character to escape quotes, typically backslash (default=None)
       
  2794 
       
  2795            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
       
  2796 
       
  2797            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
       
  2798 
       
  2799            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
       
  2800 
       
  2801            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
       
  2802 
       
  2803         """
       
  2804 
       
  2805         super(QuotedString,self).__init__()
       
  2806 
       
  2807         
       
  2808 
       
  2809         # remove white space from quote chars - wont work anyway
       
  2810 
       
  2811         quoteChar = quoteChar.strip()
       
  2812 
       
  2813         if len(quoteChar) == 0:
       
  2814 
       
  2815             warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
       
  2816 
       
  2817             raise SyntaxError()
       
  2818 
       
  2819         
       
  2820 
       
  2821         if endQuoteChar is None:
       
  2822 
       
  2823             endQuoteChar = quoteChar
       
  2824 
       
  2825         else:
       
  2826 
       
  2827             endQuoteChar = endQuoteChar.strip()
       
  2828 
       
  2829             if len(endQuoteChar) == 0:
       
  2830 
       
  2831                 warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
       
  2832 
       
  2833                 raise SyntaxError()
       
  2834 
       
  2835         
       
  2836 
       
  2837         self.quoteChar = quoteChar
       
  2838 
       
  2839         self.quoteCharLen = len(quoteChar)
       
  2840 
       
  2841         self.firstQuoteChar = quoteChar[0]
       
  2842 
       
  2843         self.endQuoteChar = endQuoteChar
       
  2844 
       
  2845         self.endQuoteCharLen = len(endQuoteChar)
       
  2846 
       
  2847         self.escChar = escChar
       
  2848 
       
  2849         self.escQuote = escQuote
       
  2850 
       
  2851         self.unquoteResults = unquoteResults
       
  2852 
       
  2853         
       
  2854 
       
  2855         if multiline:
       
  2856 
       
  2857             self.flags = re.MULTILINE | re.DOTALL
       
  2858 
       
  2859             self.pattern = r'%s(?:[^%s%s]' % \
       
  2860                 ( re.escape(self.quoteChar),
       
  2861 
       
  2862                   _escapeRegexRangeChars(self.endQuoteChar[0]),
       
  2863 
       
  2864                   (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
       
  2865 
       
  2866         else:
       
  2867 
       
  2868             self.flags = 0
       
  2869 
       
  2870             self.pattern = r'%s(?:[^%s\n\r%s]' % \
       
  2871                 ( re.escape(self.quoteChar),
       
  2872 
       
  2873                   _escapeRegexRangeChars(self.endQuoteChar[0]),
       
  2874 
       
  2875                   (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
       
  2876 
       
  2877         if len(self.endQuoteChar) > 1:
       
  2878 
       
  2879             self.pattern += (
       
  2880 
       
  2881                 '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
       
  2882 
       
  2883                                                _escapeRegexRangeChars(self.endQuoteChar[i])) 
       
  2884 
       
  2885                                     for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
       
  2886 
       
  2887                 )
       
  2888 
       
  2889         if escQuote:
       
  2890 
       
  2891             self.pattern += (r'|(?:%s)' % re.escape(escQuote))
       
  2892 
       
  2893         if escChar:
       
  2894 
       
  2895             self.pattern += (r'|(?:%s.)' % re.escape(escChar))
       
  2896 
       
  2897             self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
       
  2898 
       
  2899         self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
       
  2900 
       
  2901         
       
  2902 
       
  2903         try:
       
  2904 
       
  2905             self.re = re.compile(self.pattern, self.flags)
       
  2906 
       
  2907             self.reString = self.pattern
       
  2908 
       
  2909         except sre_constants.error,e:
       
  2910 
       
  2911             warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 
       
  2912 
       
  2913                 SyntaxWarning, stacklevel=2)
       
  2914 
       
  2915             raise
       
  2916 
       
  2917 
       
  2918 
       
  2919         self.name = _ustr(self)
       
  2920 
       
  2921         self.errmsg = "Expected " + self.name
       
  2922 
       
  2923         self.myException.msg = self.errmsg
       
  2924 
       
  2925         self.mayIndexError = False
       
  2926 
       
  2927         self.mayReturnEmpty = True
       
  2928 
       
  2929     
       
  2930 
       
  2931     def parseImpl( self, instring, loc, doActions=True ):
       
  2932 
       
  2933         result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
       
  2934 
       
  2935         if not result:
       
  2936 
       
  2937             exc = self.myException
       
  2938 
       
  2939             exc.loc = loc
       
  2940 
       
  2941             exc.pstr = instring
       
  2942 
       
  2943             raise exc
       
  2944 
       
  2945         
       
  2946 
       
  2947         loc = result.end()
       
  2948 
       
  2949         ret = result.group()
       
  2950 
       
  2951         
       
  2952 
       
  2953         if self.unquoteResults:
       
  2954 
       
  2955             
       
  2956 
       
  2957             # strip off quotes
       
  2958 
       
  2959             ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
       
  2960 
       
  2961                 
       
  2962 
       
  2963             if isinstance(ret,basestring):
       
  2964 
       
  2965                 # replace escaped characters
       
  2966 
       
  2967                 if self.escChar:
       
  2968 
       
  2969                     ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)
       
  2970 
       
  2971 
       
  2972 
       
  2973                 # replace escaped quotes
       
  2974 
       
  2975                 if self.escQuote:
       
  2976 
       
  2977                     ret = ret.replace(self.escQuote, self.endQuoteChar)
       
  2978 
       
  2979 
       
  2980 
       
  2981         return loc, ret
       
  2982 
       
  2983     
       
  2984 
       
  2985     def __str__( self ):
       
  2986 
       
  2987         try:
       
  2988 
       
  2989             return super(QuotedString,self).__str__()
       
  2990 
       
  2991         except:
       
  2992 
       
  2993             pass
       
  2994 
       
  2995         
       
  2996 
       
  2997         if self.strRepr is None:
       
  2998 
       
  2999             self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
       
  3000 
       
  3001         
       
  3002 
       
  3003         return self.strRepr
       
  3004 
       
  3005 
       
  3006 
       
  3007 
       
  3008 
       
  3009 class CharsNotIn(Token):
       
  3010 
       
  3011     """Token for matching words composed of characters *not* in a given set.
       
  3012 
       
  3013        Defined with string containing all disallowed characters, and an optional 
       
  3014 
       
  3015        minimum, maximum, and/or exact length.
       
  3016 
       
  3017     """
       
  3018 
       
  3019     def __init__( self, notChars, min=1, max=0, exact=0 ):
       
  3020 
       
  3021         super(CharsNotIn,self).__init__()
       
  3022 
       
  3023         self.skipWhitespace = False
       
  3024 
       
  3025         self.notChars = notChars
       
  3026 
       
  3027         
       
  3028 
       
  3029         self.minLen = min
       
  3030 
       
  3031 
       
  3032 
       
  3033         if max > 0:
       
  3034 
       
  3035             self.maxLen = max
       
  3036 
       
  3037         else:
       
  3038 
       
  3039             self.maxLen = sys.maxint
       
  3040 
       
  3041 
       
  3042 
       
  3043         if exact > 0:
       
  3044 
       
  3045             self.maxLen = exact
       
  3046 
       
  3047             self.minLen = exact
       
  3048 
       
  3049         
       
  3050 
       
  3051         self.name = _ustr(self)
       
  3052 
       
  3053         self.errmsg = "Expected " + self.name
       
  3054 
       
  3055         self.mayReturnEmpty = ( self.minLen == 0 )
       
  3056 
       
  3057         self.myException.msg = self.errmsg
       
  3058 
       
  3059         self.mayIndexError = False
       
  3060 
       
  3061 
       
  3062 
       
  3063     def parseImpl( self, instring, loc, doActions=True ):
       
  3064 
       
  3065         if instring[loc] in self.notChars:
       
  3066 
       
  3067             #~ raise ParseException( instring, loc, self.errmsg )
       
  3068 
       
  3069             exc = self.myException
       
  3070 
       
  3071             exc.loc = loc
       
  3072 
       
  3073             exc.pstr = instring
       
  3074 
       
  3075             raise exc
       
  3076 
       
  3077             
       
  3078 
       
  3079         start = loc
       
  3080 
       
  3081         loc += 1
       
  3082 
       
  3083         notchars = self.notChars
       
  3084 
       
  3085         maxlen = min( start+self.maxLen, len(instring) )
       
  3086 
       
  3087         while loc < maxlen and (instring[loc] not in notchars):
       
  3088 
       
  3089             loc += 1
       
  3090 
       
  3091 
       
  3092 
       
  3093         if loc - start < self.minLen:
       
  3094 
       
  3095             #~ raise ParseException( instring, loc, self.errmsg )
       
  3096 
       
  3097             exc = self.myException
       
  3098 
       
  3099             exc.loc = loc
       
  3100 
       
  3101             exc.pstr = instring
       
  3102 
       
  3103             raise exc
       
  3104 
       
  3105 
       
  3106 
       
  3107         return loc, instring[start:loc]
       
  3108 
       
  3109 
       
  3110 
       
  3111     def __str__( self ):
       
  3112 
       
  3113         try:
       
  3114 
       
  3115             return super(CharsNotIn, self).__str__()
       
  3116 
       
  3117         except:
       
  3118 
       
  3119             pass
       
  3120 
       
  3121 
       
  3122 
       
  3123         if self.strRepr is None:
       
  3124 
       
  3125             if len(self.notChars) > 4:
       
  3126 
       
  3127                 self.strRepr = "!W:(%s...)" % self.notChars[:4]
       
  3128 
       
  3129             else:
       
  3130 
       
  3131                 self.strRepr = "!W:(%s)" % self.notChars
       
  3132 
       
  3133         
       
  3134 
       
  3135         return self.strRepr
       
  3136 
       
  3137 
       
  3138 
       
  3139 class White(Token):
       
  3140 
       
  3141     """Special matching class for matching whitespace.  Normally, whitespace is ignored
       
  3142 
       
  3143        by pyparsing grammars.  This class is included when some whitespace structures
       
  3144 
       
  3145        are significant.  Define with a string containing the whitespace characters to be
       
  3146 
       
  3147        matched; default is " \\t\\n".  Also takes optional min, max, and exact arguments,
       
  3148 
       
  3149        as defined for the Word class."""
       
  3150 
       
  3151     whiteStrs = {
       
  3152 
       
  3153         " " : "<SPC>",
       
  3154 
       
  3155         "\t": "<TAB>",
       
  3156 
       
  3157         "\n": "<LF>",
       
  3158 
       
  3159         "\r": "<CR>",
       
  3160 
       
  3161         "\f": "<FF>",
       
  3162 
       
  3163         }
       
  3164 
       
  3165     def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
       
  3166 
       
  3167         super(White,self).__init__()
       
  3168 
       
  3169         self.matchWhite = ws
       
  3170 
       
  3171         self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) )
       
  3172 
       
  3173         #~ self.leaveWhitespace()
       
  3174 
       
  3175         self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
       
  3176 
       
  3177         self.mayReturnEmpty = True
       
  3178 
       
  3179         self.errmsg = "Expected " + self.name
       
  3180 
       
  3181         self.myException.msg = self.errmsg
       
  3182 
       
  3183 
       
  3184 
       
  3185         self.minLen = min
       
  3186 
       
  3187 
       
  3188 
       
  3189         if max > 0:
       
  3190 
       
  3191             self.maxLen = max
       
  3192 
       
  3193         else:
       
  3194 
       
  3195             self.maxLen = sys.maxint
       
  3196 
       
  3197 
       
  3198 
       
  3199         if exact > 0:
       
  3200 
       
  3201             self.maxLen = exact
       
  3202 
       
  3203             self.minLen = exact
       
  3204 
       
  3205             
       
  3206 
       
  3207     def parseImpl( self, instring, loc, doActions=True ):
       
  3208 
       
  3209         if not(instring[ loc ] in self.matchWhite):
       
  3210 
       
  3211             #~ raise ParseException( instring, loc, self.errmsg )
       
  3212 
       
  3213             exc = self.myException
       
  3214 
       
  3215             exc.loc = loc
       
  3216 
       
  3217             exc.pstr = instring
       
  3218 
       
  3219             raise exc
       
  3220 
       
  3221         start = loc
       
  3222 
       
  3223         loc += 1
       
  3224 
       
  3225         maxloc = start + self.maxLen
       
  3226 
       
  3227         maxloc = min( maxloc, len(instring) )
       
  3228 
       
  3229         while loc < maxloc and instring[loc] in self.matchWhite:
       
  3230 
       
  3231             loc += 1
       
  3232 
       
  3233 
       
  3234 
       
  3235         if loc - start < self.minLen:
       
  3236 
       
  3237             #~ raise ParseException( instring, loc, self.errmsg )
       
  3238 
       
  3239             exc = self.myException
       
  3240 
       
  3241             exc.loc = loc
       
  3242 
       
  3243             exc.pstr = instring
       
  3244 
       
  3245             raise exc
       
  3246 
       
  3247 
       
  3248 
       
  3249         return loc, instring[start:loc]
       
  3250 
       
  3251 
       
  3252 
       
  3253 
       
  3254 
       
  3255 class PositionToken(Token):
       
  3256 
       
  3257     def __init__( self ):
       
  3258 
       
  3259         super(PositionToken,self).__init__()
       
  3260 
       
  3261         self.name=self.__class__.__name__
       
  3262 
       
  3263         self.mayReturnEmpty = True
       
  3264 
       
  3265         self.mayIndexError = False
       
  3266 
       
  3267 
       
  3268 
       
  3269 class GoToColumn(PositionToken):
       
  3270 
       
  3271     """Token to advance to a specific column of input text; useful for tabular report scraping."""
       
  3272 
       
  3273     def __init__( self, colno ):
       
  3274 
       
  3275         super(GoToColumn,self).__init__()
       
  3276 
       
  3277         self.col = colno
       
  3278 
       
  3279 
       
  3280 
       
  3281     def preParse( self, instring, loc ):
       
  3282 
       
  3283         if col(loc,instring) != self.col:
       
  3284 
       
  3285             instrlen = len(instring)
       
  3286 
       
  3287             if self.ignoreExprs:
       
  3288 
       
  3289                 loc = self.skipIgnorables( instring, loc )
       
  3290 
       
  3291             while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
       
  3292 
       
  3293                 loc += 1
       
  3294 
       
  3295         return loc
       
  3296 
       
  3297 
       
  3298 
       
  3299     def parseImpl( self, instring, loc, doActions=True ):
       
  3300 
       
  3301         thiscol = col( loc, instring )
       
  3302 
       
  3303         if thiscol > self.col:
       
  3304 
       
  3305             raise ParseException( instring, loc, "Text not in expected column", self )
       
  3306 
       
  3307         newloc = loc + self.col - thiscol
       
  3308 
       
  3309         ret = instring[ loc: newloc ]
       
  3310 
       
  3311         return newloc, ret
       
  3312 
       
  3313 
       
  3314 
       
  3315 class LineStart(PositionToken):
       
  3316 
       
  3317     """Matches if current position is at the beginning of a line within the parse string"""
       
  3318 
       
  3319     def __init__( self ):
       
  3320 
       
  3321         super(LineStart,self).__init__()
       
  3322 
       
  3323         self.setWhitespaceChars( " \t" )
       
  3324 
       
  3325         self.errmsg = "Expected start of line"
       
  3326 
       
  3327         self.myException.msg = self.errmsg
       
  3328 
       
  3329 
       
  3330 
       
  3331     def preParse( self, instring, loc ):
       
  3332 
       
  3333         preloc = super(LineStart,self).preParse(instring,loc)
       
  3334 
       
  3335         if instring[preloc] == "\n":
       
  3336 
       
  3337             loc += 1
       
  3338 
       
  3339         return loc
       
  3340 
       
  3341 
       
  3342 
       
  3343     def parseImpl( self, instring, loc, doActions=True ):
       
  3344 
       
  3345         if not( loc==0 or
       
  3346 
       
  3347             (loc == self.preParse( instring, 0 )) or
       
  3348 
       
  3349             (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
       
  3350 
       
  3351             #~ raise ParseException( instring, loc, "Expected start of line" )
       
  3352 
       
  3353             exc = self.myException
       
  3354 
       
  3355             exc.loc = loc
       
  3356 
       
  3357             exc.pstr = instring
       
  3358 
       
  3359             raise exc
       
  3360 
       
  3361         return loc, []
       
  3362 
       
  3363 
       
  3364 
       
  3365 class LineEnd(PositionToken):
       
  3366 
       
  3367     """Matches if current position is at the end of a line within the parse string"""
       
  3368 
       
  3369     def __init__( self ):
       
  3370 
       
  3371         super(LineEnd,self).__init__()
       
  3372 
       
  3373         self.setWhitespaceChars( " \t" )
       
  3374 
       
  3375         self.errmsg = "Expected end of line"
       
  3376 
       
  3377         self.myException.msg = self.errmsg
       
  3378 
       
  3379     
       
  3380 
       
  3381     def parseImpl( self, instring, loc, doActions=True ):
       
  3382 
       
  3383         if loc<len(instring):
       
  3384 
       
  3385             if instring[loc] == "\n":
       
  3386 
       
  3387                 return loc+1, "\n"
       
  3388 
       
  3389             else:
       
  3390 
       
  3391                 #~ raise ParseException( instring, loc, "Expected end of line" )
       
  3392 
       
  3393                 exc = self.myException
       
  3394 
       
  3395                 exc.loc = loc
       
  3396 
       
  3397                 exc.pstr = instring
       
  3398 
       
  3399                 raise exc
       
  3400 
       
  3401         elif loc == len(instring):
       
  3402 
       
  3403             return loc+1, []
       
  3404 
       
  3405         else:
       
  3406 
       
  3407             exc = self.myException
       
  3408 
       
  3409             exc.loc = loc
       
  3410 
       
  3411             exc.pstr = instring
       
  3412 
       
  3413             raise exc
       
  3414 
       
  3415 
       
  3416 
       
  3417 class StringStart(PositionToken):
       
  3418 
       
  3419     """Matches if current position is at the beginning of the parse string"""
       
  3420 
       
  3421     def __init__( self ):
       
  3422 
       
  3423         super(StringStart,self).__init__()
       
  3424 
       
  3425         self.errmsg = "Expected start of text"
       
  3426 
       
  3427         self.myException.msg = self.errmsg
       
  3428 
       
  3429     
       
  3430 
       
  3431     def parseImpl( self, instring, loc, doActions=True ):
       
  3432 
       
  3433         if loc != 0:
       
  3434 
       
  3435             # see if entire string up to here is just whitespace and ignoreables
       
  3436 
       
  3437             if loc != self.preParse( instring, 0 ):
       
  3438 
       
  3439                 #~ raise ParseException( instring, loc, "Expected start of text" )
       
  3440 
       
  3441                 exc = self.myException
       
  3442 
       
  3443                 exc.loc = loc
       
  3444 
       
  3445                 exc.pstr = instring
       
  3446 
       
  3447                 raise exc
       
  3448 
       
  3449         return loc, []
       
  3450 
       
  3451 
       
  3452 
       
  3453 class StringEnd(PositionToken):
       
  3454 
       
  3455     """Matches if current position is at the end of the parse string"""
       
  3456 
       
  3457     def __init__( self ):
       
  3458 
       
  3459         super(StringEnd,self).__init__()
       
  3460 
       
  3461         self.errmsg = "Expected end of text"
       
  3462 
       
  3463         self.myException.msg = self.errmsg
       
  3464 
       
  3465     
       
  3466 
       
  3467     def parseImpl( self, instring, loc, doActions=True ):
       
  3468 
       
  3469         if loc < len(instring):
       
  3470 
       
  3471             #~ raise ParseException( instring, loc, "Expected end of text" )
       
  3472 
       
  3473             exc = self.myException
       
  3474 
       
  3475             exc.loc = loc
       
  3476 
       
  3477             exc.pstr = instring
       
  3478 
       
  3479             raise exc
       
  3480 
       
  3481         elif loc == len(instring):
       
  3482 
       
  3483             return loc+1, []
       
  3484 
       
  3485         else:
       
  3486 
       
  3487             exc = self.myException
       
  3488 
       
  3489             exc.loc = loc
       
  3490 
       
  3491             exc.pstr = instring
       
  3492 
       
  3493             raise exc
       
  3494 
       
  3495 
       
  3496 
       
  3497 
       
  3498 
       
  3499 class ParseExpression(ParserElement):
       
  3500 
       
  3501     """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
       
  3502 
       
  3503     def __init__( self, exprs, savelist = False ):
       
  3504 
       
  3505         super(ParseExpression,self).__init__(savelist)
       
  3506 
       
  3507         if isinstance( exprs, list ):
       
  3508 
       
  3509             self.exprs = exprs
       
  3510 
       
  3511         elif isinstance( exprs, basestring ):
       
  3512 
       
  3513             self.exprs = [ Literal( exprs ) ]
       
  3514 
       
  3515         else:
       
  3516 
       
  3517             self.exprs = [ exprs ]
       
  3518 
       
  3519 
       
  3520 
       
  3521     def __getitem__( self, i ):
       
  3522 
       
  3523         return self.exprs[i]
       
  3524 
       
  3525 
       
  3526 
       
  3527     def append( self, other ):
       
  3528 
       
  3529         self.exprs.append( other )
       
  3530 
       
  3531         self.strRepr = None
       
  3532 
       
  3533         return self
       
  3534 
       
  3535 
       
  3536 
       
  3537     def leaveWhitespace( self ):
       
  3538 
       
  3539         """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
       
  3540 
       
  3541            all contained expressions."""
       
  3542 
       
  3543         self.skipWhitespace = False
       
  3544 
       
  3545         self.exprs = [ e.copy() for e in self.exprs ]
       
  3546 
       
  3547         for e in self.exprs:
       
  3548 
       
  3549             e.leaveWhitespace()
       
  3550 
       
  3551         return self
       
  3552 
       
  3553 
       
  3554 
       
  3555     def ignore( self, other ):
       
  3556 
       
  3557         if isinstance( other, Suppress ):
       
  3558 
       
  3559             if other not in self.ignoreExprs:
       
  3560 
       
  3561                 super( ParseExpression, self).ignore( other )
       
  3562 
       
  3563                 for e in self.exprs:
       
  3564 
       
  3565                     e.ignore( self.ignoreExprs[-1] )
       
  3566 
       
  3567         else:
       
  3568 
       
  3569             super( ParseExpression, self).ignore( other )
       
  3570 
       
  3571             for e in self.exprs:
       
  3572 
       
  3573                 e.ignore( self.ignoreExprs[-1] )
       
  3574 
       
  3575         return self
       
  3576 
       
  3577 
       
  3578 
       
  3579     def __str__( self ):
       
  3580 
       
  3581         try:
       
  3582 
       
  3583             return super(ParseExpression,self).__str__()
       
  3584 
       
  3585         except:
       
  3586 
       
  3587             pass
       
  3588 
       
  3589             
       
  3590 
       
  3591         if self.strRepr is None:
       
  3592 
       
  3593             self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
       
  3594 
       
  3595         return self.strRepr
       
  3596 
       
  3597 
       
  3598 
       
  3599     def streamline( self ):
       
  3600 
       
  3601         super(ParseExpression,self).streamline()
       
  3602 
       
  3603 
       
  3604 
       
  3605         for e in self.exprs:
       
  3606 
       
  3607             e.streamline()
       
  3608 
       
  3609 
       
  3610 
       
  3611         # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
       
  3612 
       
  3613         # but only if there are no parse actions or resultsNames on the nested And's
       
  3614 
       
  3615         # (likewise for Or's and MatchFirst's)
       
  3616 
       
  3617         if ( len(self.exprs) == 2 ):
       
  3618 
       
  3619             other = self.exprs[0]
       
  3620 
       
  3621             if ( isinstance( other, self.__class__ ) and
       
  3622 
       
  3623                   not(other.parseAction) and
       
  3624 
       
  3625                   other.resultsName is None and
       
  3626 
       
  3627                   not other.debug ):
       
  3628 
       
  3629                 self.exprs = other.exprs[:] + [ self.exprs[1] ]
       
  3630 
       
  3631                 self.strRepr = None
       
  3632 
       
  3633                 self.mayReturnEmpty |= other.mayReturnEmpty
       
  3634 
       
  3635                 self.mayIndexError  |= other.mayIndexError
       
  3636 
       
  3637 
       
  3638 
       
  3639             other = self.exprs[-1]
       
  3640 
       
  3641             if ( isinstance( other, self.__class__ ) and
       
  3642 
       
  3643                   not(other.parseAction) and
       
  3644 
       
  3645                   other.resultsName is None and
       
  3646 
       
  3647                   not other.debug ):
       
  3648 
       
  3649                 self.exprs = self.exprs[:-1] + other.exprs[:]
       
  3650 
       
  3651                 self.strRepr = None
       
  3652 
       
  3653                 self.mayReturnEmpty |= other.mayReturnEmpty
       
  3654 
       
  3655                 self.mayIndexError  |= other.mayIndexError
       
  3656 
       
  3657 
       
  3658 
       
  3659         return self
       
  3660 
       
  3661 
       
  3662 
       
  3663     def setResultsName( self, name, listAllMatches=False ):
       
  3664 
       
  3665         ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
       
  3666 
       
  3667         return ret
       
  3668 
       
  3669     
       
  3670 
       
  3671     def validate( self, validateTrace=[] ):
       
  3672 
       
  3673         tmp = validateTrace[:]+[self]
       
  3674 
       
  3675         for e in self.exprs:
       
  3676 
       
  3677             e.validate(tmp)
       
  3678 
       
  3679         self.checkRecursion( [] )
       
  3680 
       
  3681 
       
  3682 
       
  3683 class And(ParseExpression):
       
  3684 
       
  3685     """Requires all given ParseExpressions to be found in the given order.
       
  3686 
       
  3687        Expressions may be separated by whitespace.
       
  3688 
       
  3689        May be constructed using the '+' operator.
       
  3690 
       
  3691     """
       
  3692 
       
  3693     def __init__( self, exprs, savelist = True ):
       
  3694 
       
  3695         super(And,self).__init__(exprs, savelist)
       
  3696 
       
  3697         self.mayReturnEmpty = True
       
  3698 
       
  3699         for e in self.exprs:
       
  3700 
       
  3701             if not e.mayReturnEmpty:
       
  3702 
       
  3703                 self.mayReturnEmpty = False
       
  3704 
       
  3705                 break
       
  3706 
       
  3707         self.setWhitespaceChars( exprs[0].whiteChars )
       
  3708 
       
  3709         self.skipWhitespace = exprs[0].skipWhitespace
       
  3710 
       
  3711 
       
  3712 
       
  3713     def parseImpl( self, instring, loc, doActions=True ):
       
  3714 
       
  3715         # pass False as last arg to _parse for first element, since we already
       
  3716 
       
  3717         # pre-parsed the string as part of our And pre-parsing
       
  3718 
       
  3719         loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
       
  3720 
       
  3721         for e in self.exprs[1:]:
       
  3722 
       
  3723             loc, exprtokens = e._parse( instring, loc, doActions )
       
  3724 
       
  3725             if exprtokens or exprtokens.keys():
       
  3726 
       
  3727                 resultlist += exprtokens
       
  3728 
       
  3729         return loc, resultlist
       
  3730 
       
  3731 
       
  3732 
       
  3733     def __iadd__(self, other ):
       
  3734 
       
  3735         if isinstance( other, basestring ):
       
  3736 
       
  3737             other = Literal( other )
       
  3738 
       
  3739         return self.append( other ) #And( [ self, other ] )
       
  3740 
       
  3741         
       
  3742 
       
  3743     def checkRecursion( self, parseElementList ):
       
  3744 
       
  3745         subRecCheckList = parseElementList[:] + [ self ]
       
  3746 
       
  3747         for e in self.exprs:
       
  3748 
       
  3749             e.checkRecursion( subRecCheckList )
       
  3750 
       
  3751             if not e.mayReturnEmpty:
       
  3752 
       
  3753                 break
       
  3754 
       
  3755                 
       
  3756 
       
  3757     def __str__( self ):
       
  3758 
       
  3759         if hasattr(self,"name"):
       
  3760 
       
  3761             return self.name
       
  3762 
       
  3763             
       
  3764 
       
  3765         if self.strRepr is None:
       
  3766 
       
  3767             self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
       
  3768 
       
  3769         
       
  3770 
       
  3771         return self.strRepr
       
  3772 
       
  3773     
       
  3774 
       
  3775 
       
  3776 
       
  3777 class Or(ParseExpression):
       
  3778 
       
  3779     """Requires that at least one ParseExpression is found.
       
  3780 
       
  3781        If two expressions match, the expression that matches the longest string will be used.
       
  3782 
       
  3783        May be constructed using the '^' operator.
       
  3784 
       
  3785     """
       
  3786 
       
  3787     def __init__( self, exprs, savelist = False ):
       
  3788 
       
  3789         super(Or,self).__init__(exprs, savelist)
       
  3790 
       
  3791         self.mayReturnEmpty = False
       
  3792 
       
  3793         for e in self.exprs:
       
  3794 
       
  3795             if e.mayReturnEmpty:
       
  3796 
       
  3797                 self.mayReturnEmpty = True
       
  3798 
       
  3799                 break
       
  3800 
       
  3801     
       
  3802 
       
  3803     def parseImpl( self, instring, loc, doActions=True ):
       
  3804 
       
  3805         maxExcLoc = -1
       
  3806 
       
  3807         maxMatchLoc = -1
       
  3808 
       
  3809         for e in self.exprs:
       
  3810 
       
  3811             try:
       
  3812 
       
  3813                 loc2 = e.tryParse( instring, loc )
       
  3814 
       
  3815             except ParseException, err:
       
  3816 
       
  3817                 if err.loc > maxExcLoc:
       
  3818 
       
  3819                     maxException = err
       
  3820 
       
  3821                     maxExcLoc = err.loc
       
  3822 
       
  3823             except IndexError, err:
       
  3824 
       
  3825                 if len(instring) > maxExcLoc:
       
  3826 
       
  3827                     maxException = ParseException(instring,len(instring),e.errmsg,self)
       
  3828 
       
  3829                     maxExcLoc = len(instring)
       
  3830 
       
  3831             else:
       
  3832 
       
  3833                 if loc2 > maxMatchLoc:
       
  3834 
       
  3835                     maxMatchLoc = loc2
       
  3836 
       
  3837                     maxMatchExp = e
       
  3838 
       
  3839         
       
  3840 
       
  3841         if maxMatchLoc < 0:
       
  3842 
       
  3843             if self.exprs:
       
  3844 
       
  3845                 raise maxException
       
  3846 
       
  3847             else:
       
  3848 
       
  3849                 raise ParseException(instring, loc, "no defined alternatives to match", self)
       
  3850 
       
  3851 
       
  3852 
       
  3853         return maxMatchExp._parse( instring, loc, doActions )
       
  3854 
       
  3855 
       
  3856 
       
  3857     def __ixor__(self, other ):
       
  3858 
       
  3859         if isinstance( other, basestring ):
       
  3860 
       
  3861             other = Literal( other )
       
  3862 
       
  3863         return self.append( other ) #Or( [ self, other ] )
       
  3864 
       
  3865 
       
  3866 
       
  3867     def __str__( self ):
       
  3868 
       
  3869         if hasattr(self,"name"):
       
  3870 
       
  3871             return self.name
       
  3872 
       
  3873             
       
  3874 
       
  3875         if self.strRepr is None:
       
  3876 
       
  3877             self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
       
  3878 
       
  3879         
       
  3880 
       
  3881         return self.strRepr
       
  3882 
       
  3883     
       
  3884 
       
  3885     def checkRecursion( self, parseElementList ):
       
  3886 
       
  3887         subRecCheckList = parseElementList[:] + [ self ]
       
  3888 
       
  3889         for e in self.exprs:
       
  3890 
       
  3891             e.checkRecursion( subRecCheckList )
       
  3892 
       
  3893 
       
  3894 
       
  3895 
       
  3896 
       
  3897 class MatchFirst(ParseExpression):
       
  3898 
       
  3899     """Requires that at least one ParseExpression is found.
       
  3900 
       
  3901        If two expressions match, the first one listed is the one that will match.
       
  3902 
       
  3903        May be constructed using the '|' operator.
       
  3904 
       
  3905     """
       
  3906 
       
  3907     def __init__( self, exprs, savelist = False ):
       
  3908 
       
  3909         super(MatchFirst,self).__init__(exprs, savelist)
       
  3910 
       
  3911         if exprs:
       
  3912 
       
  3913             self.mayReturnEmpty = False
       
  3914 
       
  3915             for e in self.exprs:
       
  3916 
       
  3917                 if e.mayReturnEmpty:
       
  3918 
       
  3919                     self.mayReturnEmpty = True
       
  3920 
       
  3921                     break
       
  3922 
       
  3923         else:
       
  3924 
       
  3925             self.mayReturnEmpty = True
       
  3926 
       
  3927     
       
  3928 
       
  3929     def parseImpl( self, instring, loc, doActions=True ):
       
  3930 
       
  3931         maxExcLoc = -1
       
  3932 
       
  3933         for e in self.exprs:
       
  3934 
       
  3935             try:
       
  3936 
       
  3937                 ret = e._parse( instring, loc, doActions )
       
  3938 
       
  3939                 return ret
       
  3940 
       
  3941             except ParseException, err:
       
  3942 
       
  3943                 if err.loc > maxExcLoc:
       
  3944 
       
  3945                     maxException = err
       
  3946 
       
  3947                     maxExcLoc = err.loc
       
  3948 
       
  3949             except IndexError, err:
       
  3950 
       
  3951                 if len(instring) > maxExcLoc:
       
  3952 
       
  3953                     maxException = ParseException(instring,len(instring),e.errmsg,self)
       
  3954 
       
  3955                     maxExcLoc = len(instring)
       
  3956 
       
  3957 
       
  3958 
       
  3959         # only got here if no expression matched, raise exception for match that made it the furthest
       
  3960 
       
  3961         else:
       
  3962 
       
  3963             if self.exprs:
       
  3964 
       
  3965                 raise maxException
       
  3966 
       
  3967             else:
       
  3968 
       
  3969                 raise ParseException(instring, loc, "no defined alternatives to match", self)
       
  3970 
       
  3971 
       
  3972 
       
  3973     def __ior__(self, other ):
       
  3974 
       
  3975         if isinstance( other, basestring ):
       
  3976 
       
  3977             other = Literal( other )
       
  3978 
       
  3979         return self.append( other ) #MatchFirst( [ self, other ] )
       
  3980 
       
  3981 
       
  3982 
       
  3983     def __str__( self ):
       
  3984 
       
  3985         if hasattr(self,"name"):
       
  3986 
       
  3987             return self.name
       
  3988 
       
  3989             
       
  3990 
       
  3991         if self.strRepr is None:
       
  3992 
       
  3993             self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
       
  3994 
       
  3995         
       
  3996 
       
  3997         return self.strRepr
       
  3998 
       
  3999     
       
  4000 
       
  4001     def checkRecursion( self, parseElementList ):
       
  4002 
       
  4003         subRecCheckList = parseElementList[:] + [ self ]
       
  4004 
       
  4005         for e in self.exprs:
       
  4006 
       
  4007             e.checkRecursion( subRecCheckList )
       
  4008 
       
  4009 
       
  4010 
       
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Store the expressions and sort them into required/optional groups."""
        super(Each,self).__init__(exprs, savelist)
        # may return empty only if every contained expression may
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # For the wrapper types below, the wrapped .expr is stored rather than
        # the wrapper itself.
        self.optionals = [ e.expr for e in exprs if isinstance(e,Optional) ]
        self.multioptionals = [ e.expr for e in exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in exprs if isinstance(e,OneOrMore) ]
        # everything that is not one of the three wrapper types is required as-is
        self.required = [ e for e in exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        # the expressions wrapped by OneOrMore are also required
        self.required += self.multirequired

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the expressions in whatever order they occur in instring.

        A first pass uses tryParse to discover the order in which expressions
        match; a second pass calls _parse on them in that order and merges the
        results.  Raises ParseException if a required expression never matched.
        Returns (end location, merged ParseResults).
        """
        tmpLoc = loc
        # working copies, so matched entries can be removed without touching self
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        # keep sweeping over the remaining candidates until one full sweep
        # produces no match at all
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # required/optional entries are removed once matched; the
                    # multi* lists are never shrunk, so those may match again
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False
        
        if tmpReqd:
            missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # second pass: parse again from the original loc, in the discovered
        # order, this time honouring doActions
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)
            
        # merge the per-expression results; when a key is already present in
        # finalResults, the existing and new values are combined and stored
        # back after the '+=' below
        finalResults = ParseResults([])
        for r in resultlist:
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if there is one, else a cached "{a & b}" rendering."""
        if hasattr(self,"name"):
            return self.name
            
        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
        
        return self.strRepr
    
    def checkRecursion( self, parseElementList ):
        """Run checkRecursion on every contained expression, with self added to the element list."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
       
  4166 
       
  4167 
       
  4168 
       
  4169 
       
  4170 
       
  4171 class ParseElementEnhance(ParserElement):
       
  4172 
       
  4173     """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
       
  4174 
       
  4175     def __init__( self, expr, savelist=False ):
       
  4176 
       
  4177         super(ParseElementEnhance,self).__init__(savelist)
       
  4178 
       
  4179         if isinstance( expr, basestring ):
       
  4180 
       
  4181             expr = Literal(expr)
       
  4182 
       
  4183         self.expr = expr
       
  4184 
       
  4185         self.strRepr = None
       
  4186 
       
  4187         if expr is not None:
       
  4188 
       
  4189             self.mayIndexError = expr.mayIndexError
       
  4190 
       
  4191             self.setWhitespaceChars( expr.whiteChars )
       
  4192 
       
  4193             self.skipWhitespace = expr.skipWhitespace
       
  4194 
       
  4195             self.saveAsList = expr.saveAsList
       
  4196 
       
  4197     
       
  4198 
       
  4199     def parseImpl( self, instring, loc, doActions=True ):
       
  4200 
       
  4201         if self.expr is not None:
       
  4202 
       
  4203             return self.expr._parse( instring, loc, doActions, callPreParse=False )
       
  4204 
       
  4205         else:
       
  4206 
       
  4207             raise ParseException("",loc,self.errmsg,self)
       
  4208 
       
  4209             
       
  4210 
       
  4211     def leaveWhitespace( self ):
       
  4212 
       
  4213         self.skipWhitespace = False
       
  4214 
       
  4215         self.expr = self.expr.copy()
       
  4216 
       
  4217         if self.expr is not None:
       
  4218 
       
  4219             self.expr.leaveWhitespace()
       
  4220 
       
  4221         return self
       
  4222 
       
  4223 
       
  4224 
       
  4225     def ignore( self, other ):
       
  4226 
       
  4227         if isinstance( other, Suppress ):
       
  4228 
       
  4229             if other not in self.ignoreExprs:
       
  4230 
       
  4231                 super( ParseElementEnhance, self).ignore( other )
       
  4232 
       
  4233                 if self.expr is not None:
       
  4234 
       
  4235                     self.expr.ignore( self.ignoreExprs[-1] )
       
  4236 
       
  4237         else:
       
  4238 
       
  4239             super( ParseElementEnhance, self).ignore( other )
       
  4240 
       
  4241             if self.expr is not None:
       
  4242 
       
  4243                 self.expr.ignore( self.ignoreExprs[-1] )
       
  4244 
       
  4245         return self
       
  4246 
       
  4247 
       
  4248 
       
  4249     def streamline( self ):
       
  4250 
       
  4251         super(ParseElementEnhance,self).streamline()
       
  4252 
       
  4253         if self.expr is not None:
       
  4254 
       
  4255             self.expr.streamline()
       
  4256 
       
  4257         return self
       
  4258 
       
  4259 
       
  4260 
       
  4261     def checkRecursion( self, parseElementList ):
       
  4262 
       
  4263         if self in parseElementList:
       
  4264 
       
  4265             raise RecursiveGrammarException( parseElementList+[self] )
       
  4266 
       
  4267         subRecCheckList = parseElementList[:] + [ self ]
       
  4268 
       
  4269         if self.expr is not None:
       
  4270 
       
  4271             self.expr.checkRecursion( subRecCheckList )
       
  4272 
       
  4273         
       
  4274 
       
  4275     def validate( self, validateTrace=[] ):
       
  4276 
       
  4277         tmp = validateTrace[:]+[self]
       
  4278 
       
  4279         if self.expr is not None:
       
  4280 
       
  4281             self.expr.validate(tmp)
       
  4282 
       
  4283         self.checkRecursion( [] )
       
  4284 
       
  4285     
       
  4286 
       
  4287     def __str__( self ):
       
  4288 
       
  4289         try:
       
  4290 
       
  4291             return super(ParseElementEnhance,self).__str__()
       
  4292 
       
  4293         except:
       
  4294 
       
  4295             pass
       
  4296 
       
  4297             
       
  4298 
       
  4299         if self.strRepr is None and self.expr is not None:
       
  4300 
       
  4301             self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
       
  4302 
       
  4303         return self.strRepr
       
  4304 
       
  4305 
       
  4306 
       
  4307 
       
  4308 
       
  4309 class FollowedBy(ParseElementEnhance):
       
  4310 
       
  4311     """Lookahead matching of the given parse expression.  FollowedBy
       
  4312 
       
  4313     does *not* advance the parsing position within the input string, it only 
       
  4314 
       
  4315     verifies that the specified parse expression matches at the current 
       
  4316 
       
  4317     position.  FollowedBy always returns a null token list."""
       
  4318 
       
  4319     def __init__( self, expr ):
       
  4320 
       
  4321         super(FollowedBy,self).__init__(expr)
       
  4322 
       
  4323         self.mayReturnEmpty = True
       
  4324 
       
  4325         
       
  4326 
       
  4327     def parseImpl( self, instring, loc, doActions=True ):
       
  4328 
       
  4329         self.expr.tryParse( instring, loc )
       
  4330 
       
  4331         return loc, []
       
  4332 
       
  4333 
       
  4334 
       
  4335 
       
  4336 
       
  4337 class NotAny(ParseElementEnhance):
       
  4338 
       
  4339     """Lookahead to disallow matching with the given parse expression.  NotAny
       
  4340 
       
  4341     does *not* advance the parsing position within the input string, it only 
       
  4342 
       
  4343     verifies that the specified parse expression does *not* match at the current 
       
  4344 
       
  4345     position.  Also, NotAny does *not* skip over leading whitespace. NotAny 
       
  4346 
       
  4347     always returns a null token list.  May be constructed using the '~' operator."""
       
  4348 
       
  4349     def __init__( self, expr ):
       
  4350 
       
  4351         super(NotAny,self).__init__(expr)
       
  4352 
       
  4353         #~ self.leaveWhitespace()
       
  4354 
       
  4355         self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
       
  4356 
       
  4357         self.mayReturnEmpty = True
       
  4358 
       
  4359         self.errmsg = "Found unwanted token, "+_ustr(self.expr)
       
  4360 
       
  4361         self.myException = ParseException("",0,self.errmsg,self)
       
  4362 
       
  4363         
       
  4364 
       
  4365     def parseImpl( self, instring, loc, doActions=True ):
       
  4366 
       
  4367         try:
       
  4368 
       
  4369             self.expr.tryParse( instring, loc )
       
  4370 
       
  4371         except (ParseException,IndexError):
       
  4372 
       
  4373             pass
       
  4374 
       
  4375         else:
       
  4376 
       
  4377             #~ raise ParseException(instring, loc, self.errmsg )
       
  4378 
       
  4379             exc = self.myException
       
  4380 
       
  4381             exc.loc = loc
       
  4382 
       
  4383             exc.pstr = instring
       
  4384 
       
  4385             raise exc
       
  4386 
       
  4387         return loc, []
       
  4388 
       
  4389 
       
  4390 
       
  4391     def __str__( self ):
       
  4392 
       
  4393         if hasattr(self,"name"):
       
  4394 
       
  4395             return self.name
       
  4396 
       
  4397             
       
  4398 
       
  4399         if self.strRepr is None:
       
  4400 
       
  4401             self.strRepr = "~{" + _ustr(self.expr) + "}"
       
  4402 
       
  4403         
       
  4404 
       
  4405         return self.strRepr
       
  4406 
       
  4407 
       
  4408 
       
  4409 
       
  4410 
       
  4411 class ZeroOrMore(ParseElementEnhance):
       
  4412 
       
  4413     """Optional repetition of zero or more of the given expression."""
       
  4414 
       
  4415     def __init__( self, expr ):
       
  4416 
       
  4417         super(ZeroOrMore,self).__init__(expr)
       
  4418 
       
  4419         self.mayReturnEmpty = True
       
  4420 
       
  4421     
       
  4422 
       
  4423     def parseImpl( self, instring, loc, doActions=True ):
       
  4424 
       
  4425         tokens = []
       
  4426 
       
  4427         try:
       
  4428 
       
  4429             loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
       
  4430 
       
  4431             hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
       
  4432 
       
  4433             while 1:
       
  4434 
       
  4435                 if hasIgnoreExprs:
       
  4436 
       
  4437                     preloc = self.skipIgnorables( instring, loc )
       
  4438 
       
  4439                 else:
       
  4440 
       
  4441                     preloc = loc
       
  4442 
       
  4443                 loc, tmptokens = self.expr._parse( instring, preloc, doActions )
       
  4444 
       
  4445                 if tmptokens or tmptokens.keys():
       
  4446 
       
  4447                     tokens += tmptokens
       
  4448 
       
  4449         except (ParseException,IndexError):
       
  4450 
       
  4451             pass
       
  4452 
       
  4453 
       
  4454 
       
  4455         return loc, tokens
       
  4456 
       
  4457 
       
  4458 
       
  4459     def __str__( self ):
       
  4460 
       
  4461         if hasattr(self,"name"):
       
  4462 
       
  4463             return self.name
       
  4464 
       
  4465             
       
  4466 
       
  4467         if self.strRepr is None:
       
  4468 
       
  4469             self.strRepr = "[" + _ustr(self.expr) + "]..."
       
  4470 
       
  4471         
       
  4472 
       
  4473         return self.strRepr
       
  4474 
       
  4475     
       
  4476 
       
  4477     def setResultsName( self, name, listAllMatches=False ):
       
  4478 
       
  4479         ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
       
  4480 
       
  4481         ret.saveAsList = True
       
  4482 
       
  4483         return ret
       
  4484 
       
  4485     
       
  4486 
       
  4487 
       
  4488 
       
  4489 class OneOrMore(ParseElementEnhance):
       
  4490 
       
  4491     """Repetition of one or more of the given expression."""
       
  4492 
       
  4493     def parseImpl( self, instring, loc, doActions=True ):
       
  4494 
       
  4495         # must be at least one
       
  4496 
       
  4497         loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
       
  4498 
       
  4499         try:
       
  4500 
       
  4501             hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
       
  4502 
       
  4503             while 1:
       
  4504 
       
  4505                 if hasIgnoreExprs:
       
  4506 
       
  4507                     preloc = self.skipIgnorables( instring, loc )
       
  4508 
       
  4509                 else:
       
  4510 
       
  4511                     preloc = loc
       
  4512 
       
  4513                 loc, tmptokens = self.expr._parse( instring, preloc, doActions )
       
  4514 
       
  4515                 if tmptokens or tmptokens.keys():
       
  4516 
       
  4517                     tokens += tmptokens
       
  4518 
       
  4519         except (ParseException,IndexError):
       
  4520 
       
  4521             pass
       
  4522 
       
  4523 
       
  4524 
       
  4525         return loc, tokens
       
  4526 
       
  4527 
       
  4528 
       
  4529     def __str__( self ):
       
  4530 
       
  4531         if hasattr(self,"name"):
       
  4532 
       
  4533             return self.name
       
  4534 
       
  4535             
       
  4536 
       
  4537         if self.strRepr is None:
       
  4538 
       
  4539             self.strRepr = "{" + _ustr(self.expr) + "}..."
       
  4540 
       
  4541         
       
  4542 
       
  4543         return self.strRepr
       
  4544 
       
  4545     
       
  4546 
       
  4547     def setResultsName( self, name, listAllMatches=False ):
       
  4548 
       
  4549         ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
       
  4550 
       
  4551         ret.saveAsList = True
       
  4552 
       
  4553         return ret
       
  4554 
       
  4555 
       
  4556 
       
  4557 class _NullToken(object):
       
  4558 
       
  4559     def __bool__(self):
       
  4560 
       
  4561         return False
       
  4562 
       
  4563     def __str__(self):
       
  4564 
       
  4565         return ""
       
  4566 
       
  4567 
       
  4568 
       
  4569 _optionalNotMatched = _NullToken()
       
  4570 
       
  4571 class Optional(ParseElementEnhance):
       
  4572 
       
  4573     """Optional matching of the given expression.
       
  4574 
       
  4575        A default return string can also be specified, if the optional expression
       
  4576 
       
  4577        is not found.
       
  4578 
       
  4579     """
       
  4580 
       
  4581     def __init__( self, exprs, default=_optionalNotMatched ):
       
  4582 
       
  4583         super(Optional,self).__init__( exprs, savelist=False )
       
  4584 
       
  4585         self.defaultValue = default
       
  4586 
       
  4587         self.mayReturnEmpty = True
       
  4588 
       
  4589 
       
  4590 
       
  4591     def parseImpl( self, instring, loc, doActions=True ):
       
  4592 
       
  4593         try:
       
  4594 
       
  4595             loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
       
  4596 
       
  4597         except (ParseException,IndexError):
       
  4598 
       
  4599             if self.defaultValue is not _optionalNotMatched:
       
  4600 
       
  4601                 tokens = [ self.defaultValue ]
       
  4602 
       
  4603             else:
       
  4604 
       
  4605                 tokens = []
       
  4606 
       
  4607         return loc, tokens
       
  4608 
       
  4609 
       
  4610 
       
  4611     def __str__( self ):
       
  4612 
       
  4613         if hasattr(self,"name"):
       
  4614 
       
  4615             return self.name
       
  4616 
       
  4617             
       
  4618 
       
  4619         if self.strRepr is None:
       
  4620 
       
  4621             self.strRepr = "[" + _ustr(self.expr) + "]"
       
  4622 
       
  4623         
       
  4624 
       
  4625         return self.strRepr
       
  4626 
       
  4627 
       
  4628 
       
  4629 
       
  4630 
       
  4631 class SkipTo(ParseElementEnhance):
       
  4632 
       
  4633     """Token for skipping over all undefined text until the matched expression is found.
       
  4634 
       
  4635        If include is set to true, the matched expression is also consumed.  The ignore
       
  4636 
       
  4637        argument is used to define grammars (typically quoted strings and comments) that 
       
  4638 
       
  4639        might contain false matches.
       
  4640 
       
  4641     """
       
  4642 
       
  4643     def __init__( self, other, include=False, ignore=None ):
       
  4644 
       
  4645         super( SkipTo, self ).__init__( other )
       
  4646 
       
  4647         if ignore is not None:
       
  4648 
       
  4649             self.expr = self.expr.copy()
       
  4650 
       
  4651             self.expr.ignore(ignore)
       
  4652 
       
  4653         self.mayReturnEmpty = True
       
  4654 
       
  4655         self.mayIndexError = False
       
  4656 
       
  4657         self.includeMatch = include
       
  4658 
       
  4659         self.asList = False
       
  4660 
       
  4661         self.errmsg = "No match found for "+_ustr(self.expr)
       
  4662 
       
  4663         self.myException = ParseException("",0,self.errmsg,self)
       
  4664 
       
  4665 
       
  4666 
       
  4667     def parseImpl( self, instring, loc, doActions=True ):
       
  4668 
       
  4669         startLoc = loc
       
  4670 
       
  4671         instrlen = len(instring)
       
  4672 
       
  4673         expr = self.expr
       
  4674 
       
  4675         while loc <= instrlen:
       
  4676 
       
  4677             try:
       
  4678 
       
  4679                 loc = expr.skipIgnorables( instring, loc )
       
  4680 
       
  4681                 expr._parse( instring, loc, doActions=False, callPreParse=False )
       
  4682 
       
  4683                 if self.includeMatch:
       
  4684 
       
  4685                     skipText = instring[startLoc:loc]
       
  4686 
       
  4687                     loc,mat = expr._parse(instring,loc)
       
  4688 
       
  4689                     if mat:
       
  4690 
       
  4691                         return loc, [ skipText, mat ]
       
  4692 
       
  4693                     else:
       
  4694 
       
  4695                         return loc, [ skipText ]
       
  4696 
       
  4697                 else:
       
  4698 
       
  4699                     return loc, [ instring[startLoc:loc] ]
       
  4700 
       
  4701             except (ParseException,IndexError):
       
  4702 
       
  4703                 loc += 1
       
  4704 
       
  4705         exc = self.myException
       
  4706 
       
  4707         exc.loc = loc
       
  4708 
       
  4709         exc.pstr = instring
       
  4710 
       
  4711         raise exc
       
  4712 
       
  4713 
       
  4714 
       
  4715 class Forward(ParseElementEnhance):
       
  4716 
       
  4717     """Forward declaration of an expression to be defined later -
       
  4718 
       
  4719        used for recursive grammars, such as algebraic infix notation.
       
  4720 
       
  4721        When the expression is known, it is assigned to the Forward variable using the '<<' operator.
       
  4722 
       
  4723        
       
  4724 
       
  4725        Note: take care when assigning to Forward not to overlook precedence of operators.
       
  4726 
       
  4727        Specifically, '|' has a lower precedence than '<<', so that::
       
  4728 
       
  4729           fwdExpr << a | b | c
       
  4730 
       
  4731        will actually be evaluated as::
       
  4732 
       
  4733           (fwdExpr << a) | b | c
       
  4734 
       
  4735        thereby leaving b and c out as parseable alternatives.  It is recommended that you
       
  4736 
       
  4737        explicitly group the values inserted into the Forward::
       
  4738 
       
  4739           fwdExpr << (a | b | c)
       
  4740 
       
  4741     """
       
  4742 
       
  4743     def __init__( self, other=None ):
       
  4744 
       
  4745         super(Forward,self).__init__( other, savelist=False )
       
  4746 
       
  4747 
       
  4748 
       
  4749     def __lshift__( self, other ):
       
  4750 
       
  4751         if isinstance( other, basestring ):
       
  4752 
       
  4753             other = Literal(other)
       
  4754 
       
  4755         self.expr = other
       
  4756 
       
  4757         self.mayReturnEmpty = other.mayReturnEmpty
       
  4758 
       
  4759         self.strRepr = None
       
  4760 
       
  4761         return self
       
  4762 
       
  4763 
       
  4764 
       
  4765     def leaveWhitespace( self ):
       
  4766 
       
  4767         self.skipWhitespace = False
       
  4768 
       
  4769         return self
       
  4770 
       
  4771 
       
  4772 
       
  4773     def streamline( self ):
       
  4774 
       
  4775         if not self.streamlined:
       
  4776 
       
  4777             self.streamlined = True
       
  4778 
       
  4779             if self.expr is not None: 
       
  4780 
       
  4781                 self.expr.streamline()
       
  4782 
       
  4783         return self
       
  4784 
       
  4785 
       
  4786 
       
  4787     def validate( self, validateTrace=[] ):
       
  4788 
       
  4789         if self not in validateTrace:
       
  4790 
       
  4791             tmp = validateTrace[:]+[self]
       
  4792 
       
  4793             if self.expr is not None: 
       
  4794 
       
  4795                 self.expr.validate(tmp)
       
  4796 
       
  4797         self.checkRecursion([])        
       
  4798 
       
  4799         
       
  4800 
       
  4801     def __str__( self ):
       
  4802 
       
  4803         if hasattr(self,"name"):
       
  4804 
       
  4805             return self.name
       
  4806 
       
  4807 
       
  4808 
       
  4809         self.__class__ = _ForwardNoRecurse
       
  4810 
       
  4811         try:
       
  4812 
       
  4813             if self.expr is not None: 
       
  4814 
       
  4815                 retString = _ustr(self.expr)
       
  4816 
       
  4817             else:
       
  4818 
       
  4819                 retString = "None"
       
  4820 
       
  4821         finally:
       
  4822 
       
  4823             self.__class__ = Forward
       
  4824 
       
  4825         return "Forward: "+retString
       
  4826 
       
  4827         
       
  4828 
       
  4829     def copy(self):
       
  4830 
       
  4831         if self.expr is not None:
       
  4832 
       
  4833             return super(Forward,self).copy()
       
  4834 
       
  4835         else:
       
  4836 
       
  4837             ret = Forward()
       
  4838 
       
  4839             ret << self
       
  4840 
       
  4841             return ret
       
  4842 
       
  4843 
       
  4844 
       
class _ForwardNoRecurse(Forward):
    """Stand-in class that Forward.__str__ temporarily assigns to an instance
       while it stringifies the contained expression; its __str__ returns a
       fixed "..." instead of descending into the expression again.
    """
    def __str__( self ):
        return "..."
       
  4850 
       
  4851         
       
  4852 
       
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # NOTE: the savelist argument is accepted but not forwarded to the base class
        super(TokenConverter,self).__init__( expr )#, savelist )
        # subclasses that want list results set this back to True after calling here
        self.saveAsList = False
       
  4862 
       
  4863 
       
  4864 
       
  4865 
       
  4866 
       
  4867 class Upcase(TokenConverter):
       
  4868 
       
  4869     """Converter to upper case all matching tokens."""
       
  4870 
       
  4871     def __init__(self, *args):
       
  4872 
       
  4873         super(Upcase,self).__init__(*args)
       
  4874 
       
  4875         warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", 
       
  4876 
       
  4877                        DeprecationWarning,stacklevel=2)
       
  4878 
       
  4879     
       
  4880 
       
  4881     def postParse( self, instring, loc, tokenlist ):
       
  4882 
       
  4883         return map( string.upper, tokenlist )
       
  4884 
       
  4885 
       
  4886 
       
  4887 
       
  4888 
       
  4889 class Combine(TokenConverter):
       
  4890 
       
  4891     """Converter to concatenate all matching tokens to a single string.
       
  4892 
       
  4893        By default, the matching patterns must also be contiguous in the input string;
       
  4894 
       
  4895        this can be disabled by specifying 'adjacent=False' in the constructor.
       
  4896 
       
  4897     """
       
  4898 
       
  4899     def __init__( self, expr, joinString="", adjacent=True ):
       
  4900 
       
  4901         super(Combine,self).__init__( expr )
       
  4902 
       
  4903         # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
       
  4904 
       
  4905         if adjacent:
       
  4906 
       
  4907             self.leaveWhitespace()
       
  4908 
       
  4909         self.adjacent = adjacent
       
  4910 
       
  4911         self.skipWhitespace = True
       
  4912 
       
  4913         self.joinString = joinString
       
  4914 
       
  4915 
       
  4916 
       
  4917     def ignore( self, other ):
       
  4918 
       
  4919         if self.adjacent:
       
  4920 
       
  4921             ParserElement.ignore(self, other)
       
  4922 
       
  4923         else:
       
  4924 
       
  4925             super( Combine, self).ignore( other )
       
  4926 
       
  4927         return self
       
  4928 
       
  4929 
       
  4930 
       
  4931     def postParse( self, instring, loc, tokenlist ):
       
  4932 
       
  4933         retToks = tokenlist.copy()
       
  4934 
       
  4935         del retToks[:]
       
  4936 
       
  4937         retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
       
  4938 
       
  4939 
       
  4940 
       
  4941         if self.resultsName and len(retToks.keys())>0:
       
  4942 
       
  4943             return [ retToks ]
       
  4944 
       
  4945         else:
       
  4946 
       
  4947             return retToks
       
  4948 
       
  4949 
       
  4950 
       
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        # TokenConverter.__init__ sets this to False; a Group turns it back on
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # wrap the incoming results in a one-element list so they stay together as a single item
        return [ tokenlist ]
       
  4966 
       
  4967         
       
  4968 
       
  4969 class Dict(TokenConverter):
       
  4970 
       
  4971     """Converter to return a repetitive expression as a list, but also as a dictionary.
       
  4972 
       
  4973        Each element can also be referenced using the first token in the expression as its key.
       
  4974 
       
  4975        Useful for tabular report scraping when the first column can be used as a item key.
       
  4976 
       
  4977     """
       
  4978 
       
  4979     def __init__( self, exprs ):
       
  4980 
       
  4981         super(Dict,self).__init__( exprs )
       
  4982 
       
  4983         self.saveAsList = True
       
  4984 
       
  4985 
       
  4986 
       
  4987     def postParse( self, instring, loc, tokenlist ):
       
  4988 
       
  4989         for i,tok in enumerate(tokenlist):
       
  4990 
       
  4991             ikey = _ustr(tok[0]).strip()
       
  4992 
       
  4993             if len(tok)==1:
       
  4994 
       
  4995                 tokenlist[ikey] = ("",i)
       
  4996 
       
  4997             elif len(tok)==2 and not isinstance(tok[1],ParseResults):
       
  4998 
       
  4999                 tokenlist[ikey] = (tok[1],i)
       
  5000 
       
  5001             else:
       
  5002 
       
  5003                 dictvalue = tok.copy() #ParseResults(i)
       
  5004 
       
  5005                 del dictvalue[0]
       
  5006 
       
  5007                 if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
       
  5008 
       
  5009                     tokenlist[ikey] = (dictvalue,i)
       
  5010 
       
  5011                 else:
       
  5012 
       
  5013                     tokenlist[ikey] = (dictvalue[0],i)
       
  5014 
       
  5015 
       
  5016 
       
  5017         if self.resultsName:
       
  5018 
       
  5019             return [ tokenlist ]
       
  5020 
       
  5021         else:
       
  5022 
       
  5023             return tokenlist
       
  5024 
       
  5025 
       
  5026 
       
  5027 
       
  5028 
       
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        # the expression was matched, but none of its tokens are reported
        return []

    def suppress( self ):
        # already suppressing, so no further wrapping is needed
        return self
       
  5042 
       
  5043 
       
  5044 
       
  5045 
       
  5046 
       
  5047 class OnlyOnce(object):
       
  5048 
       
  5049     """Wrapper for parse actions, to ensure they are only called once."""
       
  5050 
       
  5051     def __init__(self, methodCall):
       
  5052 
       
  5053         self.callable = ParserElement.normalizeParseActionArgs(methodCall)
       
  5054 
       
  5055         self.called = False
       
  5056 
       
  5057     def __call__(self,s,l,t):
       
  5058 
       
  5059         if not self.called:
       
  5060 
       
  5061             results = self.callable(s,l,t)
       
  5062 
       
  5063             self.called = True
       
  5064 
       
  5065             return results
       
  5066 
       
  5067         raise ParseException(s,l,"")
       
  5068 
       
  5069     def reset():
       
  5070 
       
  5071         self.called = False
       
  5072 
       
  5073 
       
  5074 
       
  5075 def traceParseAction(f):
       
  5076 
       
  5077     """Decorator for debugging parse actions."""
       
  5078 
       
  5079     f = ParserElement.normalizeParseActionArgs(f)
       
  5080 
       
  5081     def z(*paArgs):
       
  5082 
       
  5083         thisFunc = f.func_name
       
  5084 
       
  5085         s,l,t = paArgs[-3:]
       
  5086 
       
  5087         if len(paArgs)>3:
       
  5088 
       
  5089             thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
       
  5090 
       
  5091         sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
       
  5092 
       
  5093         try:
       
  5094 
       
  5095             ret = f(*paArgs)
       
  5096 
       
  5097         except Exception, exc:
       
  5098 
       
  5099             sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
       
  5100 
       
  5101             raise
       
  5102 
       
  5103         sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
       
  5104 
       
  5105         return ret
       
  5106 
       
  5107     return z
       
  5108 
       
  5109         
       
  5110 
       
  5111 #
       
  5112 
       
  5113 # global helpers
       
  5114 
       
  5115 #
       
  5116 
       
  5117 def delimitedList( expr, delim=",", combine=False ):
       
  5118 
       
  5119     """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       
  5120 
       
  5121        By default, the list elements and delimiters can have intervening whitespace, and 
       
  5122 
       
  5123        comments, but this can be overridden by passing 'combine=True' in the constructor.
       
  5124 
       
  5125        If combine is set to True, the matching tokens are returned as a single token
       
  5126 
       
  5127        string, with the delimiters included; otherwise, the matching tokens are returned
       
  5128 
       
  5129        as a list of tokens, with the delimiters suppressed.
       
  5130 
       
  5131     """
       
  5132 
       
  5133     dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
       
  5134 
       
  5135     if combine:
       
  5136 
       
  5137         return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
       
  5138 
       
  5139     else:
       
  5140 
       
  5141         return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
       
  5142 
       
  5143 
       
  5144 
       
  5145 def countedArray( expr ):
       
  5146 
       
  5147     """Helper to define a counted list of expressions.
       
  5148 
       
  5149        This helper defines a pattern of the form::
       
  5150 
       
  5151            integer expr expr expr...
       
  5152 
       
  5153        where the leading integer tells how many expr expressions follow.
       
  5154 
       
  5155        The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
       
  5156 
       
  5157     """
       
  5158 
       
  5159     arrayExpr = Forward()
       
  5160 
       
  5161     def countFieldParseAction(s,l,t):
       
  5162 
       
  5163         n = int(t[0])
       
  5164 
       
  5165         arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
       
  5166 
       
  5167         return []
       
  5168 
       
  5169     return ( Word(nums).setParseAction(countFieldParseAction) + arrayExpr )
       
  5170 
       
  5171 
       
  5172 
       
  5173 def _flatten(L):
       
  5174 
       
  5175     if type(L) is not list: return [L]
       
  5176 
       
  5177     if L == []: return L
       
  5178 
       
  5179     return _flatten(L[0]) + _flatten(L[1:])
       
  5180 
       
  5181 
       
  5182 
       
  5183 def matchPreviousLiteral(expr):
       
  5184 
       
  5185     """Helper to define an expression that is indirectly defined from
       
  5186 
       
  5187        the tokens matched in a previous expression, that is, it looks
       
  5188 
       
  5189        for a 'repeat' of a previous expression.  For example::
       
  5190 
       
  5191            first = Word(nums)
       
  5192 
       
  5193            second = matchPreviousLiteral(first)
       
  5194 
       
  5195            matchExpr = first + ":" + second
       
  5196 
       
  5197        will match "1:1", but not "1:2".  Because this matches a 
       
  5198 
       
  5199        previous literal, will also match the leading "1:1" in "1:10".  
       
  5200 
       
  5201        If this is not desired, use matchPreviousExpr.
       
  5202 
       
  5203        Do *not* use with packrat parsing enabled.
       
  5204 
       
  5205     """
       
  5206 
       
  5207     rep = Forward()
       
  5208 
       
  5209     def copyTokenToRepeater(s,l,t):
       
  5210 
       
  5211         if t:
       
  5212 
       
  5213             if len(t) == 1:
       
  5214 
       
  5215                 rep << t[0]
       
  5216 
       
  5217             else:
       
  5218 
       
  5219                 # flatten t tokens
       
  5220 
       
  5221                 tflat = _flatten(t.asList())
       
  5222 
       
  5223                 rep << And( [ Literal(tt) for tt in tflat ] )
       
  5224 
       
  5225         else:
       
  5226 
       
  5227             rep << Empty()
       
  5228 
       
  5229     expr.addParseAction(copyTokenToRepeater)
       
  5230 
       
  5231     return rep
       
  5232 
       
  5233     
       
  5234 
       
  5235 def matchPreviousExpr(expr):
       
  5236 
       
  5237     """Helper to define an expression that is indirectly defined from
       
  5238 
       
  5239        the tokens matched in a previous expression, that is, it looks
       
  5240 
       
  5241        for a 'repeat' of a previous expression.  For example::
       
  5242 
       
  5243            first = Word(nums)
       
  5244 
       
  5245            second = matchPreviousExpr(first)
       
  5246 
       
  5247            matchExpr = first + ":" + second
       
  5248 
       
  5249        will match "1:1", but not "1:2".  Because this matches by
       
  5250 
       
  5251        expressions, will *not* match the leading "1:1" in "1:10";
       
  5252 
       
  5253        the expressions are evaluated first, and then compared, so
       
  5254 
       
  5255        "1" is compared with "10".
       
  5256 
       
  5257        Do *not* use with packrat parsing enabled.
       
  5258 
       
  5259     """
       
  5260 
       
  5261     rep = Forward()
       
  5262 
       
  5263     e2 = expr.copy()
       
  5264 
       
  5265     rep << e2
       
  5266 
       
  5267     def copyTokenToRepeater(s,l,t):
       
  5268 
       
  5269         matchTokens = _flatten(t.asList())
       
  5270 
       
  5271         def mustMatchTheseTokens(s,l,t):
       
  5272 
       
  5273             theseTokens = _flatten(t.asList())
       
  5274 
       
  5275             if  theseTokens != matchTokens:
       
  5276 
       
  5277                 raise ParseException("",0,"")
       
  5278 
       
  5279         rep.setParseAction( mustMatchTheseTokens )
       
  5280 
       
  5281     expr.addParseAction(copyTokenToRepeater)
       
  5282 
       
  5283     return rep
       
  5284 
       
  5285     
       
  5286 
       
  5287 def _escapeRegexRangeChars(s):
       
  5288 
       
  5289     #~  escape these chars: ^-]
       
  5290 
       
  5291     for c in r"\^-]":
       
  5292 
       
  5293         s = s.replace(c,"\\"+c)
       
  5294 
       
  5295     s = s.replace("\n",r"\n")
       
  5296 
       
  5297     s = s.replace("\t",r"\t")
       
  5298 
       
  5299     return _ustr(s)
       
  5300 
       
  5301     
       
  5302 
       
  5303 def oneOf( strs, caseless=False, useRegex=True ):
       
  5304 
       
  5305     """Helper to quickly define a set of alternative Literals, and makes sure to do 
       
  5306 
       
  5307        longest-first testing when there is a conflict, regardless of the input order, 
       
  5308 
       
  5309        but returns a MatchFirst for best performance.  
       
  5310 
       
  5311        
       
  5312 
       
  5313        Parameters:
       
  5314 
       
  5315         - strs - a string of space-delimited literals, or a list of string literals
       
  5316 
       
  5317         - caseless - (default=False) - treat all literals as caseless
       
  5318 
       
  5319         - useRegex - (default=True) - as an optimization, will generate a Regex
       
  5320 
       
  5321           object; otherwise, will generate a MatchFirst object (if caseless=True, or
       
  5322 
       
  5323           if creating a Regex raises an exception)
       
  5324 
       
  5325     """
       
  5326 
       
  5327     if caseless:
       
  5328 
       
  5329         isequal = ( lambda a,b: a.upper() == b.upper() )
       
  5330 
       
  5331         masks = ( lambda a,b: b.upper().startswith(a.upper()) )
       
  5332 
       
  5333         parseElementClass = CaselessLiteral
       
  5334 
       
  5335     else:
       
  5336 
       
  5337         isequal = ( lambda a,b: a == b )
       
  5338 
       
  5339         masks = ( lambda a,b: b.startswith(a) )
       
  5340 
       
  5341         parseElementClass = Literal
       
  5342 
       
  5343     
       
  5344 
       
  5345     if isinstance(strs,(list,tuple)):
       
  5346 
       
  5347         symbols = strs[:]
       
  5348 
       
  5349     elif isinstance(strs,basestring):
       
  5350 
       
  5351         symbols = strs.split()
       
  5352 
       
  5353     else:
       
  5354 
       
  5355         warnings.warn("Invalid argument to oneOf, expected string or list",
       
  5356 
       
  5357                 SyntaxWarning, stacklevel=2)
       
  5358 
       
  5359         
       
  5360 
       
  5361     i = 0
       
  5362 
       
  5363     while i < len(symbols)-1:
       
  5364 
       
  5365         cur = symbols[i]
       
  5366 
       
  5367         for j,other in enumerate(symbols[i+1:]):
       
  5368 
       
  5369             if ( isequal(other, cur) ):
       
  5370 
       
  5371                 del symbols[i+j+1]
       
  5372 
       
  5373                 break
       
  5374 
       
  5375             elif ( masks(cur, other) ):
       
  5376 
       
  5377                 del symbols[i+j+1]
       
  5378 
       
  5379                 symbols.insert(i,other)
       
  5380 
       
  5381                 cur = other
       
  5382 
       
  5383                 break
       
  5384 
       
  5385         else:
       
  5386 
       
  5387             i += 1
       
  5388 
       
  5389 
       
  5390 
       
  5391     if not caseless and useRegex:
       
  5392 
       
  5393         #~ print strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )
       
  5394 
       
  5395         try:
       
  5396 
       
  5397             if len(symbols)==len("".join(symbols)):
       
  5398 
       
  5399                 return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
       
  5400 
       
  5401             else:
       
  5402 
       
  5403                 return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
       
  5404 
       
  5405         except:
       
  5406 
       
  5407             warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
       
  5408 
       
  5409                     SyntaxWarning, stacklevel=2)
       
  5410 
       
  5411 
       
  5412 
       
  5413 
       
  5414 
       
  5415     # last resort, just use MatchFirst
       
  5416 
       
  5417     return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
       
  5418 
       
  5419 
       
  5420 
       
  5421 def dictOf( key, value ):
       
  5422 
       
  5423     """Helper to easily and clearly define a dictionary by specifying the respective patterns
       
  5424 
       
  5425        for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       
  5426 
       
  5427        in the proper order.  The key pattern can include delimiting markers or punctuation,
       
  5428 
       
  5429        as long as they are suppressed, thereby leaving the significant key text.  The value
       
  5430 
       
  5431        pattern can include named results, so that the Dict results can include named token 
       
  5432 
       
  5433        fields.
       
  5434 
       
  5435     """
       
  5436 
       
  5437     return Dict( ZeroOrMore( Group ( key + value ) ) )
       
  5438 
       
  5439 
       
  5440 
       
# a single backslash character
_bslash = "\\"
# every character of string.printable that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# --- internal grammar used by srange() to parse "[...]" range strings ---
# a backslash followed by one of the listed punctuation characters; the parse
# action keeps only the second character (the one being escaped)
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# printable characters excluding backslash and ']'
_printables_less_backslash = "".join([ c for c in printables if c not in  r"\]" ])
# backslash, "0x", then hex digits -> the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash, then a '0' followed by octal digits -> the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
# one character of the set, in any of the escaped forms or as itself
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "x-y": grouped pair holding the two end characters, the dash is dropped
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the whole bracket expression; results are named "negate" (optional leading '^') and "body"
_reBracketExpr = "[" + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# turns a grouped range pair into the string of all characters from the first
# to the last inclusive; any other value is returned unchanged
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
       
  5478 
       
  5479         
       
  5480 
       
  5481 def srange(s):
       
  5482 
       
  5483     r"""Helper to easily define string ranges for use in Word construction.  Borrows
       
  5484 
       
  5485        syntax from regexp '[]' string range definitions::
       
  5486 
       
  5487           srange("[0-9]")   -> "0123456789"
       
  5488 
       
  5489           srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
       
  5490 
       
  5491           srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       
  5492 
       
  5493        The input string must be enclosed in []'s, and the returned string is the expanded 
       
  5494 
       
  5495        character set joined into a single string.
       
  5496 
       
  5497        The values enclosed in the []'s may be::
       
  5498 
       
  5499           a single character
       
  5500 
       
  5501           an escaped character with a leading backslash (such as \- or \])
       
  5502 
       
  5503           an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
       
  5504 
       
  5505           an escaped octal character with a leading '\0' (\041, which is a '!' character)
       
  5506 
       
  5507           a range of any of the above, separated by a dash ('a-z', etc.)
       
  5508 
       
  5509           any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       
  5510 
       
  5511     """
       
  5512 
       
  5513     try:
       
  5514 
       
  5515         return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
       
  5516 
       
  5517     except:
       
  5518 
       
  5519         return ""
       
  5520 
       
  5521 
       
  5522 
       
  5523 def replaceWith(replStr):
       
  5524 
       
  5525     """Helper method for common parse actions that simply return a literal value.  Especially 
       
  5526 
       
  5527        useful when used with transformString().
       
  5528 
       
  5529     """
       
  5530 
       
  5531     def _replFunc(*args):
       
  5532 
       
  5533         return [replStr]
       
  5534 
       
  5535     return _replFunc
       
  5536 
       
  5537 
       
  5538 
       
  5539 def removeQuotes(s,l,t):
       
  5540 
       
  5541     """Helper parse action for removing quotation marks from parsed quoted strings.
       
  5542 
       
  5543        To use, add this parse action to quoted string using::
       
  5544 
       
  5545          quotedString.setParseAction( removeQuotes )
       
  5546 
       
  5547     """
       
  5548 
       
  5549     return t[0][1:-1]
       
  5550 
       
  5551 
       
  5552 
       
  5553 def upcaseTokens(s,l,t):
       
  5554 
       
  5555     """Helper parse action to convert tokens to upper case."""
       
  5556 
       
  5557     return [ str(tt).upper() for tt in t ]
       
  5558 
       
  5559 
       
  5560 
       
  5561 def downcaseTokens(s,l,t):
       
  5562 
       
  5563     """Helper parse action to convert tokens to lower case."""
       
  5564 
       
  5565     return [ str(tt).lower() for tt in t ]
       
  5566 
       
  5567 
       
  5568 
       
  5569 def keepOriginalText(s,startLoc,t):
       
  5570 
       
  5571     import inspect
       
  5572 
       
  5573     """Helper parse action to preserve original parsed text,
       
  5574 
       
  5575        overriding any nested parse actions."""
       
  5576 
       
  5577     f = inspect.stack()[1][0]
       
  5578 
       
  5579     try:
       
  5580 
       
  5581         endloc = f.f_locals["loc"]
       
  5582 
       
  5583     finally:
       
  5584 
       
  5585         del f
       
  5586 
       
  5587     return s[startLoc:endloc]
       
  5588 
       
  5589         
       
  5590 
       
  5591 def _makeTags(tagStr, xml):
       
  5592 
       
  5593     """Internal helper to construct opening and closing tag expressions, given a tag name"""
       
  5594 
       
  5595     if isinstance(tagStr,basestring):
       
  5596 
       
  5597         resname = tagStr
       
  5598 
       
  5599         tagStr = Keyword(tagStr, caseless=not xml)
       
  5600 
       
  5601     else:
       
  5602 
       
  5603         resname = tagStr.name
       
  5604 
       
  5605         
       
  5606 
       
  5607     tagAttrName = Word(alphas,alphanums+"_-")
       
  5608 
       
  5609     if (xml):
       
  5610 
       
  5611         tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
       
  5612 
       
  5613         openTag = Suppress("<") + tagStr + \
       
  5614                 Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
       
  5615                 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
       
  5616 
       
  5617     else:
       
  5618 
       
  5619         printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
       
  5620 
       
  5621         tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
       
  5622 
       
  5623         openTag = Suppress("<") + tagStr + \
       
  5624                 Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
       
  5625 
       
  5626                 Suppress("=") + tagAttrValue ))) + \
       
  5627                 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
       
  5628 
       
  5629     closeTag = Combine("</" + tagStr + ">")
       
  5630 
       
  5631     
       
  5632 
       
  5633     openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
       
  5634 
       
  5635     closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr)
       
  5636 
       
  5637     
       
  5638 
       
  5639     return openTag, closeTag
       
  5640 
       
  5641 
       
  5642 
       
  5643 def makeHTMLTags(tagStr):
       
  5644 
       
  5645     """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
       
  5646 
       
  5647     return _makeTags( tagStr, False )
       
  5648 
       
  5649 
       
  5650 
       
  5651 def makeXMLTags(tagStr):
       
  5652 
       
  5653     """Helper to construct opening and closing tag expressions for XML, given a tag name"""
       
  5654 
       
  5655     return _makeTags( tagStr, True )
       
  5656 
       
  5657 
       
  5658 
       
  5659 opAssoc = _Constants()
       
  5660 
       
  5661 opAssoc.LEFT = object()
       
  5662 
       
  5663 opAssoc.RIGHT = object()
       
  5664 
       
  5665 
       
  5666 
       
  5667 def operatorPrecedence( baseExpr, opList ):
       
  5668 
       
  5669     """Helper method for constructing grammars of expressions made up of 
       
  5670 
       
  5671        operators working in a precedence hierarchy.  Operators may be unary or
       
  5672 
       
  5673        binary, left- or right-associative.  Parse actions can also be attached
       
  5674 
       
  5675        to operator expressions.
       
  5676 
       
  5677         
       
  5678 
       
  5679        Parameters:
       
  5680 
       
  5681         - baseExpr - expression representing the most basic element for the nested 
       
  5682 
       
  5683         - opList - list of tuples, one for each operator precedence level in the expression grammar; each tuple is of the form
       
  5684 
       
  5685           (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       
  5686 
       
  5687            - opExpr is the pyparsing expression for the operator;
       
  5688 
       
  5689               may also be a string, which will be converted to a Literal
       
  5690 
       
  5691            - numTerms is the number of terms for this operator (must
       
  5692 
       
  5693               be 1 or 2)
       
  5694 
       
  5695            - rightLeftAssoc is the indicator whether the operator is
       
  5696 
       
  5697               right or left associative, using the pyparsing-defined
       
  5698 
       
  5699               constants opAssoc.RIGHT and opAssoc.LEFT.
       
  5700 
       
  5701            - parseAction is the parse action to be associated with 
       
  5702 
       
  5703               expressions matching this operator expression (the
       
  5704 
       
  5705               parse action tuple member may be omitted)
       
  5706 
       
  5707     """
       
  5708 
       
  5709     ret = Forward()
       
  5710 
       
  5711     lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
       
  5712 
       
  5713     for i,operDef in enumerate(opList):
       
  5714 
       
  5715         opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
       
  5716 
       
  5717         thisExpr = Forward().setName("expr%d" % i)
       
  5718 
       
  5719         if rightLeftAssoc == opAssoc.LEFT:
       
  5720 
       
  5721             if arity == 1:
       
  5722 
       
  5723                 matchExpr = Group( lastExpr + opExpr )
       
  5724 
       
  5725             elif arity == 2:
       
  5726 
       
  5727                 matchExpr = Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
       
  5728 
       
  5729             else:
       
  5730 
       
  5731                 raise ValueError, "operator must be unary (1) or binary (2)"
       
  5732 
       
  5733         elif rightLeftAssoc == opAssoc.RIGHT:
       
  5734 
       
  5735             if arity == 1:
       
  5736 
       
  5737                 # try to avoid LR with this extra test
       
  5738 
       
  5739                 if not isinstance(opExpr, Optional):
       
  5740 
       
  5741                     opExpr = Optional(opExpr)
       
  5742 
       
  5743                 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) 
       
  5744 
       
  5745             elif arity == 2:
       
  5746 
       
  5747                 matchExpr = Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
       
  5748 
       
  5749             else:
       
  5750 
       
  5751                 raise ValueError, "operator must be unary (1) or binary (2)"
       
  5752 
       
  5753         else:
       
  5754 
       
  5755             raise ValueError, "operator must indicate right or left associativity"
       
  5756 
       
  5757         if pa:
       
  5758 
       
  5759             matchExpr.setParseAction( pa )
       
  5760 
       
  5761         thisExpr << ( matchExpr | lastExpr )
       
  5762 
       
  5763         lastExpr = thisExpr
       
  5764 
       
  5765     ret << lastExpr
       
  5766 
       
  5767     return ret
       
  5768 
       
  5769 
       
  5770 
       
  5771 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
       
  5772 
       
  5773 punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
       
  5774 
       
  5775 
       
  5776 
       
  5777 dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\.))*"').setName("string enclosed in double quotes")
       
  5778 
       
  5779 sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\.))*'").setName("string enclosed in single quotes")
       
  5780 
       
  5781 quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\.))*')''').setName("quotedString using single or double quotes")
       
  5782 
       
  5783 
       
  5784 
       
  5785 anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_"))
       
  5786 
       
  5787 commonHTMLEntity = Combine("&" + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
       
  5788 
       
  5789 _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
       
  5790 
       
  5791 replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
       
  5792 
       
  5793     
       
  5794 
       
  5795 # it's easy to get these comment structures wrong - they're very common, so may as well make them available
       
  5796 
       
  5797 cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")
       
  5798 
       
  5799 
       
  5800 
       
  5801 htmlComment = Regex(r"<!--[\s\S]*?-->")
       
  5802 
       
  5803 restOfLine = Regex(r".*").leaveWhitespace()
       
  5804 
       
  5805 dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
       
  5806 
       
  5807 cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")
       
  5808 
       
  5809 
       
  5810 
       
  5811 javaStyleComment = cppStyleComment
       
  5812 
       
  5813 pythonStyleComment = Regex(r"#.*").setName("Python style comment")
       
  5814 
       
  5815 _noncomma = "".join( [ c for c in printables if c != "," ] )
       
  5816 
       
  5817 _commasepitem = Combine(OneOrMore(Word(_noncomma) + 
       
  5818 
       
  5819                                   Optional( Word(" \t") + 
       
  5820 
       
  5821                                             ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
       
  5822 
       
  5823 commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
       
  5824 
       
  5825 
       
  5826 
       
  5827 
       
  5828 
       
  5829 if __name__ == "__main__":
       
  5830 
       
  5831 
       
  5832 
       
  5833     def test( teststring ):
       
  5834 
       
  5835         print teststring,"->",
       
  5836 
       
  5837         try:
       
  5838 
       
  5839             tokens = simpleSQL.parseString( teststring )
       
  5840 
       
  5841             tokenlist = tokens.asList()
       
  5842 
       
  5843             print tokenlist
       
  5844 
       
  5845             print "tokens = ",        tokens
       
  5846 
       
  5847             print "tokens.columns =", tokens.columns
       
  5848 
       
  5849             print "tokens.tables =",  tokens.tables
       
  5850 
       
  5851             print tokens.asXML("SQL",True)
       
  5852 
       
  5853         except ParseException, err:
       
  5854 
       
  5855             print err.line
       
  5856 
       
  5857             print " "*(err.column-1) + "^"
       
  5858 
       
  5859             print err
       
  5860 
       
  5861         print
       
  5862 
       
  5863 
       
  5864 
       
  5865     selectToken    = CaselessLiteral( "select" )
       
  5866 
       
  5867     fromToken      = CaselessLiteral( "from" )
       
  5868 
       
  5869 
       
  5870 
       
  5871     ident          = Word( alphas, alphanums + "_$" )
       
  5872 
       
  5873     columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
       
  5874 
       
  5875     columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
       
  5876 
       
  5877     tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
       
  5878 
       
  5879     tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
       
  5880 
       
  5881     simpleSQL      = ( selectToken + \
       
  5882 
       
  5883                      ( '*' | columnNameList ).setResultsName( "columns" ) + \
       
  5884 
       
  5885                      fromToken + \
       
  5886 
       
  5887                      tableNameList.setResultsName( "tables" ) )
       
  5888 
       
  5889     
       
  5890 
       
  5891     test( "SELECT * from XYZZY, ABC" )
       
  5892 
       
  5893     test( "select * from SYS.XYZZY" )
       
  5894 
       
  5895     test( "Select A from Sys.dual" )
       
  5896 
       
  5897     test( "Select AA,BB,CC from Sys.dual" )
       
  5898 
       
  5899     test( "Select A, B, C from Sys.dual" )
       
  5900 
       
  5901     test( "Select A, B, C from Sys.dual" )
       
  5902 
       
  5903     test( "Xelect A, B, C from Sys.dual" )
       
  5904 
       
  5905     test( "Select A, B, C frox Sys.dual" )
       
  5906 
       
  5907     test( "Select" )
       
  5908 
       
  5909     test( "Select ^^^ frox Sys.dual" )
       
  5910 
       
  5911     test( "Select A, B, C from Sys.dual, Table2   " )
       
  5912