sbsv2/raptor/python/pyparsing.py
changeset 3 e1eecf4d390d
parent 0 044383f39525
equal deleted inserted replaced
1:be27ed110b50 3:e1eecf4d390d
     1 # module pyparsing.py
     1 # module pyparsing.py
     2 
       
     3 #
     2 #
     4 
       
     5 # Copyright (c) 2003-2006  Paul T. McGuire
     3 # Copyright (c) 2003-2006  Paul T. McGuire
     6 
       
     7 #
     4 #
     8 
       
     9 # Permission is hereby granted, free of charge, to any person obtaining
     5 # Permission is hereby granted, free of charge, to any person obtaining
    10 
       
    11 # a copy of this software and associated documentation files (the
     6 # a copy of this software and associated documentation files (the
    12 
       
    13 # "Software"), to deal in the Software without restriction, including
     7 # "Software"), to deal in the Software without restriction, including
    14 
       
    15 # without limitation the rights to use, copy, modify, merge, publish,
     8 # without limitation the rights to use, copy, modify, merge, publish,
    16 
       
    17 # distribute, sublicense, and/or sell copies of the Software, and to
     9 # distribute, sublicense, and/or sell copies of the Software, and to
    18 
       
    19 # permit persons to whom the Software is furnished to do so, subject to
    10 # permit persons to whom the Software is furnished to do so, subject to
    20 
       
    21 # the following conditions:
    11 # the following conditions:
    22 
       
    23 #
    12 #
    24 
       
    25 # The above copyright notice and this permission notice shall be
    13 # The above copyright notice and this permission notice shall be
    26 
       
    27 # included in all copies or substantial portions of the Software.
    14 # included in all copies or substantial portions of the Software.
    28 
       
    29 #
    15 #
    30 
       
    31 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    32 
       
    33 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    34 
       
    35 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    36 
       
    37 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    38 
       
    39 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    40 
       
    41 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    42 
       
    43 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    44 
       
    45 #
    23 #
    46 
       
    47 #from __future__ import generators
    24 #from __future__ import generators
    48 
       
    49 
       
    50 
    25 
    51 __doc__ = \
    26 __doc__ = \
    52 """
    27 """
    53 
       
    54 pyparsing module - Classes and methods to define and execute parsing grammars
    28 pyparsing module - Classes and methods to define and execute parsing grammars
    55 
    29 
    56 
       
    57 
       
    58 The pyparsing module is an alternative approach to creating and executing simple grammars, 
    30 The pyparsing module is an alternative approach to creating and executing simple grammars, 
    59 
       
    60 vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
    31 vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
    61 
       
    62 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
    32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
    63 
       
    64 provides a library of classes that you use to construct the grammar directly in Python.
    33 provides a library of classes that you use to construct the grammar directly in Python.
    65 
    34 
    66 
       
    67 
       
    68 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
    35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
    69 
    36 
    70 
       
    71 
       
    72     from pyparsing import Word, alphas
    37     from pyparsing import Word, alphas
    73 
    38     
    74     
       
    75 
       
    76     # define grammar of a greeting
    39     # define grammar of a greeting
    77 
       
    78     greet = Word( alphas ) + "," + Word( alphas ) + "!" 
    40     greet = Word( alphas ) + "," + Word( alphas ) + "!" 
    79 
    41     
    80     
       
    81 
       
    82     hello = "Hello, World!"
    42     hello = "Hello, World!"
    83 
       
    84     print hello, "->", greet.parseString( hello )
    43     print hello, "->", greet.parseString( hello )
    85 
    44 
    86 
       
    87 
       
    88 The program outputs the following::
    45 The program outputs the following::
    89 
    46 
    90 
       
    91 
       
    92     Hello, World! -> ['Hello', ',', 'World', '!']
    47     Hello, World! -> ['Hello', ',', 'World', '!']
    93 
    48 
    94 
       
    95 
       
    96 The Python representation of the grammar is quite readable, owing to the self-explanatory 
    49 The Python representation of the grammar is quite readable, owing to the self-explanatory 
    97 
       
    98 class names, and the use of '+', '|' and '^' operators.
    50 class names, and the use of '+', '|' and '^' operators.
    99 
    51 
   100 
       
   101 
       
   102 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 
    52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 
   103 
       
   104 object with named attributes.
    53 object with named attributes.
   105 
    54 
   106 
       
   107 
       
   108 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
    55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
   109 
       
   110  - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
    56  - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
   111 
       
   112  - quoted strings
    57  - quoted strings
   113 
       
   114  - embedded comments
    58  - embedded comments
   115 
       
   116 """
    59 """
   117 
       
   118 __version__ = "1.4.5"
    60 __version__ = "1.4.5"
   119 
       
   120 __versionTime__ = "16 December 2006 07:20"
    61 __versionTime__ = "16 December 2006 07:20"
   121 
       
   122 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
    62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
   123 
    63 
   124 
       
   125 
       
   126 import string
    64 import string
   127 
       
   128 import copy,sys
    65 import copy,sys
   129 
       
   130 import warnings
    66 import warnings
   131 
       
   132 import re
    67 import re
   133 
       
   134 import sre_constants
    68 import sre_constants
   135 
       
   136 import xml.sax.saxutils
    69 import xml.sax.saxutils
   137 
       
   138 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
    70 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
   139 
    71 
   140 
       
   141 
       
   142 def _ustr(obj):
    72 def _ustr(obj):
   143 
       
   144     """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
    73     """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
   145 
       
   146        str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
    74        str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
   147 
       
   148        then < returns the unicode object | encodes it with the default encoding | ... >.
    75        then < returns the unicode object | encodes it with the default encoding | ... >.
   149 
       
   150     """
    76     """
   151 
       
   152     try:
    77     try:
   153 
       
   154         # If this works, then _ustr(obj) has the same behaviour as str(obj), so
    78         # If this works, then _ustr(obj) has the same behaviour as str(obj), so
   155 
       
   156         # it won't break any existing code.
    79         # it won't break any existing code.
   157 
       
   158         return str(obj)
    80         return str(obj)
   159 
    81         
   160         
       
   161 
       
   162     except UnicodeEncodeError, e:
    82     except UnicodeEncodeError, e:
   163 
       
   164         # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
    83         # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
   165 
       
   166         # state that "The return value must be a string object". However, does a
    84         # state that "The return value must be a string object". However, does a
   167 
       
   168         # unicode object (being a subclass of basestring) count as a "string
    85         # unicode object (being a subclass of basestring) count as a "string
   169 
       
   170         # object"?
    86         # object"?
   171 
       
   172         # If so, then return a unicode object:
    87         # If so, then return a unicode object:
   173 
       
   174         return unicode(obj)
    88         return unicode(obj)
   175 
       
   176         # Else encode it... but how? There are many choices... :)
    89         # Else encode it... but how? There are many choices... :)
   177 
       
   178         # Replace unprintables with escape codes?
    90         # Replace unprintables with escape codes?
   179 
       
   180         #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
    91         #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
   181 
       
   182         # Replace unprintables with question marks?
    92         # Replace unprintables with question marks?
   183 
       
   184         #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
    93         #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
   185 
       
   186         # ...
    94         # ...
   187 
    95 
   188 
       
   189 
       
   190 def _str2dict(strg):
    96 def _str2dict(strg):
   191 
       
   192     return dict( [(c,0) for c in strg] )
    97     return dict( [(c,0) for c in strg] )
   193 
       
   194     #~ return set( [c for c in strg] )
    98     #~ return set( [c for c in strg] )
   195 
    99 
   196 
       
   197 
       
   198 class _Constants(object):
   100 class _Constants(object):
   199 
       
   200     pass
   101     pass
   201 
   102     
   202     
       
   203 
       
   204 alphas     = string.lowercase + string.uppercase
   103 alphas     = string.lowercase + string.uppercase
   205 
       
   206 nums       = string.digits
   104 nums       = string.digits
   207 
       
   208 hexnums    = nums + "ABCDEFabcdef"
   105 hexnums    = nums + "ABCDEFabcdef"
   209 
       
   210 alphanums  = alphas + nums    
   106 alphanums  = alphas + nums    
   211 
   107 
   212 
       
   213 
       
   214 class ParseBaseException(Exception):
   108 class ParseBaseException(Exception):
   215 
       
   216     """base exception class for all parsing runtime exceptions"""
   109     """base exception class for all parsing runtime exceptions"""
   217 
       
   218     __slots__ = ( "loc","msg","pstr","parserElement" )
   110     __slots__ = ( "loc","msg","pstr","parserElement" )
   219 
       
   220     # Performance tuning: we construct a *lot* of these, so keep this
   111     # Performance tuning: we construct a *lot* of these, so keep this
   221 
       
   222     # constructor as small and fast as possible        
   112     # constructor as small and fast as possible        
   223 
       
   224     def __init__( self, pstr, loc, msg, elem=None ):
   113     def __init__( self, pstr, loc, msg, elem=None ):
   225 
       
   226         self.loc = loc
   114         self.loc = loc
   227 
       
   228         self.msg = msg
   115         self.msg = msg
   229 
       
   230         self.pstr = pstr
   116         self.pstr = pstr
   231 
       
   232         self.parserElement = elem
   117         self.parserElement = elem
   233 
   118 
   234 
       
   235 
       
   236     def __getattr__( self, aname ):
   119     def __getattr__( self, aname ):
   237 
       
   238         """supported attributes by name are:
   120         """supported attributes by name are:
   239 
       
   240             - lineno - returns the line number of the exception text
   121             - lineno - returns the line number of the exception text
   241 
       
   242             - col - returns the column number of the exception text
   122             - col - returns the column number of the exception text
   243 
       
   244             - line - returns the line containing the exception text
   123             - line - returns the line containing the exception text
   245 
       
   246         """
   124         """
   247 
       
   248         if( aname == "lineno" ):
   125         if( aname == "lineno" ):
   249 
       
   250             return lineno( self.loc, self.pstr )
   126             return lineno( self.loc, self.pstr )
   251 
       
   252         elif( aname in ("col", "column") ):
   127         elif( aname in ("col", "column") ):
   253 
       
   254             return col( self.loc, self.pstr )
   128             return col( self.loc, self.pstr )
   255 
       
   256         elif( aname == "line" ):
   129         elif( aname == "line" ):
   257 
       
   258             return line( self.loc, self.pstr )
   130             return line( self.loc, self.pstr )
   259 
   131         else:
   260         else:
       
   261 
       
   262             raise AttributeError, aname
   132             raise AttributeError, aname
   263 
   133 
   264 
       
   265 
       
   266     def __str__( self ):
   134     def __str__( self ):
   267 
       
   268         return "%s (at char %d), (line:%d, col:%d)" % ( self.msg, self.loc, self.lineno, self.column )
   135         return "%s (at char %d), (line:%d, col:%d)" % ( self.msg, self.loc, self.lineno, self.column )
   269 
       
   270     def __repr__( self ):
   136     def __repr__( self ):
   271 
       
   272         return _ustr(self)
   137         return _ustr(self)
   273 
       
   274     def markInputline( self, markerString = ">!<" ):
   138     def markInputline( self, markerString = ">!<" ):
   275 
       
   276         """Extracts the exception line from the input string, and marks 
   139         """Extracts the exception line from the input string, and marks 
   277 
       
   278            the location of the exception with a special symbol.
   140            the location of the exception with a special symbol.
   279 
       
   280         """
   141         """
   281 
       
   282         line_str = self.line
   142         line_str = self.line
   283 
       
   284         line_column = self.column - 1
   143         line_column = self.column - 1
   285 
       
   286         if markerString:
   144         if markerString:
   287 
       
   288             line_str = "".join( [line_str[:line_column], markerString, line_str[line_column:]])
   145             line_str = "".join( [line_str[:line_column], markerString, line_str[line_column:]])
   289 
       
   290         return line_str.strip()
   146         return line_str.strip()
   291 
   147 
   292 
       
   293 
       
   294 class ParseException(ParseBaseException):
   148 class ParseException(ParseBaseException):
   295 
       
   296     """exception thrown when parse expressions don't match class"""
   149     """exception thrown when parse expressions don't match class"""
   297 
       
   298     """supported attributes by name are:
   150     """supported attributes by name are:
   299 
       
   300         - lineno - returns the line number of the exception text
   151         - lineno - returns the line number of the exception text
   301 
       
   302         - col - returns the column number of the exception text
   152         - col - returns the column number of the exception text
   303 
       
   304         - line - returns the line containing the exception text
   153         - line - returns the line containing the exception text
   305 
       
   306     """
   154     """
   307 
       
   308     pass
   155     pass
   309 
   156     
   310     
       
   311 
       
   312 class ParseFatalException(ParseBaseException):
   157 class ParseFatalException(ParseBaseException):
   313 
       
   314     """user-throwable exception thrown when inconsistent parse content
   158     """user-throwable exception thrown when inconsistent parse content
   315 
       
   316        is found; stops all parsing immediately"""
   159        is found; stops all parsing immediately"""
   317 
       
   318     pass
   160     pass
   319 
   161 
   320 
       
   321 
       
   322 class ReparseException(ParseBaseException):
   162 class ReparseException(ParseBaseException):
   323 
       
   324     def __init_( self, newstring, restartLoc ):
   163     def __init_( self, newstring, restartLoc ):
   325 
       
   326         self.newParseText = newstring
   164         self.newParseText = newstring
   327 
       
   328         self.reparseLoc = restartLoc
   165         self.reparseLoc = restartLoc
   329 
   166 
   330 
   167 
   331 
       
   332 
       
   333 
       
   334 class RecursiveGrammarException(Exception):
   168 class RecursiveGrammarException(Exception):
   335 
       
   336     """exception thrown by validate() if the grammar could be improperly recursive"""
   169     """exception thrown by validate() if the grammar could be improperly recursive"""
   337 
       
   338     def __init__( self, parseElementList ):
   170     def __init__( self, parseElementList ):
   339 
       
   340         self.parseElementTrace = parseElementList
   171         self.parseElementTrace = parseElementList
   341 
   172     
   342     
       
   343 
       
   344     def __str__( self ):
   173     def __str__( self ):
   345 
       
   346         return "RecursiveGrammarException: %s" % self.parseElementTrace
   174         return "RecursiveGrammarException: %s" % self.parseElementTrace
   347 
   175 
   348 
       
   349 
       
   350 class ParseResults(object):
   176 class ParseResults(object):
   351 
       
   352     """Structured parse results, to provide multiple means of access to the parsed data:
   177     """Structured parse results, to provide multiple means of access to the parsed data:
   353 
       
   354        - as a list (len(results))
   178        - as a list (len(results))
   355 
       
   356        - by list index (results[0], results[1], etc.)
   179        - by list index (results[0], results[1], etc.)
   357 
       
   358        - by attribute (results.<resultsName>)
   180        - by attribute (results.<resultsName>)
   359 
       
   360        """
   181        """
   361 
       
   362     __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames" )
   182     __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames" )
   363 
       
   364     def __new__(cls, toklist, name=None, asList=True, modal=True ):
   183     def __new__(cls, toklist, name=None, asList=True, modal=True ):
   365 
       
   366         if isinstance(toklist, cls):
   184         if isinstance(toklist, cls):
   367 
       
   368             return toklist
   185             return toklist
   369 
       
   370         retobj = object.__new__(cls)
   186         retobj = object.__new__(cls)
   371 
       
   372         retobj.__doinit = True
   187         retobj.__doinit = True
   373 
       
   374         return retobj
   188         return retobj
   375 
   189         
   376         
       
   377 
       
   378     # Performance tuning: we construct a *lot* of these, so keep this
   190     # Performance tuning: we construct a *lot* of these, so keep this
   379 
       
   380     # constructor as small and fast as possible
   191     # constructor as small and fast as possible
   381 
       
   382     def __init__( self, toklist, name=None, asList=True, modal=True ):
   192     def __init__( self, toklist, name=None, asList=True, modal=True ):
   383 
       
   384         if self.__doinit:
   193         if self.__doinit:
   385 
       
   386             self.__doinit = False
   194             self.__doinit = False
   387 
       
   388             self.__name = None
   195             self.__name = None
   389 
       
   390             self.__parent = None
   196             self.__parent = None
   391 
       
   392             self.__accumNames = {}
   197             self.__accumNames = {}
   393 
       
   394             if isinstance(toklist, list):
   198             if isinstance(toklist, list):
   395 
       
   396                 self.__toklist = toklist[:]
   199                 self.__toklist = toklist[:]
   397 
       
   398             else:
   200             else:
   399 
       
   400                 self.__toklist = [toklist]
   201                 self.__toklist = [toklist]
   401 
       
   402             self.__tokdict = dict()
   202             self.__tokdict = dict()
   403 
   203 
   404 
       
   405 
       
   406         # this line is related to debugging the asXML bug
   204         # this line is related to debugging the asXML bug
   407 
       
   408         #~ asList = False
   205         #~ asList = False
   409 
   206         
   410         
       
   411 
       
   412         if name:
   207         if name:
   413 
       
   414             if not modal:
   208             if not modal:
   415 
       
   416                 self.__accumNames[name] = 0
   209                 self.__accumNames[name] = 0
   417 
       
   418             if isinstance(name,int):
   210             if isinstance(name,int):
   419 
       
   420                 name = _ustr(name) # will always return a str, but use _ustr for consistency
   211                 name = _ustr(name) # will always return a str, but use _ustr for consistency
   421 
       
   422             self.__name = name
   212             self.__name = name
   423 
       
   424             if not toklist in (None,'',[]):
   213             if not toklist in (None,'',[]):
   425 
       
   426                 if isinstance(toklist,basestring): 
   214                 if isinstance(toklist,basestring): 
   427 
       
   428                     toklist = [ toklist ]
   215                     toklist = [ toklist ]
   429 
       
   430                 if asList:
   216                 if asList:
   431 
       
   432                     if isinstance(toklist,ParseResults):
   217                     if isinstance(toklist,ParseResults):
   433 
       
   434                         self[name] = (toklist.copy(),-1)
   218                         self[name] = (toklist.copy(),-1)
   435 
       
   436                     else:
   219                     else:
   437 
       
   438                         self[name] = (ParseResults(toklist[0]),-1)
   220                         self[name] = (ParseResults(toklist[0]),-1)
   439 
       
   440                     self[name].__name = name
   221                     self[name].__name = name
   441 
       
   442                 else:
   222                 else:
   443 
       
   444                     try:
   223                     try:
   445 
       
   446                         self[name] = toklist[0]
   224                         self[name] = toklist[0]
   447 
       
   448                     except (KeyError,TypeError):
   225                     except (KeyError,TypeError):
   449 
       
   450                         self[name] = toklist
   226                         self[name] = toklist
   451 
   227 
   452 
       
   453 
       
   454     def __getitem__( self, i ):
   228     def __getitem__( self, i ):
   455 
       
   456         if isinstance( i, (int,slice) ):
   229         if isinstance( i, (int,slice) ):
   457 
       
   458             return self.__toklist[i]
   230             return self.__toklist[i]
   459 
   231         else:
   460         else:
       
   461 
       
   462             if i not in self.__accumNames:
   232             if i not in self.__accumNames:
   463 
       
   464                 return self.__tokdict[i][-1][0]
   233                 return self.__tokdict[i][-1][0]
   465 
       
   466             else:
   234             else:
   467 
       
   468                 return ParseResults([ v[0] for v in self.__tokdict[i] ])
   235                 return ParseResults([ v[0] for v in self.__tokdict[i] ])
   469 
   236 
   470 
       
   471 
       
   472     def __setitem__( self, k, v ):
   237     def __setitem__( self, k, v ):
   473 
       
   474         if isinstance(v,tuple):
   238         if isinstance(v,tuple):
   475 
       
   476             self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
   239             self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
   477 
       
   478             sub = v[0]
   240             sub = v[0]
   479 
       
   480         elif isinstance(k,int):
   241         elif isinstance(k,int):
   481 
       
   482             self.__toklist[k] = v
   242             self.__toklist[k] = v
   483 
       
   484             sub = v
   243             sub = v
   485 
   244         else:
   486         else:
       
   487 
       
   488             self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
   245             self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
   489 
       
   490             sub = v
   246             sub = v
   491 
       
   492         if isinstance(sub,ParseResults):
   247         if isinstance(sub,ParseResults):
   493 
       
   494             sub.__parent = self
   248             sub.__parent = self
   495 
   249         
   496         
       
   497 
       
   498     def __delitem__( self, i ):
   250     def __delitem__( self, i ):
   499 
       
   500         if isinstance(i,(int,slice)):
   251         if isinstance(i,(int,slice)):
   501 
       
   502             del self.__toklist[i]
   252             del self.__toklist[i]
   503 
   253         else:
   504         else:
       
   505 
       
   506             del self._tokdict[i]
   254             del self._tokdict[i]
   507 
   255 
   508 
       
   509 
       
   510     def __contains__( self, k ):
   256     def __contains__( self, k ):
   511 
       
   512         return self.__tokdict.has_key(k)
   257         return self.__tokdict.has_key(k)
   513 
   258         
   514         
       
   515 
       
   516     def __len__( self ): return len( self.__toklist )
   259     def __len__( self ): return len( self.__toklist )
   517 
       
   518     def __nonzero__( self ): return len( self.__toklist ) > 0
   260     def __nonzero__( self ): return len( self.__toklist ) > 0
   519 
       
   520     def __iter__( self ): return iter( self.__toklist )
   261     def __iter__( self ): return iter( self.__toklist )
   521 
       
   522     def keys( self ): 
   262     def keys( self ): 
   523 
       
   524         """Returns all named result keys."""
   263         """Returns all named result keys."""
   525 
       
   526         return self.__tokdict.keys()
   264         return self.__tokdict.keys()
   527 
   265     
   528     
       
   529 
       
   530     def items( self ): 
   266     def items( self ): 
   531 
       
   532         """Returns all named result keys and values as a list of tuples."""
   267         """Returns all named result keys and values as a list of tuples."""
   533 
       
   534         return [(k,self[k]) for k in self.__tokdict.keys()]
   268         return [(k,self[k]) for k in self.__tokdict.keys()]
   535 
   269     
   536     
       
   537 
       
   538     def values( self ): 
   270     def values( self ): 
   539 
       
   540         """Returns all named result values."""
   271         """Returns all named result values."""
   541 
       
   542         return [ v[-1][0] for v in self.__tokdict.values() ]
   272         return [ v[-1][0] for v in self.__tokdict.values() ]
   543 
   273 
   544 
       
   545 
       
   546     def __getattr__( self, name ):
   274     def __getattr__( self, name ):
   547 
       
   548         if name not in self.__slots__:
   275         if name not in self.__slots__:
   549 
       
   550             if self.__tokdict.has_key( name ):
   276             if self.__tokdict.has_key( name ):
   551 
       
   552                 if name not in self.__accumNames:
   277                 if name not in self.__accumNames:
   553 
       
   554                     return self.__tokdict[name][-1][0]
   278                     return self.__tokdict[name][-1][0]
   555 
       
   556                 else:
   279                 else:
   557 
       
   558                     return ParseResults([ v[0] for v in self.__tokdict[name] ])
   280                     return ParseResults([ v[0] for v in self.__tokdict[name] ])
   559 
       
   560             else:
   281             else:
   561 
       
   562                 return ""
   282                 return ""
   563 
       
   564         return None
   283         return None
   565 
   284 
   566 
       
   567 
       
   568     def __add__( self, other ):
   285     def __add__( self, other ):
   569 
       
   570         ret = self.copy()
   286         ret = self.copy()
   571 
       
   572         ret += other
   287         ret += other
   573 
       
   574         return ret
   288         return ret
   575 
   289         
   576         
       
   577 
       
   578     def __iadd__( self, other ):
   290     def __iadd__( self, other ):
   579 
       
   580         if other.__tokdict:
   291         if other.__tokdict:
   581 
       
   582             offset = len(self.__toklist)
   292             offset = len(self.__toklist)
   583 
       
   584             addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
   293             addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
   585 
       
   586             otheritems = other.__tokdict.items()
   294             otheritems = other.__tokdict.items()
   587 
       
   588             otherdictitems = [(k,(v[0],addoffset(v[1])) ) for (k,vlist) in otheritems for v in vlist]
   295             otherdictitems = [(k,(v[0],addoffset(v[1])) ) for (k,vlist) in otheritems for v in vlist]
   589 
       
   590             for k,v in otherdictitems:
   296             for k,v in otherdictitems:
   591 
       
   592                 self[k] = v
   297                 self[k] = v
   593 
       
   594                 if isinstance(v[0],ParseResults):
   298                 if isinstance(v[0],ParseResults):
   595 
       
   596                     v[0].__parent = self
   299                     v[0].__parent = self
   597 
       
   598         self.__toklist += other.__toklist
   300         self.__toklist += other.__toklist
   599 
       
   600         self.__accumNames.update( other.__accumNames )
   301         self.__accumNames.update( other.__accumNames )
   601 
       
   602         del other
   302         del other
   603 
       
   604         return self
   303         return self
   605 
       
   606        
   304        
   607 
       
   608     def __repr__( self ):
   305     def __repr__( self ):
   609 
       
   610         return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
   306         return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
   611 
   307 
   612 
       
   613 
       
   614     def __str__( self ):
   308     def __str__( self ):
   615 
       
   616         out = "["
   309         out = "["
   617 
       
   618         sep = ""
   310         sep = ""
   619 
       
   620         for i in self.__toklist:
   311         for i in self.__toklist:
   621 
       
   622             if isinstance(i, ParseResults):
   312             if isinstance(i, ParseResults):
   623 
       
   624                 out += sep + _ustr(i)
   313                 out += sep + _ustr(i)
   625 
       
   626             else:
   314             else:
   627 
       
   628                 out += sep + repr(i)
   315                 out += sep + repr(i)
   629 
       
   630             sep = ", "
   316             sep = ", "
   631 
       
   632         out += "]"
   317         out += "]"
   633 
       
   634         return out
   318         return out
   635 
   319 
   636 
       
   637 
       
   638     def _asStringList( self, sep='' ):
   320     def _asStringList( self, sep='' ):
   639 
       
   640         out = []
   321         out = []
   641 
       
   642         for item in self.__toklist:
   322         for item in self.__toklist:
   643 
       
   644             if out and sep:
   323             if out and sep:
   645 
       
   646                 out.append(sep)
   324                 out.append(sep)
   647 
       
   648             if isinstance( item, ParseResults ):
   325             if isinstance( item, ParseResults ):
   649 
       
   650                 out += item._asStringList()
   326                 out += item._asStringList()
   651 
       
   652             else:
   327             else:
   653 
       
   654                 out.append( _ustr(item) )
   328                 out.append( _ustr(item) )
   655 
       
   656         return out
   329         return out
   657 
   330 
   658 
       
   659 
       
   660     def asList( self ):
   331     def asList( self ):
   661 
       
   662         """Returns the parse results as a nested list of matching tokens, all converted to strings."""
   332         """Returns the parse results as a nested list of matching tokens, all converted to strings."""
   663 
       
   664         out = []
   333         out = []
   665 
       
   666         for res in self.__toklist:
   334         for res in self.__toklist:
   667 
       
   668             if isinstance(res,ParseResults):
   335             if isinstance(res,ParseResults):
   669 
       
   670                 out.append( res.asList() )
   336                 out.append( res.asList() )
   671 
       
   672             else:
   337             else:
   673 
       
   674                 out.append( res )
   338                 out.append( res )
   675 
       
   676         return out
   339         return out
   677 
   340 
   678 
       
   679 
       
   680     def asDict( self ):
   341     def asDict( self ):
   681 
       
   682         """Returns the named parse results as dictionary."""
   342         """Returns the named parse results as dictionary."""
   683 
       
   684         return dict( self.items() )
   343         return dict( self.items() )
   685 
   344 
   686 
       
   687 
       
   688     def copy( self ):
   345     def copy( self ):
   689 
       
   690         """Returns a new copy of a ParseResults object."""
   346         """Returns a new copy of a ParseResults object."""
   691 
       
   692         ret = ParseResults( self.__toklist )
   347         ret = ParseResults( self.__toklist )
   693 
       
   694         ret.__tokdict = self.__tokdict.copy()
   348         ret.__tokdict = self.__tokdict.copy()
   695 
       
   696         ret.__parent = self.__parent
   349         ret.__parent = self.__parent
   697 
       
   698         ret.__accumNames.update( self.__accumNames )
   350         ret.__accumNames.update( self.__accumNames )
   699 
       
   700         ret.__name = self.__name
   351         ret.__name = self.__name
   701 
       
   702         return ret
   352         return ret
   703 
   353         
   704         
       
   705 
       
   706     def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
   354     def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
   707 
       
   708         """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
   355         """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
   709 
       
   710         nl = "\n"
   356         nl = "\n"
   711 
       
   712         out = []
   357         out = []
   713 
       
   714         namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] )
   358         namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] )
   715 
       
   716         nextLevelIndent = indent + "  "
   359         nextLevelIndent = indent + "  "
   717 
   360         
   718         
       
   719 
       
   720         # collapse out indents if formatting is not desired
   361         # collapse out indents if formatting is not desired
   721 
       
   722         if not formatted:
   362         if not formatted:
   723 
       
   724             indent = ""
   363             indent = ""
   725 
       
   726             nextLevelIndent = ""
   364             nextLevelIndent = ""
   727 
       
   728             nl = ""
   365             nl = ""
   729 
       
   730             
   366             
   731 
       
   732         selfTag = None
   367         selfTag = None
   733 
       
   734         if doctag is not None:
   368         if doctag is not None:
   735 
       
   736             selfTag = doctag
   369             selfTag = doctag
   737 
   370         else:
   738         else:
       
   739 
       
   740             if self.__name:
   371             if self.__name:
   741 
       
   742                 selfTag = self.__name
   372                 selfTag = self.__name
   743 
   373         
   744         
       
   745 
       
   746         if not selfTag:
   374         if not selfTag:
   747 
       
   748             if namedItemsOnly:
   375             if namedItemsOnly:
   749 
       
   750                 return ""
   376                 return ""
   751 
       
   752             else:
   377             else:
   753 
       
   754                 selfTag = "ITEM"
   378                 selfTag = "ITEM"
   755 
   379         
   756         
       
   757 
       
   758         out += [ nl, indent, "<", selfTag, ">" ]
   380         out += [ nl, indent, "<", selfTag, ">" ]
   759 
   381         
   760         
       
   761 
       
   762         worklist = self.__toklist
   382         worklist = self.__toklist
   763 
       
   764         for i,res in enumerate(worklist):
   383         for i,res in enumerate(worklist):
   765 
       
   766             if isinstance(res,ParseResults):
   384             if isinstance(res,ParseResults):
   767 
       
   768                 if i in namedItems:
   385                 if i in namedItems:
   769 
       
   770                     out += [ res.asXML(namedItems[i], namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
   386                     out += [ res.asXML(namedItems[i], namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
   771 
       
   772                 else:
   387                 else:
   773 
       
   774                     out += [ res.asXML(None, namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
   388                     out += [ res.asXML(None, namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
   775 
       
   776             else:
   389             else:
   777 
       
   778                 # individual token, see if there is a name for it
   390                 # individual token, see if there is a name for it
   779 
       
   780                 resTag = None
   391                 resTag = None
   781 
       
   782                 if i in namedItems:
   392                 if i in namedItems:
   783 
       
   784                     resTag = namedItems[i]
   393                     resTag = namedItems[i]
   785 
       
   786                 if not resTag:
   394                 if not resTag:
   787 
       
   788                     if namedItemsOnly:
   395                     if namedItemsOnly:
   789 
       
   790                         continue
   396                         continue
   791 
       
   792                     else:
   397                     else:
   793 
       
   794                         resTag = "ITEM"
   398                         resTag = "ITEM"
   795 
       
   796                 xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
   399                 xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
   797 
       
   798                 out += [ nl, nextLevelIndent, "<", resTag, ">", xmlBodyText, "</", resTag, ">" ]
   400                 out += [ nl, nextLevelIndent, "<", resTag, ">", xmlBodyText, "</", resTag, ">" ]
   799 
   401         
   800         
       
   801 
       
   802         out += [ nl, indent, "</", selfTag, ">" ]
   402         out += [ nl, indent, "</", selfTag, ">" ]
   803 
       
   804         return "".join(out)
   403         return "".join(out)
   805 
   404 
   806 
       
   807 
       
   808     def __lookup(self,sub):
   405     def __lookup(self,sub):
   809 
       
   810         for k,vlist in self.__tokdict.items():
   406         for k,vlist in self.__tokdict.items():
   811 
       
   812             for v,loc in vlist:
   407             for v,loc in vlist:
   813 
       
   814                 if sub is v:
   408                 if sub is v:
   815 
       
   816                     return k
   409                     return k
   817 
       
   818         return None
   410         return None
   819 
       
   820             
   411             
   821 
       
   822     def getName(self):
   412     def getName(self):
   823 
       
   824         """Returns the results name for this token expression."""
   413         """Returns the results name for this token expression."""
   825 
       
   826         if self.__name:
   414         if self.__name:
   827 
       
   828             return self.__name
   415             return self.__name
   829 
       
   830         elif self.__parent:
   416         elif self.__parent:
   831 
       
   832             par = self.__parent
   417             par = self.__parent
   833 
       
   834             if par:
   418             if par:
   835 
       
   836                 return par.__lookup(self)
   419                 return par.__lookup(self)
   837 
       
   838             else:
   420             else:
   839 
       
   840                 return None
   421                 return None
   841 
       
   842         elif (len(self) == 1 and 
   422         elif (len(self) == 1 and 
   843 
       
   844                len(self.__tokdict) == 1 and
   423                len(self.__tokdict) == 1 and
   845 
       
   846                self.__tokdict.values()[0][0][1] in (0,-1)):
   424                self.__tokdict.values()[0][0][1] in (0,-1)):
   847 
       
   848             return self.__tokdict.keys()[0]
   425             return self.__tokdict.keys()[0]
   849 
   426         else:
   850         else:
       
   851 
       
   852             return None
   427             return None
   853 
       
   854             
   428             
   855 
       
   856     def dump(self,indent='',depth=0):
   429     def dump(self,indent='',depth=0):
   857 
       
   858         """Diagnostic method for listing out the contents of a ParseResults.
   430         """Diagnostic method for listing out the contents of a ParseResults.
   859 
       
   860            Accepts an optional indent argument so that this string can be embedded
   431            Accepts an optional indent argument so that this string can be embedded
   861 
       
   862            in a nested display of other data."""
   432            in a nested display of other data."""
   863 
       
   864         out = []
   433         out = []
   865 
       
   866         out.append( indent+str(self.asList()) )
   434         out.append( indent+str(self.asList()) )
   867 
       
   868         keys = self.items()
   435         keys = self.items()
   869 
       
   870         keys.sort()
   436         keys.sort()
   871 
       
   872         for k,v in keys:
   437         for k,v in keys:
   873 
       
   874             if out:
   438             if out:
   875 
       
   876                 out.append('\n')
   439                 out.append('\n')
   877 
       
   878             out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
   440             out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
   879 
       
   880             if isinstance(v,ParseResults):
   441             if isinstance(v,ParseResults):
   881 
       
   882                 if v.keys():
   442                 if v.keys():
   883 
       
   884                     #~ out.append('\n')
   443                     #~ out.append('\n')
   885 
       
   886                     out.append( v.dump(indent,depth+1) )
   444                     out.append( v.dump(indent,depth+1) )
   887 
       
   888                     #~ out.append('\n')
   445                     #~ out.append('\n')
   889 
       
   890                 else:
   446                 else:
   891 
       
   892                     out.append(str(v))
   447                     out.append(str(v))
   893 
       
   894             else:
   448             else:
   895 
       
   896                 out.append(str(v))
   449                 out.append(str(v))
   897 
       
   898         #~ out.append('\n')
   450         #~ out.append('\n')
   899 
       
   900         return "".join(out)
   451         return "".join(out)
   901 
   452 
   902 
       
   903 
       
   904     # add support for pickle protocol
   453     # add support for pickle protocol
   905 
       
   906     def __getstate__(self):
   454     def __getstate__(self):
   907 
       
   908         return ( self.__toklist,
   455         return ( self.__toklist,
   909 
       
   910                  ( self.__tokdict.copy(),
   456                  ( self.__tokdict.copy(),
   911 
       
   912                    self.__parent,
   457                    self.__parent,
   913 
       
   914                    self.__accumNames,
   458                    self.__accumNames,
   915 
       
   916                    self.__name ) )
   459                    self.__name ) )
   917 
   460     
   918     
       
   919 
       
   920     def __setstate__(self,state):
   461     def __setstate__(self,state):
   921 
       
   922         self.__toklist = state[0]
   462         self.__toklist = state[0]
   923 
       
   924         self.__tokdict, \
   463         self.__tokdict, \
   925 
       
   926         self.__parent, \
   464         self.__parent, \
   927 
       
   928         inAccumNames, \
   465         inAccumNames, \
   929 
       
   930         self.__name = state[1]
   466         self.__name = state[1]
   931 
       
   932         self.__accumNames = {}
   467         self.__accumNames = {}
   933 
       
   934         self.__accumNames.update(inAccumNames)
   468         self.__accumNames.update(inAccumNames)
   935 
   469 
   936 
   470 
   937 
       
   938 
       
   939 
       
   940 def col (loc,strg):
   471 def col (loc,strg):
   941 
       
   942     """Returns current column within a string, counting newlines as line separators.
   472     """Returns current column within a string, counting newlines as line separators.
   943 
       
   944    The first column is number 1.
   473    The first column is number 1.
   945 
       
   946    """
   474    """
   947 
       
   948     return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
   475     return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
   949 
   476 
   950 
       
   951 
       
   952 def lineno(loc,strg):
   477 def lineno(loc,strg):
   953 
       
   954     """Returns current line number within a string, counting newlines as line separators.
   478     """Returns current line number within a string, counting newlines as line separators.
   955 
       
   956    The first line is number 1.
   479    The first line is number 1.
   957 
       
   958    """
   480    """
   959 
       
   960     return strg.count("\n",0,loc) + 1
   481     return strg.count("\n",0,loc) + 1
   961 
   482 
   962 
       
   963 
       
   964 def line( loc, strg ):
   483 def line( loc, strg ):
   965 
       
   966     """Returns the line of text containing loc within a string, counting newlines as line separators.
   484     """Returns the line of text containing loc within a string, counting newlines as line separators.
   967 
       
   968        """
   485        """
   969 
       
   970     lastCR = strg.rfind("\n", 0, loc)
   486     lastCR = strg.rfind("\n", 0, loc)
   971 
       
   972     nextCR = strg.find("\n", loc)
   487     nextCR = strg.find("\n", loc)
   973 
       
   974     if nextCR > 0:
   488     if nextCR > 0:
   975 
       
   976         return strg[lastCR+1:nextCR]
   489         return strg[lastCR+1:nextCR]
   977 
       
   978     else:
   490     else:
   979 
       
   980         return strg[lastCR+1:]
   491         return strg[lastCR+1:]
   981 
   492 
   982 
       
   983 
       
   984 def _defaultStartDebugAction( instring, loc, expr ):
   493 def _defaultStartDebugAction( instring, loc, expr ):
   985 
       
   986     print "Match",expr,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
   494     print "Match",expr,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
   987 
   495 
   988 
       
   989 
       
   990 def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
   496 def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
   991 
       
   992     print "Matched",expr,"->",toks.asList()
   497     print "Matched",expr,"->",toks.asList()
   993 
   498     
   994     
       
   995 
       
   996 def _defaultExceptionDebugAction( instring, loc, expr, exc ):
   499 def _defaultExceptionDebugAction( instring, loc, expr, exc ):
   997 
       
   998     print "Exception raised:", exc
   500     print "Exception raised:", exc
   999 
   501 
  1000 
       
  1001 
       
  1002 def nullDebugAction(*args):
   502 def nullDebugAction(*args):
  1003 
       
  1004     """'Do-nothing' debug action, to suppress debugging output during parsing."""
   503     """'Do-nothing' debug action, to suppress debugging output during parsing."""
  1005 
       
  1006     pass
   504     pass
  1007 
   505 
  1008 
       
  1009 
       
  1010 class ParserElement(object):
   506 class ParserElement(object):
  1011 
       
  1012     """Abstract base level parser element class."""
   507     """Abstract base level parser element class."""
  1013 
       
  1014     DEFAULT_WHITE_CHARS = " \n\t\r"
   508     DEFAULT_WHITE_CHARS = " \n\t\r"
  1015 
   509     
  1016     
       
  1017 
       
  1018     def setDefaultWhitespaceChars( chars ):
   510     def setDefaultWhitespaceChars( chars ):
  1019 
       
  1020         """Overrides the default whitespace chars
   511         """Overrides the default whitespace chars
  1021 
       
  1022         """
   512         """
  1023 
       
  1024         ParserElement.DEFAULT_WHITE_CHARS = chars
   513         ParserElement.DEFAULT_WHITE_CHARS = chars
  1025 
       
  1026     setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
   514     setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
  1027 
   515     
  1028     
       
  1029 
       
  1030     def __init__( self, savelist=False ):
   516     def __init__( self, savelist=False ):
  1031 
       
  1032         self.parseAction = list()
   517         self.parseAction = list()
  1033 
       
  1034         self.failAction = None
   518         self.failAction = None
  1035 
       
  1036         #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
   519         #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
  1037 
       
  1038         self.strRepr = None
   520         self.strRepr = None
  1039 
       
  1040         self.resultsName = None
   521         self.resultsName = None
  1041 
       
  1042         self.saveAsList = savelist
   522         self.saveAsList = savelist
  1043 
       
  1044         self.skipWhitespace = True
   523         self.skipWhitespace = True
  1045 
       
  1046         self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
   524         self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
  1047 
       
  1048         self.copyDefaultWhiteChars = True
   525         self.copyDefaultWhiteChars = True
  1049 
       
  1050         self.mayReturnEmpty = False
   526         self.mayReturnEmpty = False
  1051 
       
  1052         self.keepTabs = False
   527         self.keepTabs = False
  1053 
       
  1054         self.ignoreExprs = list()
   528         self.ignoreExprs = list()
  1055 
       
  1056         self.debug = False
   529         self.debug = False
  1057 
       
  1058         self.streamlined = False
   530         self.streamlined = False
  1059 
       
  1060         self.mayIndexError = True
   531         self.mayIndexError = True
  1061 
       
  1062         self.errmsg = ""
   532         self.errmsg = ""
  1063 
       
  1064         self.modalResults = True
   533         self.modalResults = True
  1065 
       
  1066         self.debugActions = ( None, None, None )
   534         self.debugActions = ( None, None, None )
  1067 
       
  1068         self.re = None
   535         self.re = None
  1069 
   536 
  1070 
       
  1071 
       
  1072     def copy( self ):
   537     def copy( self ):
  1073 
       
  1074         """Make a copy of this ParserElement.  Useful for defining different parse actions
   538         """Make a copy of this ParserElement.  Useful for defining different parse actions
  1075 
       
  1076            for the same parsing pattern, using copies of the original parse element."""
   539            for the same parsing pattern, using copies of the original parse element."""
  1077 
       
  1078         cpy = copy.copy( self )
   540         cpy = copy.copy( self )
  1079 
       
  1080         cpy.parseAction = self.parseAction[:]
   541         cpy.parseAction = self.parseAction[:]
  1081 
       
  1082         cpy.ignoreExprs = self.ignoreExprs[:]
   542         cpy.ignoreExprs = self.ignoreExprs[:]
  1083 
       
  1084         if self.copyDefaultWhiteChars:
   543         if self.copyDefaultWhiteChars:
  1085 
       
  1086             cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
   544             cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
  1087 
       
  1088         return cpy
   545         return cpy
  1089 
   546 
  1090 
       
  1091 
       
  1092     def setName( self, name ):
   547     def setName( self, name ):
  1093 
       
  1094         """Define name for this expression, for use in debugging."""
   548         """Define name for this expression, for use in debugging."""
  1095 
       
  1096         self.name = name
   549         self.name = name
  1097 
       
  1098         self.errmsg = "Expected " + self.name
   550         self.errmsg = "Expected " + self.name
  1099 
       
  1100         return self
   551         return self
  1101 
   552 
  1102 
       
  1103 
       
  1104     def setResultsName( self, name, listAllMatches=False ):
   553     def setResultsName( self, name, listAllMatches=False ):
  1105 
       
  1106         """Define name for referencing matching tokens as a nested attribute 
   554         """Define name for referencing matching tokens as a nested attribute 
  1107 
       
  1108            of the returned parse results.
   555            of the returned parse results.
  1109 
       
  1110            NOTE: this returns a *copy* of the original ParserElement object;
   556            NOTE: this returns a *copy* of the original ParserElement object;
  1111 
       
  1112            this is so that the client can define a basic element, such as an
   557            this is so that the client can define a basic element, such as an
  1113 
       
  1114            integer, and reference it in multiple places with different names.
   558            integer, and reference it in multiple places with different names.
  1115 
       
  1116         """
   559         """
  1117 
       
  1118         newself = self.copy()
   560         newself = self.copy()
  1119 
       
  1120         newself.resultsName = name
   561         newself.resultsName = name
  1121 
       
  1122         newself.modalResults = not listAllMatches
   562         newself.modalResults = not listAllMatches
  1123 
       
  1124         return newself
   563         return newself
  1125 
   564 
  1126 
       
  1127 
       
  1128     def normalizeParseActionArgs( f ):
   565     def normalizeParseActionArgs( f ):
  1129 
       
  1130         """Internal method used to decorate parse actions that take fewer than 3 arguments,
   566         """Internal method used to decorate parse actions that take fewer than 3 arguments,
  1131 
       
  1132            so that all parse actions can be called as f(s,l,t)."""
   567            so that all parse actions can be called as f(s,l,t)."""
  1133 
       
  1134         STAR_ARGS = 4
   568         STAR_ARGS = 4
  1135 
   569 
  1136 
       
  1137 
       
  1138         try:
   570         try:
  1139 
       
  1140             restore = None
   571             restore = None
  1141 
       
  1142             if isinstance(f,type):
   572             if isinstance(f,type):
  1143 
       
  1144                 restore = f
   573                 restore = f
  1145 
       
  1146                 f = f.__init__
   574                 f = f.__init__
  1147 
       
  1148             if f.func_code.co_flags & STAR_ARGS:
   575             if f.func_code.co_flags & STAR_ARGS:
  1149 
       
  1150                 return f
   576                 return f
  1151 
       
  1152             numargs = f.func_code.co_argcount
   577             numargs = f.func_code.co_argcount
  1153 
       
  1154             if hasattr(f,"im_self"):
   578             if hasattr(f,"im_self"):
  1155 
       
  1156                 numargs -= 1
   579                 numargs -= 1
  1157 
       
  1158             if restore:
   580             if restore:
  1159 
       
  1160                 f = restore
   581                 f = restore
  1161 
       
  1162         except AttributeError:
   582         except AttributeError:
  1163 
       
  1164             try:
   583             try:
  1165 
       
  1166                 # not a function, must be a callable object, get info from the
   584                 # not a function, must be a callable object, get info from the
  1167 
       
  1168                 # im_func binding of its bound __call__ method
   585                 # im_func binding of its bound __call__ method
  1169 
       
  1170                 if f.__call__.im_func.func_code.co_flags & STAR_ARGS:
   586                 if f.__call__.im_func.func_code.co_flags & STAR_ARGS:
  1171 
       
  1172                     return f
   587                     return f
  1173 
       
  1174                 numargs = f.__call__.im_func.func_code.co_argcount
   588                 numargs = f.__call__.im_func.func_code.co_argcount
  1175 
       
  1176                 if hasattr(f.__call__,"im_self"):
   589                 if hasattr(f.__call__,"im_self"):
  1177 
       
  1178                     numargs -= 1
   590                     numargs -= 1
  1179 
       
  1180             except AttributeError:
   591             except AttributeError:
  1181 
       
  1182                 # not a bound method, get info directly from __call__ method
   592                 # not a bound method, get info directly from __call__ method
  1183 
       
  1184                 if f.__call__.func_code.co_flags & STAR_ARGS:
   593                 if f.__call__.func_code.co_flags & STAR_ARGS:
  1185 
       
  1186                     return f
   594                     return f
  1187 
       
  1188                 numargs = f.__call__.func_code.co_argcount
   595                 numargs = f.__call__.func_code.co_argcount
  1189 
       
  1190                 if hasattr(f.__call__,"im_self"):
   596                 if hasattr(f.__call__,"im_self"):
  1191 
       
  1192                     numargs -= 1
   597                     numargs -= 1
  1193 
   598 
  1194 
       
  1195 
       
  1196         #~ print "adding function %s with %d args" % (f.func_name,numargs)
   599         #~ print "adding function %s with %d args" % (f.func_name,numargs)
  1197 
       
  1198         if numargs == 3:
   600         if numargs == 3:
  1199 
       
  1200             return f
   601             return f
  1201 
   602         else:
  1202         else:
       
  1203 
       
  1204             if numargs == 2:
   603             if numargs == 2:
  1205 
       
  1206                 def tmp(s,l,t):
   604                 def tmp(s,l,t):
  1207 
       
  1208                     return f(l,t)
   605                     return f(l,t)
  1209 
       
  1210             elif numargs == 1:
   606             elif numargs == 1:
  1211 
       
  1212                 def tmp(s,l,t):
   607                 def tmp(s,l,t):
  1213 
       
  1214                     return f(t)
   608                     return f(t)
  1215 
       
  1216             else: #~ numargs == 0:
   609             else: #~ numargs == 0:
  1217 
       
  1218                 def tmp(s,l,t):
   610                 def tmp(s,l,t):
  1219 
       
  1220                     return f()
   611                     return f()
  1221 
       
  1222             return tmp
   612             return tmp
  1223 
       
  1224     normalizeParseActionArgs = staticmethod(normalizeParseActionArgs)
   613     normalizeParseActionArgs = staticmethod(normalizeParseActionArgs)
  1225 
       
  1226             
   614             
  1227 
       
  1228     def setParseAction( self, *fns ):
   615     def setParseAction( self, *fns ):
  1229 
       
  1230         """Define action to perform when successfully matching parse element definition.
   616         """Define action to perform when successfully matching parse element definition.
  1231 
       
  1232            Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
   617            Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
  1233 
       
  1234            fn(loc,toks), fn(toks), or just fn(), where:
   618            fn(loc,toks), fn(toks), or just fn(), where:
  1235 
       
  1236             - s   = the original string being parsed
   619             - s   = the original string being parsed
  1237 
       
  1238             - loc = the location of the matching substring
   620             - loc = the location of the matching substring
  1239 
       
  1240             - toks = a list of the matched tokens, packaged as a ParseResults object
   621             - toks = a list of the matched tokens, packaged as a ParseResults object
  1241 
       
  1242            If the functions in fns modify the tokens, they can return them as the return
   622            If the functions in fns modify the tokens, they can return them as the return
  1243 
       
  1244            value from fn, and the modified list of tokens will replace the original.
   623            value from fn, and the modified list of tokens will replace the original.
  1245 
       
  1246            Otherwise, fn does not need to return any value."""
   624            Otherwise, fn does not need to return any value."""
  1247 
       
  1248         self.parseAction = map(self.normalizeParseActionArgs, list(fns))
   625         self.parseAction = map(self.normalizeParseActionArgs, list(fns))
  1249 
       
  1250         return self
   626         return self
  1251 
   627 
  1252 
       
  1253 
       
  1254     def addParseAction( self, *fns ):
   628     def addParseAction( self, *fns ):
  1255 
       
  1256         """Add parse action to expression's list of parse actions. See setParseAction_."""
   629         """Add parse action to expression's list of parse actions. See setParseAction_."""
  1257 
       
  1258         self.parseAction += map(self.normalizeParseActionArgs, list(fns))
   630         self.parseAction += map(self.normalizeParseActionArgs, list(fns))
  1259 
       
  1260         return self
   631         return self
  1261 
   632 
  1262 
       
  1263 
       
  1264     def setFailAction( self, fn ):
   633     def setFailAction( self, fn ):
  1265 
       
  1266         """Define action to perform if parsing fails at this expression. 
   634         """Define action to perform if parsing fails at this expression. 
  1267 
       
  1268            Fail acton fn is a callable function that takes the arguments 
   635            Fail acton fn is a callable function that takes the arguments 
  1269 
       
  1270            fn(s,loc,expr,err) where:
   636            fn(s,loc,expr,err) where:
  1271 
       
  1272             - s = string being parsed
   637             - s = string being parsed
  1273 
       
  1274             - loc = location where expression match was attempted and failed
   638             - loc = location where expression match was attempted and failed
  1275 
       
  1276             - expr = the parse expression that failed
   639             - expr = the parse expression that failed
  1277 
       
  1278             - err = the exception thrown
   640             - err = the exception thrown
  1279 
       
  1280            The function returns no value.  It may throw ParseFatalException
   641            The function returns no value.  It may throw ParseFatalException
  1281 
       
  1282            if it is desired to stop parsing immediately."""
   642            if it is desired to stop parsing immediately."""
  1283 
       
  1284         self.failAction = fn
   643         self.failAction = fn
  1285 
       
  1286         return self
   644         return self
  1287 
   645         
  1288         
       
  1289 
       
  1290     def skipIgnorables( self, instring, loc ):
   646     def skipIgnorables( self, instring, loc ):
  1291 
       
  1292         exprsFound = True
   647         exprsFound = True
  1293 
       
  1294         while exprsFound:
   648         while exprsFound:
  1295 
       
  1296             exprsFound = False
   649             exprsFound = False
  1297 
       
  1298             for e in self.ignoreExprs:
   650             for e in self.ignoreExprs:
  1299 
       
  1300                 try:
   651                 try:
  1301 
       
  1302                     while 1:
   652                     while 1:
  1303 
       
  1304                         loc,dummy = e._parse( instring, loc )
   653                         loc,dummy = e._parse( instring, loc )
  1305 
       
  1306                         exprsFound = True
   654                         exprsFound = True
  1307 
       
  1308                 except ParseException:
   655                 except ParseException:
  1309 
       
  1310                     pass
   656                     pass
  1311 
       
  1312         return loc
   657         return loc
  1313 
   658 
  1314 
       
  1315 
       
  1316     def preParse( self, instring, loc ):
   659     def preParse( self, instring, loc ):
  1317 
       
  1318         if self.ignoreExprs:
   660         if self.ignoreExprs:
  1319 
       
  1320             loc = self.skipIgnorables( instring, loc )
   661             loc = self.skipIgnorables( instring, loc )
  1321 
   662         
  1322         
       
  1323 
       
  1324         if self.skipWhitespace:
   663         if self.skipWhitespace:
  1325 
       
  1326             wt = self.whiteChars
   664             wt = self.whiteChars
  1327 
       
  1328             instrlen = len(instring)
   665             instrlen = len(instring)
  1329 
       
  1330             while loc < instrlen and instring[loc] in wt:
   666             while loc < instrlen and instring[loc] in wt:
  1331 
       
  1332                 loc += 1
   667                 loc += 1
  1333 
       
  1334                 
   668                 
  1335 
       
  1336         return loc
   669         return loc
  1337 
   670 
  1338 
       
  1339 
       
  1340     def parseImpl( self, instring, loc, doActions=True ):
   671     def parseImpl( self, instring, loc, doActions=True ):
  1341 
       
  1342         return loc, []
   672         return loc, []
  1343 
   673 
  1344 
       
  1345 
       
  1346     def postParse( self, instring, loc, tokenlist ):
   674     def postParse( self, instring, loc, tokenlist ):
  1347 
       
  1348         return tokenlist
   675         return tokenlist
  1349 
   676 
  1350 
       
  1351 
       
  1352     #~ @profile
   677     #~ @profile
  1353 
       
  1354     def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
   678     def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
  1355 
       
  1356         debugging = ( self.debug ) #and doActions )
   679         debugging = ( self.debug ) #and doActions )
  1357 
   680 
  1358 
       
  1359 
       
  1360         if debugging or self.failAction:
   681         if debugging or self.failAction:
  1361 
       
  1362             #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
   682             #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
  1363 
       
  1364             if (self.debugActions[0] ):
   683             if (self.debugActions[0] ):
  1365 
       
  1366                 self.debugActions[0]( instring, loc, self )
   684                 self.debugActions[0]( instring, loc, self )
  1367 
       
  1368             if callPreParse:
   685             if callPreParse:
  1369 
       
  1370                 preloc = self.preParse( instring, loc )
   686                 preloc = self.preParse( instring, loc )
  1371 
       
  1372             else:
   687             else:
  1373 
       
  1374                 preloc = loc
   688                 preloc = loc
  1375 
       
  1376             tokensStart = loc
   689             tokensStart = loc
  1377 
       
  1378             try:
   690             try:
  1379 
       
  1380                 try:
   691                 try:
  1381 
       
  1382                     loc,tokens = self.parseImpl( instring, preloc, doActions )
   692                     loc,tokens = self.parseImpl( instring, preloc, doActions )
  1383 
       
  1384                 except IndexError:
   693                 except IndexError:
  1385 
       
  1386                     raise ParseException( instring, len(instring), self.errmsg, self )
   694                     raise ParseException( instring, len(instring), self.errmsg, self )
  1387 
       
  1388             #~ except ReparseException, retryEx:
   695             #~ except ReparseException, retryEx:
  1389 
       
  1390                 #~ pass
   696                 #~ pass
  1391 
       
  1392             except ParseException, err:
   697             except ParseException, err:
  1393 
       
  1394                 #~ print "Exception raised:", err
   698                 #~ print "Exception raised:", err
  1395 
       
  1396                 if self.debugActions[2]:
   699                 if self.debugActions[2]:
  1397 
       
  1398                     self.debugActions[2]( instring, tokensStart, self, err )
   700                     self.debugActions[2]( instring, tokensStart, self, err )
  1399 
       
  1400                 if self.failAction:
   701                 if self.failAction:
  1401 
       
  1402                     self.failAction( instring, tokensStart, self, err )
   702                     self.failAction( instring, tokensStart, self, err )
  1403 
       
  1404                 raise
   703                 raise
  1405 
   704         else:
  1406         else:
       
  1407 
       
  1408             if callPreParse:
   705             if callPreParse:
  1409 
       
  1410                 preloc = self.preParse( instring, loc )
   706                 preloc = self.preParse( instring, loc )
  1411 
       
  1412             else:
   707             else:
  1413 
       
  1414                 preloc = loc
   708                 preloc = loc
  1415 
       
  1416             tokensStart = loc
   709             tokensStart = loc
  1417 
       
  1418             if self.mayIndexError or loc >= len(instring):
   710             if self.mayIndexError or loc >= len(instring):
  1419 
       
  1420                 try:
   711                 try:
  1421 
       
  1422                     loc,tokens = self.parseImpl( instring, preloc, doActions )
   712                     loc,tokens = self.parseImpl( instring, preloc, doActions )
  1423 
       
  1424                 except IndexError:
   713                 except IndexError:
  1425 
       
  1426                     raise ParseException( instring, len(instring), self.errmsg, self )
   714                     raise ParseException( instring, len(instring), self.errmsg, self )
  1427 
       
  1428             else:
   715             else:
  1429 
       
  1430                 loc,tokens = self.parseImpl( instring, preloc, doActions )
   716                 loc,tokens = self.parseImpl( instring, preloc, doActions )
  1431 
   717         
  1432         
       
  1433 
       
  1434         tokens = self.postParse( instring, loc, tokens )
   718         tokens = self.postParse( instring, loc, tokens )
  1435 
   719 
  1436 
       
  1437 
       
  1438         retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
   720         retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
  1439 
       
  1440         if self.parseAction and doActions:
   721         if self.parseAction and doActions:
  1441 
       
  1442             if debugging:
   722             if debugging:
  1443 
       
  1444                 try:
   723                 try:
  1445 
       
  1446                     for fn in self.parseAction:
   724                     for fn in self.parseAction:
  1447 
       
  1448                         tokens = fn( instring, tokensStart, retTokens )
   725                         tokens = fn( instring, tokensStart, retTokens )
  1449 
       
  1450                         if tokens is not None:
   726                         if tokens is not None:
  1451 
       
  1452                             retTokens = ParseResults( tokens, 
   727                             retTokens = ParseResults( tokens, 
  1453 
       
  1454                                                       self.resultsName, 
   728                                                       self.resultsName, 
  1455 
       
  1456                                                       asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
   729                                                       asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
  1457 
       
  1458                                                       modal=self.modalResults )
   730                                                       modal=self.modalResults )
  1459 
       
  1460                 except ParseException, err:
   731                 except ParseException, err:
  1461 
       
  1462                     #~ print "Exception raised in user parse action:", err
   732                     #~ print "Exception raised in user parse action:", err
  1463 
       
  1464                     if (self.debugActions[2] ):
   733                     if (self.debugActions[2] ):
  1465 
       
  1466                         self.debugActions[2]( instring, tokensStart, self, err )
   734                         self.debugActions[2]( instring, tokensStart, self, err )
  1467 
       
  1468                     raise
   735                     raise
  1469 
       
  1470             else:
   736             else:
  1471 
       
  1472                 for fn in self.parseAction:
   737                 for fn in self.parseAction:
  1473 
       
  1474                     tokens = fn( instring, tokensStart, retTokens )
   738                     tokens = fn( instring, tokensStart, retTokens )
  1475 
       
  1476                     if tokens is not None:
   739                     if tokens is not None:
  1477 
       
  1478                         retTokens = ParseResults( tokens, 
   740                         retTokens = ParseResults( tokens, 
  1479 
       
  1480                                                   self.resultsName, 
   741                                                   self.resultsName, 
  1481 
       
  1482                                                   asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
   742                                                   asList=self.saveAsList and isinstance(tokens,(ParseResults,list)), 
  1483 
       
  1484                                                   modal=self.modalResults )
   743                                                   modal=self.modalResults )
  1485 
   744 
  1486 
       
  1487 
       
  1488         if debugging:
   745         if debugging:
  1489 
       
  1490             #~ print "Matched",self,"->",retTokens.asList()
   746             #~ print "Matched",self,"->",retTokens.asList()
  1491 
       
  1492             if (self.debugActions[1] ):
   747             if (self.debugActions[1] ):
  1493 
       
  1494                 self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
   748                 self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
  1495 
   749 
  1496 
       
  1497 
       
  1498         return loc, retTokens
   750         return loc, retTokens
  1499 
   751 
  1500 
       
  1501 
       
  1502     def tryParse( self, instring, loc ):
   752     def tryParse( self, instring, loc ):
  1503 
       
  1504         return self._parse( instring, loc, doActions=False )[0]
   753         return self._parse( instring, loc, doActions=False )[0]
  1505 
   754     
  1506     
       
  1507 
       
  1508     # this method gets repeatedly called during backtracking with the same arguments -
   755     # this method gets repeatedly called during backtracking with the same arguments -
  1509 
       
  1510     # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
   756     # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
  1511 
       
  1512     def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
   757     def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
  1513 
       
  1514         if doActions and self.parseAction:
   758         if doActions and self.parseAction:
  1515 
       
  1516             return self._parseNoCache( instring, loc, doActions, callPreParse )
   759             return self._parseNoCache( instring, loc, doActions, callPreParse )
  1517 
       
  1518         lookup = (self,instring,loc,callPreParse)
   760         lookup = (self,instring,loc,callPreParse)
  1519 
       
  1520         if lookup in ParserElement._exprArgCache:
   761         if lookup in ParserElement._exprArgCache:
  1521 
       
  1522             value = ParserElement._exprArgCache[ lookup ]
   762             value = ParserElement._exprArgCache[ lookup ]
  1523 
       
  1524             if isinstance(value,Exception):
   763             if isinstance(value,Exception):
  1525 
       
  1526                 if isinstance(value,ParseBaseException):
   764                 if isinstance(value,ParseBaseException):
  1527 
       
  1528                     value.loc = loc
   765                     value.loc = loc
  1529 
       
  1530                 raise value
   766                 raise value
  1531 
       
  1532             return value
   767             return value
  1533 
   768         else:
  1534         else:
       
  1535 
       
  1536             try:
   769             try:
  1537 
       
  1538                 ParserElement._exprArgCache[ lookup ] = \
   770                 ParserElement._exprArgCache[ lookup ] = \
  1539                     value = self._parseNoCache( instring, loc, doActions, callPreParse )
   771                     value = self._parseNoCache( instring, loc, doActions, callPreParse )
  1540 
       
  1541                 return value
   772                 return value
  1542 
       
  1543             except ParseBaseException, pe:
   773             except ParseBaseException, pe:
  1544 
       
  1545                 ParserElement._exprArgCache[ lookup ] = pe
   774                 ParserElement._exprArgCache[ lookup ] = pe
  1546 
       
  1547                 raise
   775                 raise
  1548 
   776 
  1549 
       
  1550 
       
  1551     _parse = _parseNoCache
   777     _parse = _parseNoCache
  1552 
   778 
  1553 
       
  1554 
       
  1555     # argument cache for optimizing repeated calls when backtracking through recursive expressions
   779     # argument cache for optimizing repeated calls when backtracking through recursive expressions
  1556 
       
  1557     _exprArgCache = {}
   780     _exprArgCache = {}
  1558 
       
  1559     def resetCache():
   781     def resetCache():
  1560 
       
  1561         ParserElement._exprArgCache.clear()
   782         ParserElement._exprArgCache.clear()
  1562 
       
  1563     resetCache = staticmethod(resetCache)
   783     resetCache = staticmethod(resetCache)
  1564 
   784     
  1565     
       
  1566 
       
  1567     _packratEnabled = False
   785     _packratEnabled = False
  1568 
       
  1569     def enablePackrat():
   786     def enablePackrat():
  1570 
       
  1571         """Enables "packrat" parsing, which adds memoizing to the parsing logic.
   787         """Enables "packrat" parsing, which adds memoizing to the parsing logic.
  1572 
       
  1573            Repeated parse attempts at the same string location (which happens 
   788            Repeated parse attempts at the same string location (which happens 
  1574 
       
  1575            often in many complex grammars) can immediately return a cached value, 
   789            often in many complex grammars) can immediately return a cached value, 
  1576 
       
  1577            instead of re-executing parsing/validating code.  Memoizing is done of
   790            instead of re-executing parsing/validating code.  Memoizing is done of
  1578 
       
  1579            both valid results and parsing exceptions.
   791            both valid results and parsing exceptions.
  1580 
       
  1581             
   792             
  1582 
       
  1583            This speedup may break existing programs that use parse actions that 
   793            This speedup may break existing programs that use parse actions that 
  1584 
       
  1585            have side-effects.  For this reason, packrat parsing is disabled when
   794            have side-effects.  For this reason, packrat parsing is disabled when
  1586 
       
  1587            you first import pyparsing.  To activate the packrat feature, your
   795            you first import pyparsing.  To activate the packrat feature, your
  1588 
       
  1589            program must call the class method ParserElement.enablePackrat().  If
   796            program must call the class method ParserElement.enablePackrat().  If
  1590 
       
  1591            your program uses psyco to "compile as you go", you must call 
   797            your program uses psyco to "compile as you go", you must call 
  1592 
       
  1593            enablePackrat before calling psyco.full().  If you do not do this,
   798            enablePackrat before calling psyco.full().  If you do not do this,
  1594 
       
  1595            Python will crash.  For best results, call enablePackrat() immediately
   799            Python will crash.  For best results, call enablePackrat() immediately
  1596 
       
  1597            after importing pyparsing.
   800            after importing pyparsing.
  1598 
       
  1599         """
   801         """
  1600 
       
  1601         if not ParserElement._packratEnabled:
   802         if not ParserElement._packratEnabled:
  1602 
       
  1603             ParserElement._packratEnabled = True
   803             ParserElement._packratEnabled = True
  1604 
       
  1605             ParserElement._parse = ParserElement._parseCache
   804             ParserElement._parse = ParserElement._parseCache
  1606 
       
  1607     enablePackrat = staticmethod(enablePackrat)
   805     enablePackrat = staticmethod(enablePackrat)
  1608 
   806 
  1609 
       
  1610 
       
  1611     def parseString( self, instring ):
   807     def parseString( self, instring ):
  1612 
       
  1613         """Execute the parse expression with the given string.
   808         """Execute the parse expression with the given string.
  1614 
       
  1615            This is the main interface to the client code, once the complete 
   809            This is the main interface to the client code, once the complete 
  1616 
       
  1617            expression has been built.
   810            expression has been built.
  1618 
       
  1619         """
   811         """
  1620 
       
  1621         ParserElement.resetCache()
   812         ParserElement.resetCache()
  1622 
       
  1623         if not self.streamlined:
   813         if not self.streamlined:
  1624 
       
  1625             self.streamline()
   814             self.streamline()
  1626 
       
  1627             #~ self.saveAsList = True
   815             #~ self.saveAsList = True
  1628 
       
  1629         for e in self.ignoreExprs:
   816         for e in self.ignoreExprs:
  1630 
       
  1631             e.streamline()
   817             e.streamline()
  1632 
       
  1633         if self.keepTabs:
   818         if self.keepTabs:
  1634 
       
  1635             loc, tokens = self._parse( instring, 0 )
   819             loc, tokens = self._parse( instring, 0 )
  1636 
   820         else:
  1637         else:
       
  1638 
       
  1639             loc, tokens = self._parse( instring.expandtabs(), 0 )
   821             loc, tokens = self._parse( instring.expandtabs(), 0 )
  1640 
       
  1641         return tokens
   822         return tokens
  1642 
   823 
  1643 
       
  1644 
       
  1645     def scanString( self, instring, maxMatches=sys.maxint ):
   824     def scanString( self, instring, maxMatches=sys.maxint ):
  1646 
       
  1647         """Scan the input string for expression matches.  Each match will return the 
   825         """Scan the input string for expression matches.  Each match will return the 
  1648 
       
  1649            matching tokens, start location, and end location.  May be called with optional
   826            matching tokens, start location, and end location.  May be called with optional
  1650 
       
  1651            maxMatches argument, to clip scanning after 'n' matches are found."""
   827            maxMatches argument, to clip scanning after 'n' matches are found."""
  1652 
       
  1653         if not self.streamlined:
   828         if not self.streamlined:
  1654 
       
  1655             self.streamline()
   829             self.streamline()
  1656 
       
  1657         for e in self.ignoreExprs:
   830         for e in self.ignoreExprs:
  1658 
       
  1659             e.streamline()
   831             e.streamline()
  1660 
   832         
  1661         
       
  1662 
       
  1663         if not self.keepTabs:
   833         if not self.keepTabs:
  1664 
       
  1665             instring = instring.expandtabs()
   834             instring = instring.expandtabs()
  1666 
       
  1667         instrlen = len(instring)
   835         instrlen = len(instring)
  1668 
       
  1669         loc = 0
   836         loc = 0
  1670 
       
  1671         preparseFn = self.preParse
   837         preparseFn = self.preParse
  1672 
       
  1673         parseFn = self._parse
   838         parseFn = self._parse
  1674 
       
  1675         ParserElement.resetCache()
   839         ParserElement.resetCache()
  1676 
       
  1677         matches = 0
   840         matches = 0
  1678 
       
  1679         while loc <= instrlen and matches < maxMatches:
   841         while loc <= instrlen and matches < maxMatches:
  1680 
       
  1681             try:
   842             try:
  1682 
       
  1683                 preloc = preparseFn( instring, loc )
   843                 preloc = preparseFn( instring, loc )
  1684 
       
  1685                 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
   844                 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
  1686 
       
  1687             except ParseException:
   845             except ParseException:
  1688 
       
  1689                 loc = preloc+1
   846                 loc = preloc+1
  1690 
       
  1691             else:
   847             else:
  1692 
       
  1693                 matches += 1
   848                 matches += 1
  1694 
       
  1695                 yield tokens, preloc, nextLoc
   849                 yield tokens, preloc, nextLoc
  1696 
       
  1697                 loc = nextLoc
   850                 loc = nextLoc
  1698 
   851         
  1699         
       
  1700 
       
  1701     def transformString( self, instring ):
   852     def transformString( self, instring ):
  1702 
       
  1703         """Extension to scanString, to modify matching text with modified tokens that may
   853         """Extension to scanString, to modify matching text with modified tokens that may
  1704 
       
  1705            be returned from a parse action.  To use transformString, define a grammar and 
   854            be returned from a parse action.  To use transformString, define a grammar and 
  1706 
       
  1707            attach a parse action to it that modifies the returned token list.  
   855            attach a parse action to it that modifies the returned token list.  
  1708 
       
  1709            Invoking transformString() on a target string will then scan for matches, 
   856            Invoking transformString() on a target string will then scan for matches, 
  1710 
       
  1711            and replace the matched text patterns according to the logic in the parse 
   857            and replace the matched text patterns according to the logic in the parse 
  1712 
       
  1713            action.  transformString() returns the resulting transformed string."""
   858            action.  transformString() returns the resulting transformed string."""
  1714 
       
  1715         out = []
   859         out = []
  1716 
       
  1717         lastE = 0
   860         lastE = 0
  1718 
       
  1719         # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
   861         # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
  1720 
       
  1721         # keep string locs straight between transformString and scanString
   862         # keep string locs straight between transformString and scanString
  1722 
       
  1723         self.keepTabs = True
   863         self.keepTabs = True
  1724 
       
  1725         for t,s,e in self.scanString( instring ):
   864         for t,s,e in self.scanString( instring ):
  1726 
       
  1727             out.append( instring[lastE:s] )
   865             out.append( instring[lastE:s] )
  1728 
       
  1729             if t:
   866             if t:
  1730 
       
  1731                 if isinstance(t,ParseResults):
   867                 if isinstance(t,ParseResults):
  1732 
       
  1733                     out += t.asList()
   868                     out += t.asList()
  1734 
       
  1735                 elif isinstance(t,list):
   869                 elif isinstance(t,list):
  1736 
       
  1737                     out += t
   870                     out += t
  1738 
       
  1739                 else:
   871                 else:
  1740 
       
  1741                     out.append(t)
   872                     out.append(t)
  1742 
       
  1743             lastE = e
   873             lastE = e
  1744 
       
  1745         out.append(instring[lastE:])
   874         out.append(instring[lastE:])
  1746 
       
  1747         return "".join(out)
   875         return "".join(out)
  1748 
   876 
  1749 
       
  1750 
       
  1751     def searchString( self, instring, maxMatches=sys.maxint ):
   877     def searchString( self, instring, maxMatches=sys.maxint ):
  1752 
       
  1753         """Another extension to scanString, simplifying the access to the tokens found
   878         """Another extension to scanString, simplifying the access to the tokens found
  1754 
       
  1755            to match the given parse expression.  May be called with optional
   879            to match the given parse expression.  May be called with optional
  1756 
       
  1757            maxMatches argument, to clip searching after 'n' matches are found.
   880            maxMatches argument, to clip searching after 'n' matches are found.
  1758 
       
  1759         """
   881         """
  1760 
       
  1761         return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
   882         return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
  1762 
       
  1763             
   883             
  1764 
       
  1765     def __add__(self, other ):
   884     def __add__(self, other ):
  1766 
       
  1767         """Implementation of + operator - returns And"""
   885         """Implementation of + operator - returns And"""
  1768 
       
  1769         if isinstance( other, basestring ):
   886         if isinstance( other, basestring ):
  1770 
       
  1771             other = Literal( other )
   887             other = Literal( other )
  1772 
       
  1773         if not isinstance( other, ParserElement ):
   888         if not isinstance( other, ParserElement ):
  1774 
       
  1775             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
   889             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
  1776 
       
  1777                     SyntaxWarning, stacklevel=2)
   890                     SyntaxWarning, stacklevel=2)
  1778 
       
  1779         return And( [ self, other ] )
   891         return And( [ self, other ] )
  1780 
   892 
  1781 
       
  1782 
       
  1783     def __radd__(self, other ):
   893     def __radd__(self, other ):
  1784 
       
  1785         """Implementation of += operator"""
   894         """Implementation of += operator"""
  1786 
       
  1787         if isinstance( other, basestring ):
   895         if isinstance( other, basestring ):
  1788 
       
  1789             other = Literal( other )
   896             other = Literal( other )
  1790 
       
  1791         if not isinstance( other, ParserElement ):
   897         if not isinstance( other, ParserElement ):
  1792 
       
  1793             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
   898             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
  1794 
       
  1795                     SyntaxWarning, stacklevel=2)
   899                     SyntaxWarning, stacklevel=2)
  1796 
       
  1797         return other + self
   900         return other + self
  1798 
   901 
  1799 
       
  1800 
       
  1801     def __or__(self, other ):
   902     def __or__(self, other ):
  1802 
       
  1803         """Implementation of | operator - returns MatchFirst"""
   903         """Implementation of | operator - returns MatchFirst"""
  1804 
       
  1805         if isinstance( other, basestring ):
   904         if isinstance( other, basestring ):
  1806 
       
  1807             other = Literal( other )
   905             other = Literal( other )
  1808 
       
  1809         if not isinstance( other, ParserElement ):
   906         if not isinstance( other, ParserElement ):
  1810 
       
  1811             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
   907             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
  1812 
       
  1813                     SyntaxWarning, stacklevel=2)
   908                     SyntaxWarning, stacklevel=2)
  1814 
       
  1815         return MatchFirst( [ self, other ] )
   909         return MatchFirst( [ self, other ] )
  1816 
   910 
  1817 
       
  1818 
       
  1819     def __ror__(self, other ):
   911     def __ror__(self, other ):
  1820 
       
  1821         """Implementation of |= operator"""
   912         """Implementation of |= operator"""
  1822 
       
  1823         if isinstance( other, basestring ):
   913         if isinstance( other, basestring ):
  1824 
       
  1825             other = Literal( other )
   914             other = Literal( other )
  1826 
       
  1827         if not isinstance( other, ParserElement ):
   915         if not isinstance( other, ParserElement ):
  1828 
       
  1829             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
   916             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
  1830 
       
  1831                     SyntaxWarning, stacklevel=2)
   917                     SyntaxWarning, stacklevel=2)
  1832 
       
  1833         return other | self
   918         return other | self
  1834 
   919 
  1835 
       
  1836 
       
  1837     def __xor__(self, other ):
   920     def __xor__(self, other ):
  1838 
       
  1839         """Implementation of ^ operator - returns Or"""
   921         """Implementation of ^ operator - returns Or"""
  1840 
       
  1841         if isinstance( other, basestring ):
   922         if isinstance( other, basestring ):
  1842 
       
  1843             other = Literal( other )
   923             other = Literal( other )
  1844 
       
  1845         if not isinstance( other, ParserElement ):
   924         if not isinstance( other, ParserElement ):
  1846 
       
  1847             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
   925             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
  1848 
       
  1849                     SyntaxWarning, stacklevel=2)
   926                     SyntaxWarning, stacklevel=2)
  1850 
       
  1851         return Or( [ self, other ] )
   927         return Or( [ self, other ] )
  1852 
   928 
  1853 
       
  1854 
       
  1855     def __rxor__(self, other ):
   929     def __rxor__(self, other ):
  1856 
       
  1857         """Implementation of ^= operator"""
   930         """Implementation of ^= operator"""
  1858 
       
  1859         if isinstance( other, basestring ):
   931         if isinstance( other, basestring ):
  1860 
       
  1861             other = Literal( other )
   932             other = Literal( other )
  1862 
       
  1863         if not isinstance( other, ParserElement ):
   933         if not isinstance( other, ParserElement ):
  1864 
       
  1865             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
   934             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
  1866 
       
  1867                     SyntaxWarning, stacklevel=2)
   935                     SyntaxWarning, stacklevel=2)
  1868 
       
  1869         return other ^ self
   936         return other ^ self
  1870 
   937 
  1871 
       
  1872 
       
  1873     def __and__(self, other ):
   938     def __and__(self, other ):
  1874 
       
  1875         """Implementation of & operator - returns Each"""
   939         """Implementation of & operator - returns Each"""
  1876 
       
  1877         if isinstance( other, basestring ):
   940         if isinstance( other, basestring ):
  1878 
       
  1879             other = Literal( other )
   941             other = Literal( other )
  1880 
       
  1881         if not isinstance( other, ParserElement ):
   942         if not isinstance( other, ParserElement ):
  1882 
       
  1883             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
   943             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
  1884 
       
  1885                     SyntaxWarning, stacklevel=2)
   944                     SyntaxWarning, stacklevel=2)
  1886 
       
  1887         return Each( [ self, other ] )
   945         return Each( [ self, other ] )
  1888 
   946 
  1889 
       
  1890 
       
  1891     def __rand__(self, other ):
   947     def __rand__(self, other ):
  1892 
       
  1893         """Implementation of right-& operator"""
   948         """Implementation of right-& operator"""
  1894 
       
  1895         if isinstance( other, basestring ):
   949         if isinstance( other, basestring ):
  1896 
       
  1897             other = Literal( other )
   950             other = Literal( other )
  1898 
       
  1899         if not isinstance( other, ParserElement ):
   951         if not isinstance( other, ParserElement ):
  1900 
       
  1901             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
   952             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
  1902 
       
  1903                     SyntaxWarning, stacklevel=2)
   953                     SyntaxWarning, stacklevel=2)
  1904 
       
  1905         return other & self
   954         return other & self
  1906 
   955 
  1907 
       
  1908 
       
  1909     def __invert__( self ):
   956     def __invert__( self ):
  1910 
       
  1911         """Implementation of ~ operator - returns NotAny"""
   957         """Implementation of ~ operator - returns NotAny"""
  1912 
       
  1913         return NotAny( self )
   958         return NotAny( self )
  1914 
   959 
  1915 
       
  1916 
       
  1917     def suppress( self ):
   960     def suppress( self ):
  1918 
       
  1919         """Suppresses the output of this ParserElement; useful to keep punctuation from
   961         """Suppresses the output of this ParserElement; useful to keep punctuation from
  1920 
       
  1921            cluttering up returned output.
   962            cluttering up returned output.
  1922 
       
  1923         """
   963         """
  1924 
       
  1925         return Suppress( self )
   964         return Suppress( self )
  1926 
   965 
  1927 
       
  1928 
       
  1929     def leaveWhitespace( self ):
   966     def leaveWhitespace( self ):
  1930 
       
  1931         """Disables the skipping of whitespace before matching the characters in the 
   967         """Disables the skipping of whitespace before matching the characters in the 
  1932 
       
  1933            ParserElement's defined pattern.  This is normally only used internally by
   968            ParserElement's defined pattern.  This is normally only used internally by
  1934 
       
  1935            the pyparsing module, but may be needed in some whitespace-sensitive grammars.
   969            the pyparsing module, but may be needed in some whitespace-sensitive grammars.
  1936 
       
  1937         """
   970         """
  1938 
       
  1939         self.skipWhitespace = False
   971         self.skipWhitespace = False
  1940 
       
  1941         return self
   972         return self
  1942 
   973 
  1943 
       
  1944 
       
  1945     def setWhitespaceChars( self, chars ):
   974     def setWhitespaceChars( self, chars ):
  1946 
       
  1947         """Overrides the default whitespace chars
   975         """Overrides the default whitespace chars
  1948 
       
  1949         """
   976         """
  1950 
       
  1951         self.skipWhitespace = True
   977         self.skipWhitespace = True
  1952 
       
  1953         self.whiteChars = chars
   978         self.whiteChars = chars
  1954 
       
  1955         self.copyDefaultWhiteChars = False
   979         self.copyDefaultWhiteChars = False
  1956 
       
  1957         return self
   980         return self
  1958 
   981         
  1959         
       
  1960 
       
  1961     def parseWithTabs( self ):
   982     def parseWithTabs( self ):
  1962 
       
  1963         """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
   983         """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
  1964 
       
  1965            Must be called before parseString when the input grammar contains elements that 
   984            Must be called before parseString when the input grammar contains elements that 
  1966 
       
  1967            match <TAB> characters."""
   985            match <TAB> characters."""
  1968 
       
  1969         self.keepTabs = True
   986         self.keepTabs = True
  1970 
       
  1971         return self
   987         return self
  1972 
   988         
  1973         
       
  1974 
       
  1975     def ignore( self, other ):
   989     def ignore( self, other ):
  1976 
       
  1977         """Define expression to be ignored (e.g., comments) while doing pattern 
   990         """Define expression to be ignored (e.g., comments) while doing pattern 
  1978 
       
  1979            matching; may be called repeatedly, to define multiple comment or other
   991            matching; may be called repeatedly, to define multiple comment or other
  1980 
       
  1981            ignorable patterns.
   992            ignorable patterns.
  1982 
       
  1983         """
   993         """
  1984 
       
  1985         if isinstance( other, Suppress ):
   994         if isinstance( other, Suppress ):
  1986 
       
  1987             if other not in self.ignoreExprs:
   995             if other not in self.ignoreExprs:
  1988 
       
  1989                 self.ignoreExprs.append( other )
   996                 self.ignoreExprs.append( other )
  1990 
   997         else:
  1991         else:
       
  1992 
       
  1993             self.ignoreExprs.append( Suppress( other ) )
   998             self.ignoreExprs.append( Suppress( other ) )
  1994 
       
  1995         return self
   999         return self
  1996 
  1000 
  1997 
       
  1998 
       
  1999     def setDebugActions( self, startAction, successAction, exceptionAction ):
  1001     def setDebugActions( self, startAction, successAction, exceptionAction ):
  2000 
       
  2001         """Enable display of debugging messages while doing pattern matching."""
  1002         """Enable display of debugging messages while doing pattern matching."""
  2002 
       
  2003         self.debugActions = (startAction or _defaultStartDebugAction, 
  1003         self.debugActions = (startAction or _defaultStartDebugAction, 
  2004 
       
  2005                              successAction or _defaultSuccessDebugAction, 
  1004                              successAction or _defaultSuccessDebugAction, 
  2006 
       
  2007                              exceptionAction or _defaultExceptionDebugAction)
  1005                              exceptionAction or _defaultExceptionDebugAction)
  2008 
       
  2009         self.debug = True
  1006         self.debug = True
  2010 
       
  2011         return self
  1007         return self
  2012 
  1008 
  2013 
       
  2014 
       
  2015     def setDebug( self, flag=True ):
  1009     def setDebug( self, flag=True ):
  2016 
       
  2017         """Enable display of debugging messages while doing pattern matching."""
  1010         """Enable display of debugging messages while doing pattern matching."""
  2018 
       
  2019         if flag:
  1011         if flag:
  2020 
       
  2021             self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
  1012             self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
  2022 
  1013         else:
  2023         else:
       
  2024 
       
  2025             self.debug = False
  1014             self.debug = False
  2026 
       
  2027         return self
  1015         return self
  2028 
  1016 
  2029 
       
  2030 
       
  2031     def __str__( self ):
  1017     def __str__( self ):
  2032 
       
  2033         return self.name
  1018         return self.name
  2034 
  1019 
  2035 
       
  2036 
       
  2037     def __repr__( self ):
  1020     def __repr__( self ):
  2038 
       
  2039         return _ustr(self)
  1021         return _ustr(self)
  2040 
  1022         
  2041         
       
  2042 
       
  2043     def streamline( self ):
  1023     def streamline( self ):
  2044 
       
  2045         self.streamlined = True
  1024         self.streamlined = True
  2046 
       
  2047         self.strRepr = None
  1025         self.strRepr = None
  2048 
       
  2049         return self
  1026         return self
  2050 
  1027         
  2051         
       
  2052 
       
  2053     def checkRecursion( self, parseElementList ):
  1028     def checkRecursion( self, parseElementList ):
  2054 
       
  2055         pass
  1029         pass
  2056 
  1030         
  2057         
       
  2058 
       
  2059     def validate( self, validateTrace=[] ):
  1031     def validate( self, validateTrace=[] ):
  2060 
       
  2061         """Check defined expressions for valid structure, check for infinite recursive definitions."""
  1032         """Check defined expressions for valid structure, check for infinite recursive definitions."""
  2062 
       
  2063         self.checkRecursion( [] )
  1033         self.checkRecursion( [] )
  2064 
  1034 
  2065 
       
  2066 
       
  2067     def parseFile( self, file_or_filename ):
  1035     def parseFile( self, file_or_filename ):
  2068 
       
  2069         """Execute the parse expression on the given file or filename.
  1036         """Execute the parse expression on the given file or filename.
  2070 
       
  2071            If a filename is specified (instead of a file object),
  1037            If a filename is specified (instead of a file object),
  2072 
       
  2073            the entire file is opened, read, and closed before parsing.
  1038            the entire file is opened, read, and closed before parsing.
  2074 
       
  2075         """
  1039         """
  2076 
       
  2077         try:
  1040         try:
  2078 
       
  2079             file_contents = file_or_filename.read()
  1041             file_contents = file_or_filename.read()
  2080 
       
  2081         except AttributeError:
  1042         except AttributeError:
  2082 
       
  2083             f = open(file_or_filename, "rb")
  1043             f = open(file_or_filename, "rb")
  2084 
       
  2085             file_contents = f.read()
  1044             file_contents = f.read()
  2086 
       
  2087             f.close()
  1045             f.close()
  2088 
       
  2089         return self.parseString(file_contents)
  1046         return self.parseString(file_contents)
  2090 
  1047 
  2091 
  1048 
  2092 
       
  2093 
       
  2094 
       
  2095 class Token(ParserElement):
  1049 class Token(ParserElement):
  2096 
       
  2097     """Abstract ParserElement subclass, for defining atomic matching patterns."""
  1050     """Abstract ParserElement subclass, for defining atomic matching patterns."""
  2098 
       
  2099     def __init__( self ):
  1051     def __init__( self ):
  2100 
       
  2101         super(Token,self).__init__( savelist=False )
  1052         super(Token,self).__init__( savelist=False )
  2102 
       
  2103         self.myException = ParseException("",0,"",self)
  1053         self.myException = ParseException("",0,"",self)
  2104 
  1054 
  2105 
       
  2106 
       
  2107     def setName(self, name):
  1055     def setName(self, name):
  2108 
       
  2109         s = super(Token,self).setName(name)
  1056         s = super(Token,self).setName(name)
  2110 
       
  2111         self.errmsg = "Expected " + self.name
  1057         self.errmsg = "Expected " + self.name
  2112 
       
  2113         s.myException.msg = self.errmsg
  1058         s.myException.msg = self.errmsg
  2114 
       
  2115         return s
  1059         return s
  2116 
  1060 
  2117 
  1061 
  2118 
       
  2119 
       
  2120 
       
  2121 class Empty(Token):
  1062 class Empty(Token):
  2122 
       
  2123     """An empty token, will always match."""
  1063     """An empty token, will always match."""
  2124 
       
  2125     def __init__( self ):
  1064     def __init__( self ):
  2126 
       
  2127         super(Empty,self).__init__()
  1065         super(Empty,self).__init__()
  2128 
       
  2129         self.name = "Empty"
  1066         self.name = "Empty"
  2130 
       
  2131         self.mayReturnEmpty = True
  1067         self.mayReturnEmpty = True
  2132 
       
  2133         self.mayIndexError = False
  1068         self.mayIndexError = False
  2134 
  1069 
  2135 
  1070 
  2136 
       
  2137 
       
  2138 
       
  2139 class NoMatch(Token):
  1071 class NoMatch(Token):
  2140 
       
  2141     """A token that will never match."""
  1072     """A token that will never match."""
  2142 
       
  2143     def __init__( self ):
  1073     def __init__( self ):
  2144 
       
  2145         super(NoMatch,self).__init__()
  1074         super(NoMatch,self).__init__()
  2146 
       
  2147         self.name = "NoMatch"
  1075         self.name = "NoMatch"
  2148 
       
  2149         self.mayReturnEmpty = True
  1076         self.mayReturnEmpty = True
  2150 
       
  2151         self.mayIndexError = False
  1077         self.mayIndexError = False
  2152 
       
  2153         self.errmsg = "Unmatchable token"
  1078         self.errmsg = "Unmatchable token"
  2154 
       
  2155         self.myException.msg = self.errmsg
  1079         self.myException.msg = self.errmsg
  2156 
  1080         
  2157         
       
  2158 
       
  2159     def parseImpl( self, instring, loc, doActions=True ):
  1081     def parseImpl( self, instring, loc, doActions=True ):
  2160 
       
  2161         exc = self.myException
  1082         exc = self.myException
  2162 
       
  2163         exc.loc = loc
  1083         exc.loc = loc
  2164 
       
  2165         exc.pstr = instring
  1084         exc.pstr = instring
  2166 
       
  2167         raise exc
  1085         raise exc
  2168 
  1086 
  2169 
  1087 
  2170 
       
  2171 
       
  2172 
       
  2173 class Literal(Token):
  1088 class Literal(Token):
  2174 
       
  2175     """Token to exactly match a specified string."""
  1089     """Token to exactly match a specified string."""
  2176 
       
  2177     def __init__( self, matchString ):
  1090     def __init__( self, matchString ):
  2178 
       
  2179         super(Literal,self).__init__()
  1091         super(Literal,self).__init__()
  2180 
       
  2181         self.match = matchString
  1092         self.match = matchString
  2182 
       
  2183         self.matchLen = len(matchString)
  1093         self.matchLen = len(matchString)
  2184 
       
  2185         try:
  1094         try:
  2186 
       
  2187             self.firstMatchChar = matchString[0]
  1095             self.firstMatchChar = matchString[0]
  2188 
       
  2189         except IndexError:
  1096         except IndexError:
  2190 
       
  2191             warnings.warn("null string passed to Literal; use Empty() instead", 
  1097             warnings.warn("null string passed to Literal; use Empty() instead", 
  2192 
       
  2193                             SyntaxWarning, stacklevel=2)
  1098                             SyntaxWarning, stacklevel=2)
  2194 
       
  2195             self.__class__ = Empty
  1099             self.__class__ = Empty
  2196 
       
  2197         self.name = '"%s"' % self.match
  1100         self.name = '"%s"' % self.match
  2198 
       
  2199         self.errmsg = "Expected " + self.name
  1101         self.errmsg = "Expected " + self.name
  2200 
       
  2201         self.mayReturnEmpty = False
  1102         self.mayReturnEmpty = False
  2202 
       
  2203         self.myException.msg = self.errmsg
  1103         self.myException.msg = self.errmsg
  2204 
       
  2205         self.mayIndexError = False
  1104         self.mayIndexError = False
  2206 
  1105 
  2207 
       
  2208 
       
  2209     # Performance tuning: this routine gets called a *lot*
  1106     # Performance tuning: this routine gets called a *lot*
  2210 
       
  2211     # if this is a single character match string  and the first character matches,
  1107     # if this is a single character match string  and the first character matches,
  2212 
       
  2213     # short-circuit as quickly as possible, and avoid calling startswith
  1108     # short-circuit as quickly as possible, and avoid calling startswith
  2214 
       
  2215     #~ @profile
  1109     #~ @profile
  2216 
       
  2217     def parseImpl( self, instring, loc, doActions=True ):
  1110     def parseImpl( self, instring, loc, doActions=True ):
  2218 
       
  2219         if (instring[loc] == self.firstMatchChar and
  1111         if (instring[loc] == self.firstMatchChar and
  2220 
       
  2221             (self.matchLen==1 or instring.startswith(self.match,loc)) ):
  1112             (self.matchLen==1 or instring.startswith(self.match,loc)) ):
  2222 
       
  2223             return loc+self.matchLen, self.match
  1113             return loc+self.matchLen, self.match
  2224 
       
  2225         #~ raise ParseException( instring, loc, self.errmsg )
  1114         #~ raise ParseException( instring, loc, self.errmsg )
  2226 
       
  2227         exc = self.myException
  1115         exc = self.myException
  2228 
       
  2229         exc.loc = loc
  1116         exc.loc = loc
  2230 
       
  2231         exc.pstr = instring
  1117         exc.pstr = instring
  2232 
       
  2233         raise exc
  1118         raise exc
  2234 
  1119 
  2235 
       
  2236 
       
  2237 class Keyword(Token):
  1120 class Keyword(Token):
  2238 
       
  2239     """Token to exactly match a specified string as a keyword, that is, it must be 
  1121     """Token to exactly match a specified string as a keyword, that is, it must be 
  2240 
       
  2241        immediately followed by a non-keyword character.  Compare with Literal::
  1122        immediately followed by a non-keyword character.  Compare with Literal::
  2242 
       
  2243          Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
  1123          Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
  2244 
       
  2245          Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
  1124          Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
  2246 
       
  2247        Accepts two optional constructor arguments in addition to the keyword string:
  1125        Accepts two optional constructor arguments in addition to the keyword string:
  2248 
       
  2249        identChars is a string of characters that would be valid identifier characters,
  1126        identChars is a string of characters that would be valid identifier characters,
  2250 
       
  2251        defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
  1127        defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
  2252 
       
  2253        matching, default is False.
  1128        matching, default is False.
  2254 
       
  2255     """
  1129     """
  2256 
       
  2257     DEFAULT_KEYWORD_CHARS = alphanums+"_$"
  1130     DEFAULT_KEYWORD_CHARS = alphanums+"_$"
  2258 
  1131     
  2259     
       
  2260 
       
  2261     def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
  1132     def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
  2262 
       
  2263         super(Keyword,self).__init__()
  1133         super(Keyword,self).__init__()
  2264 
       
  2265         self.match = matchString
  1134         self.match = matchString
  2266 
       
  2267         self.matchLen = len(matchString)
  1135         self.matchLen = len(matchString)
  2268 
       
  2269         try:
  1136         try:
  2270 
       
  2271             self.firstMatchChar = matchString[0]
  1137             self.firstMatchChar = matchString[0]
  2272 
       
  2273         except IndexError:
  1138         except IndexError:
  2274 
       
  2275             warnings.warn("null string passed to Keyword; use Empty() instead", 
  1139             warnings.warn("null string passed to Keyword; use Empty() instead", 
  2276 
       
  2277                             SyntaxWarning, stacklevel=2)
  1140                             SyntaxWarning, stacklevel=2)
  2278 
       
  2279         self.name = '"%s"' % self.match
  1141         self.name = '"%s"' % self.match
  2280 
       
  2281         self.errmsg = "Expected " + self.name
  1142         self.errmsg = "Expected " + self.name
  2282 
       
  2283         self.mayReturnEmpty = False
  1143         self.mayReturnEmpty = False
  2284 
       
  2285         self.myException.msg = self.errmsg
  1144         self.myException.msg = self.errmsg
  2286 
       
  2287         self.mayIndexError = False
  1145         self.mayIndexError = False
  2288 
       
  2289         self.caseless = caseless
  1146         self.caseless = caseless
  2290 
       
  2291         if caseless:
  1147         if caseless:
  2292 
       
  2293             self.caselessmatch = matchString.upper()
  1148             self.caselessmatch = matchString.upper()
  2294 
       
  2295             identChars = identChars.upper()
  1149             identChars = identChars.upper()
  2296 
       
  2297         self.identChars = _str2dict(identChars)
  1150         self.identChars = _str2dict(identChars)
  2298 
  1151 
  2299 
       
  2300 
       
  2301     def parseImpl( self, instring, loc, doActions=True ):
  1152     def parseImpl( self, instring, loc, doActions=True ):
  2302 
       
  2303         if self.caseless:
  1153         if self.caseless:
  2304 
       
  2305             if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
  1154             if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
  2306 
       
  2307                  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
  1155                  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
  2308 
       
  2309                  (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
  1156                  (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
  2310 
       
  2311                 return loc+self.matchLen, self.match
  1157                 return loc+self.matchLen, self.match
  2312 
  1158         else:
  2313         else:
       
  2314 
       
  2315             if (instring[loc] == self.firstMatchChar and
  1159             if (instring[loc] == self.firstMatchChar and
  2316 
       
  2317                 (self.matchLen==1 or instring.startswith(self.match,loc)) and
  1160                 (self.matchLen==1 or instring.startswith(self.match,loc)) and
  2318 
       
  2319                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
  1161                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
  2320 
       
  2321                 (loc == 0 or instring[loc-1] not in self.identChars) ):
  1162                 (loc == 0 or instring[loc-1] not in self.identChars) ):
  2322 
       
  2323                 return loc+self.matchLen, self.match
  1163                 return loc+self.matchLen, self.match
  2324 
       
  2325         #~ raise ParseException( instring, loc, self.errmsg )
  1164         #~ raise ParseException( instring, loc, self.errmsg )
  2326 
       
  2327         exc = self.myException
  1165         exc = self.myException
  2328 
       
  2329         exc.loc = loc
  1166         exc.loc = loc
  2330 
       
  2331         exc.pstr = instring
  1167         exc.pstr = instring
  2332 
       
  2333         raise exc
  1168         raise exc
  2334 
  1169         
  2335         
       
  2336 
       
  2337     def copy(self):
  1170     def copy(self):
  2338 
       
  2339         c = super(Keyword,self).copy()
  1171         c = super(Keyword,self).copy()
  2340 
       
  2341         c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
  1172         c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
  2342 
       
  2343         return c
  1173         return c
  2344 
  1174         
  2345         
       
  2346 
       
  2347     def setDefaultKeywordChars( chars ):
  1175     def setDefaultKeywordChars( chars ):
  2348 
       
  2349         """Overrides the default Keyword chars
  1176         """Overrides the default Keyword chars
  2350 
       
  2351         """
  1177         """
  2352 
       
  2353         Keyword.DEFAULT_KEYWORD_CHARS = chars
  1178         Keyword.DEFAULT_KEYWORD_CHARS = chars
  2354 
       
  2355     setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)        
  1179     setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)        
  2356 
  1180 
  2357 
  1181 
  2358 
       
  2359 
       
  2360 
       
  2361 class CaselessLiteral(Literal):
  1182 class CaselessLiteral(Literal):
  2362 
       
  2363     """Token to match a specified string, ignoring case of letters.
  1183     """Token to match a specified string, ignoring case of letters.
  2364 
       
  2365        Note: the matched results will always be in the case of the given
  1184        Note: the matched results will always be in the case of the given
  2366 
       
  2367        match string, NOT the case of the input text.
  1185        match string, NOT the case of the input text.
  2368 
       
  2369     """
  1186     """
  2370 
       
  2371     def __init__( self, matchString ):
  1187     def __init__( self, matchString ):
  2372 
       
  2373         super(CaselessLiteral,self).__init__( matchString.upper() )
  1188         super(CaselessLiteral,self).__init__( matchString.upper() )
  2374 
       
  2375         # Preserve the defining literal.
  1189         # Preserve the defining literal.
  2376 
       
  2377         self.returnString = matchString
  1190         self.returnString = matchString
  2378 
       
  2379         self.name = "'%s'" % self.returnString
  1191         self.name = "'%s'" % self.returnString
  2380 
       
  2381         self.errmsg = "Expected " + self.name
  1192         self.errmsg = "Expected " + self.name
  2382 
       
  2383         self.myException.msg = self.errmsg
  1193         self.myException.msg = self.errmsg
  2384 
  1194 
  2385 
       
  2386 
       
  2387     def parseImpl( self, instring, loc, doActions=True ):
  1195     def parseImpl( self, instring, loc, doActions=True ):
  2388 
       
  2389         if instring[ loc:loc+self.matchLen ].upper() == self.match:
  1196         if instring[ loc:loc+self.matchLen ].upper() == self.match:
  2390 
       
  2391             return loc+self.matchLen, self.returnString
  1197             return loc+self.matchLen, self.returnString
  2392 
       
  2393         #~ raise ParseException( instring, loc, self.errmsg )
  1198         #~ raise ParseException( instring, loc, self.errmsg )
  2394 
       
  2395         exc = self.myException
  1199         exc = self.myException
  2396 
       
  2397         exc.loc = loc
  1200         exc.loc = loc
  2398 
       
  2399         exc.pstr = instring
  1201         exc.pstr = instring
  2400 
       
  2401         raise exc
  1202         raise exc
  2402 
  1203 
  2403 
       
  2404 
       
  2405 class CaselessKeyword(Keyword):
  1204 class CaselessKeyword(Keyword):
  2406 
       
  2407     def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
  1205     def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
  2408 
       
  2409         super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
  1206         super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
  2410 
  1207 
  2411 
       
  2412 
       
  2413     def parseImpl( self, instring, loc, doActions=True ):
  1208     def parseImpl( self, instring, loc, doActions=True ):
  2414 
       
  2415         if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
  1209         if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
  2416 
       
  2417              (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
  1210              (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
  2418 
       
  2419             return loc+self.matchLen, self.match
  1211             return loc+self.matchLen, self.match
  2420 
       
  2421         #~ raise ParseException( instring, loc, self.errmsg )
  1212         #~ raise ParseException( instring, loc, self.errmsg )
  2422 
       
  2423         exc = self.myException
  1213         exc = self.myException
  2424 
       
  2425         exc.loc = loc
  1214         exc.loc = loc
  2426 
       
  2427         exc.pstr = instring
  1215         exc.pstr = instring
  2428 
       
  2429         raise exc
  1216         raise exc
  2430 
  1217 
  2431 
       
  2432 
       
  2433 class Word(Token):
  1218 class Word(Token):
  2434 
       
  2435     """Token for matching words composed of allowed character sets.
  1219     """Token for matching words composed of allowed character sets.
  2436 
       
  2437        Defined with string containing all allowed initial characters,
  1220        Defined with string containing all allowed initial characters,
  2438 
       
  2439        an optional string containing allowed body characters (if omitted,
  1221        an optional string containing allowed body characters (if omitted,
  2440 
       
  2441        defaults to the initial character set), and an optional minimum,
  1222        defaults to the initial character set), and an optional minimum,
  2442 
       
  2443        maximum, and/or exact length.
  1223        maximum, and/or exact length.
  2444 
       
  2445     """
  1224     """
  2446 
       
  2447     def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ):
  1225     def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ):
  2448 
       
  2449         super(Word,self).__init__()
  1226         super(Word,self).__init__()
  2450 
       
  2451         self.initCharsOrig = initChars
  1227         self.initCharsOrig = initChars
  2452 
       
  2453         self.initChars = _str2dict(initChars)
  1228         self.initChars = _str2dict(initChars)
  2454 
       
  2455         if bodyChars :
  1229         if bodyChars :
  2456 
       
  2457             self.bodyCharsOrig = bodyChars
  1230             self.bodyCharsOrig = bodyChars
  2458 
       
  2459             self.bodyChars = _str2dict(bodyChars)
  1231             self.bodyChars = _str2dict(bodyChars)
  2460 
  1232         else:
  2461         else:
       
  2462 
       
  2463             self.bodyCharsOrig = initChars
  1233             self.bodyCharsOrig = initChars
  2464 
       
  2465             self.bodyChars = _str2dict(initChars)
  1234             self.bodyChars = _str2dict(initChars)
  2466 
       
  2467             
  1235             
  2468 
       
  2469         self.maxSpecified = max > 0
  1236         self.maxSpecified = max > 0
  2470 
  1237 
  2471 
       
  2472 
       
  2473         self.minLen = min
  1238         self.minLen = min
  2474 
  1239 
  2475 
       
  2476 
       
  2477         if max > 0:
  1240         if max > 0:
  2478 
       
  2479             self.maxLen = max
  1241             self.maxLen = max
  2480 
  1242         else:
  2481         else:
       
  2482 
       
  2483             self.maxLen = sys.maxint
  1243             self.maxLen = sys.maxint
  2484 
  1244 
  2485 
       
  2486 
       
  2487         if exact > 0:
  1245         if exact > 0:
  2488 
       
  2489             self.maxLen = exact
  1246             self.maxLen = exact
  2490 
       
  2491             self.minLen = exact
  1247             self.minLen = exact
  2492 
  1248 
  2493 
       
  2494 
       
  2495         self.name = _ustr(self)
  1249         self.name = _ustr(self)
  2496 
       
  2497         self.errmsg = "Expected " + self.name
  1250         self.errmsg = "Expected " + self.name
  2498 
       
  2499         self.myException.msg = self.errmsg
  1251         self.myException.msg = self.errmsg
  2500 
       
  2501         self.mayIndexError = False
  1252         self.mayIndexError = False
  2502 
  1253         
  2503         
       
  2504 
       
  2505         if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
  1254         if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
  2506 
       
  2507             if self.bodyCharsOrig == self.initCharsOrig:
  1255             if self.bodyCharsOrig == self.initCharsOrig:
  2508 
       
  2509                 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
  1256                 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
  2510 
       
  2511             elif len(self.bodyCharsOrig) == 1:
  1257             elif len(self.bodyCharsOrig) == 1:
  2512 
       
  2513                 self.reString = "%s[%s]*" % \
  1258                 self.reString = "%s[%s]*" % \
  2514                                       (re.escape(self.initCharsOrig),
  1259                                       (re.escape(self.initCharsOrig),
  2515 
       
  2516                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
  1260                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
  2517 
       
  2518             else:
  1261             else:
  2519 
       
  2520                 self.reString = "[%s][%s]*" % \
  1262                 self.reString = "[%s][%s]*" % \
  2521                                       (_escapeRegexRangeChars(self.initCharsOrig),
  1263                                       (_escapeRegexRangeChars(self.initCharsOrig),
  2522 
       
  2523                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
  1264                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
  2524 
       
  2525             try:
  1265             try:
  2526 
       
  2527                 self.re = re.compile( self.reString )
  1266                 self.re = re.compile( self.reString )
  2528 
       
  2529             except:
  1267             except:
  2530 
       
  2531                 self.re = None
  1268                 self.re = None
  2532 
  1269         
  2533         
       
  2534 
       
  2535     def parseImpl( self, instring, loc, doActions=True ):
  1270     def parseImpl( self, instring, loc, doActions=True ):
  2536 
       
  2537         if self.re:
  1271         if self.re:
  2538 
       
  2539             result = self.re.match(instring,loc)
  1272             result = self.re.match(instring,loc)
  2540 
       
  2541             if not result:
  1273             if not result:
  2542 
       
  2543                 exc = self.myException
  1274                 exc = self.myException
  2544 
       
  2545                 exc.loc = loc
  1275                 exc.loc = loc
  2546 
       
  2547                 exc.pstr = instring
  1276                 exc.pstr = instring
  2548 
       
  2549                 raise exc
  1277                 raise exc
  2550 
       
  2551             
  1278             
  2552 
       
  2553             loc = result.end()
  1279             loc = result.end()
  2554 
       
  2555             return loc,result.group()
  1280             return loc,result.group()
  2556 
  1281         
  2557         
       
  2558 
       
  2559         if not(instring[ loc ] in self.initChars):
  1282         if not(instring[ loc ] in self.initChars):
  2560 
       
  2561             #~ raise ParseException( instring, loc, self.errmsg )
  1283             #~ raise ParseException( instring, loc, self.errmsg )
  2562 
       
  2563             exc = self.myException
  1284             exc = self.myException
  2564 
       
  2565             exc.loc = loc
  1285             exc.loc = loc
  2566 
       
  2567             exc.pstr = instring
  1286             exc.pstr = instring
  2568 
       
  2569             raise exc
  1287             raise exc
  2570 
       
  2571         start = loc
  1288         start = loc
  2572 
       
  2573         loc += 1
  1289         loc += 1
  2574 
       
  2575         instrlen = len(instring)
  1290         instrlen = len(instring)
  2576 
       
  2577         bodychars = self.bodyChars
  1291         bodychars = self.bodyChars
  2578 
       
  2579         maxloc = start + self.maxLen
  1292         maxloc = start + self.maxLen
  2580 
       
  2581         maxloc = min( maxloc, instrlen )
  1293         maxloc = min( maxloc, instrlen )
  2582 
       
  2583         while loc < maxloc and instring[loc] in bodychars:
  1294         while loc < maxloc and instring[loc] in bodychars:
  2584 
       
  2585             loc += 1
  1295             loc += 1
  2586 
       
  2587             
  1296             
  2588 
       
  2589         throwException = False
  1297         throwException = False
  2590 
       
  2591         if loc - start < self.minLen:
  1298         if loc - start < self.minLen:
  2592 
       
  2593             throwException = True
  1299             throwException = True
  2594 
       
  2595         if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
  1300         if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
  2596 
       
  2597             throwException = True
  1301             throwException = True
  2598 
  1302 
  2599 
       
  2600 
       
  2601         if throwException:
  1303         if throwException:
  2602 
       
  2603             #~ raise ParseException( instring, loc, self.errmsg )
  1304             #~ raise ParseException( instring, loc, self.errmsg )
  2604 
       
  2605             exc = self.myException
  1305             exc = self.myException
  2606 
       
  2607             exc.loc = loc
  1306             exc.loc = loc
  2608 
       
  2609             exc.pstr = instring
  1307             exc.pstr = instring
  2610 
       
  2611             raise exc
  1308             raise exc
  2612 
  1309 
  2613 
       
  2614 
       
  2615         return loc, instring[start:loc]
  1310         return loc, instring[start:loc]
  2616 
  1311 
  2617 
       
  2618 
       
  2619     def __str__( self ):
  1312     def __str__( self ):
  2620 
       
  2621         try:
  1313         try:
  2622 
       
  2623             return super(Word,self).__str__()
  1314             return super(Word,self).__str__()
  2624 
       
  2625         except:
  1315         except:
  2626 
       
  2627             pass
  1316             pass
  2628 
  1317 
  2629 
       
  2630 
       
  2631             
  1318             
  2632 
       
  2633         if self.strRepr is None:
  1319         if self.strRepr is None:
  2634 
       
  2635             
  1320             
  2636 
       
  2637             def charsAsStr(s):
  1321             def charsAsStr(s):
  2638 
       
  2639                 if len(s)>4:
  1322                 if len(s)>4:
  2640 
       
  2641                     return s[:4]+"..."
  1323                     return s[:4]+"..."
  2642 
       
  2643                 else:
  1324                 else:
  2644 
       
  2645                     return s
  1325                     return s
  2646 
       
  2647             
  1326             
  2648 
       
  2649             if ( self.initCharsOrig != self.bodyCharsOrig ):
  1327             if ( self.initCharsOrig != self.bodyCharsOrig ):
  2650 
       
  2651                 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
  1328                 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
  2652 
       
  2653             else:
  1329             else:
  2654 
       
  2655                 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
  1330                 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
  2656 
  1331 
  2657 
       
  2658 
       
  2659         return self.strRepr
  1332         return self.strRepr
  2660 
  1333 
  2661 
  1334 
  2662 
       
  2663 
       
  2664 
       
  2665 class Regex(Token):
  1335 class Regex(Token):
  2666 
       
  2667     """Token for matching strings that match a given regular expression.
  1336     """Token for matching strings that match a given regular expression.
  2668 
       
  2669        Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
  1337        Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
  2670 
       
  2671     """
  1338     """
  2672 
       
  2673     def __init__( self, pattern, flags=0):
  1339     def __init__( self, pattern, flags=0):
  2674 
       
  2675         """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
  1340         """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
  2676 
       
  2677         super(Regex,self).__init__()
  1341         super(Regex,self).__init__()
  2678 
  1342         
  2679         
       
  2680 
       
  2681         if len(pattern) == 0:
  1343         if len(pattern) == 0:
  2682 
       
  2683             warnings.warn("null string passed to Regex; use Empty() instead", 
  1344             warnings.warn("null string passed to Regex; use Empty() instead", 
  2684 
       
  2685                     SyntaxWarning, stacklevel=2)
  1345                     SyntaxWarning, stacklevel=2)
  2686 
  1346     
  2687     
       
  2688 
       
  2689         self.pattern = pattern
  1347         self.pattern = pattern
  2690 
       
  2691         self.flags = flags
  1348         self.flags = flags
  2692 
  1349         
  2693         
       
  2694 
       
  2695         try:
  1350         try:
  2696 
       
  2697             self.re = re.compile(self.pattern, self.flags)
  1351             self.re = re.compile(self.pattern, self.flags)
  2698 
       
  2699             self.reString = self.pattern
  1352             self.reString = self.pattern
  2700 
       
  2701         except sre_constants.error,e:
  1353         except sre_constants.error,e:
  2702 
       
  2703             warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 
  1354             warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 
  2704 
       
  2705                 SyntaxWarning, stacklevel=2)
  1355                 SyntaxWarning, stacklevel=2)
  2706 
       
  2707             raise
  1356             raise
  2708 
  1357 
  2709 
       
  2710 
       
  2711         self.name = _ustr(self)
  1358         self.name = _ustr(self)
  2712 
       
  2713         self.errmsg = "Expected " + self.name
  1359         self.errmsg = "Expected " + self.name
  2714 
       
  2715         self.myException.msg = self.errmsg
  1360         self.myException.msg = self.errmsg
  2716 
       
  2717         self.mayIndexError = False
  1361         self.mayIndexError = False
  2718 
       
  2719         self.mayReturnEmpty = True
  1362         self.mayReturnEmpty = True
  2720 
  1363     
  2721     
       
  2722 
       
  2723     def parseImpl( self, instring, loc, doActions=True ):
  1364     def parseImpl( self, instring, loc, doActions=True ):
  2724 
       
  2725         result = self.re.match(instring,loc)
  1365         result = self.re.match(instring,loc)
  2726 
       
  2727         if not result:
  1366         if not result:
  2728 
       
  2729             exc = self.myException
  1367             exc = self.myException
  2730 
       
  2731             exc.loc = loc
  1368             exc.loc = loc
  2732 
       
  2733             exc.pstr = instring
  1369             exc.pstr = instring
  2734 
       
  2735             raise exc
  1370             raise exc
  2736 
  1371         
  2737         
       
  2738 
       
  2739         loc = result.end()
  1372         loc = result.end()
  2740 
       
  2741         d = result.groupdict()
  1373         d = result.groupdict()
  2742 
       
  2743         ret = ParseResults(result.group())
  1374         ret = ParseResults(result.group())
  2744 
       
  2745         if d:
  1375         if d:
  2746 
       
  2747             for k in d.keys():
  1376             for k in d.keys():
  2748 
       
  2749                 ret[k] = d[k]
  1377                 ret[k] = d[k]
  2750 
       
  2751         return loc,ret
  1378         return loc,ret
  2752 
  1379     
  2753     
       
  2754 
       
  2755     def __str__( self ):
  1380     def __str__( self ):
  2756 
       
  2757         try:
  1381         try:
  2758 
       
  2759             return super(Regex,self).__str__()
  1382             return super(Regex,self).__str__()
  2760 
       
  2761         except:
  1383         except:
  2762 
       
  2763             pass
  1384             pass
  2764 
  1385         
  2765         
       
  2766 
       
  2767         if self.strRepr is None:
  1386         if self.strRepr is None:
  2768 
       
  2769             self.strRepr = "Re:(%s)" % repr(self.pattern)
  1387             self.strRepr = "Re:(%s)" % repr(self.pattern)
  2770 
  1388         
  2771         
       
  2772 
       
  2773         return self.strRepr
  1389         return self.strRepr
  2774 
  1390 
  2775 
  1391 
  2776 
       
  2777 
       
  2778 
       
  2779 class QuotedString(Token):
  1392 class QuotedString(Token):
  2780 
       
  2781     """Token for matching strings that are delimited by quoting characters.
  1393     """Token for matching strings that are delimited by quoting characters.
  2782 
       
  2783     """
  1394     """
  2784 
       
  2785     def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
  1395     def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
  2786 
       
  2787         """
  1396         """
  2788 
       
  2789            Defined with the following parameters:
  1397            Defined with the following parameters:
  2790 
       
  2791            - quoteChar - string of one or more characters defining the quote delimiting string
  1398            - quoteChar - string of one or more characters defining the quote delimiting string
  2792 
       
  2793            - escChar - character to escape quotes, typically backslash (default=None)
  1399            - escChar - character to escape quotes, typically backslash (default=None)
  2794 
       
  2795            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
  1400            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
  2796 
       
  2797            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
  1401            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
  2798 
       
  2799            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
  1402            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
  2800 
       
  2801            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
  1403            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
  2802 
       
  2803         """
  1404         """
  2804 
       
  2805         super(QuotedString,self).__init__()
  1405         super(QuotedString,self).__init__()
  2806 
  1406         
  2807         
       
  2808 
       
  2809         # remove white space from quote chars - wont work anyway
  1407         # remove white space from quote chars - wont work anyway
  2810 
       
  2811         quoteChar = quoteChar.strip()
  1408         quoteChar = quoteChar.strip()
  2812 
       
  2813         if len(quoteChar) == 0:
  1409         if len(quoteChar) == 0:
  2814 
       
  2815             warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  1410             warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  2816 
       
  2817             raise SyntaxError()
  1411             raise SyntaxError()
  2818 
  1412         
  2819         
       
  2820 
       
  2821         if endQuoteChar is None:
  1413         if endQuoteChar is None:
  2822 
       
  2823             endQuoteChar = quoteChar
  1414             endQuoteChar = quoteChar
  2824 
  1415         else:
  2825         else:
       
  2826 
       
  2827             endQuoteChar = endQuoteChar.strip()
  1416             endQuoteChar = endQuoteChar.strip()
  2828 
       
  2829             if len(endQuoteChar) == 0:
  1417             if len(endQuoteChar) == 0:
  2830 
       
  2831                 warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  1418                 warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  2832 
       
  2833                 raise SyntaxError()
  1419                 raise SyntaxError()
  2834 
  1420         
  2835         
       
  2836 
       
  2837         self.quoteChar = quoteChar
  1421         self.quoteChar = quoteChar
  2838 
       
  2839         self.quoteCharLen = len(quoteChar)
  1422         self.quoteCharLen = len(quoteChar)
  2840 
       
  2841         self.firstQuoteChar = quoteChar[0]
  1423         self.firstQuoteChar = quoteChar[0]
  2842 
       
  2843         self.endQuoteChar = endQuoteChar
  1424         self.endQuoteChar = endQuoteChar
  2844 
       
  2845         self.endQuoteCharLen = len(endQuoteChar)
  1425         self.endQuoteCharLen = len(endQuoteChar)
  2846 
       
  2847         self.escChar = escChar
  1426         self.escChar = escChar
  2848 
       
  2849         self.escQuote = escQuote
  1427         self.escQuote = escQuote
  2850 
       
  2851         self.unquoteResults = unquoteResults
  1428         self.unquoteResults = unquoteResults
  2852 
  1429         
  2853         
       
  2854 
       
  2855         if multiline:
  1430         if multiline:
  2856 
       
  2857             self.flags = re.MULTILINE | re.DOTALL
  1431             self.flags = re.MULTILINE | re.DOTALL
  2858 
       
  2859             self.pattern = r'%s(?:[^%s%s]' % \
  1432             self.pattern = r'%s(?:[^%s%s]' % \
  2860                 ( re.escape(self.quoteChar),
  1433                 ( re.escape(self.quoteChar),
  2861 
       
  2862                   _escapeRegexRangeChars(self.endQuoteChar[0]),
  1434                   _escapeRegexRangeChars(self.endQuoteChar[0]),
  2863 
       
  2864                   (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  1435                   (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  2865 
  1436         else:
  2866         else:
       
  2867 
       
  2868             self.flags = 0
  1437             self.flags = 0
  2869 
       
  2870             self.pattern = r'%s(?:[^%s\n\r%s]' % \
  1438             self.pattern = r'%s(?:[^%s\n\r%s]' % \
  2871                 ( re.escape(self.quoteChar),
  1439                 ( re.escape(self.quoteChar),
  2872 
       
  2873                   _escapeRegexRangeChars(self.endQuoteChar[0]),
  1440                   _escapeRegexRangeChars(self.endQuoteChar[0]),
  2874 
       
  2875                   (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  1441                   (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  2876 
       
  2877         if len(self.endQuoteChar) > 1:
  1442         if len(self.endQuoteChar) > 1:
  2878 
       
  2879             self.pattern += (
  1443             self.pattern += (
  2880 
       
  2881                 '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
  1444                 '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
  2882 
       
  2883                                                _escapeRegexRangeChars(self.endQuoteChar[i])) 
  1445                                                _escapeRegexRangeChars(self.endQuoteChar[i])) 
  2884 
       
  2885                                     for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
  1446                                     for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
  2886 
       
  2887                 )
  1447                 )
  2888 
       
  2889         if escQuote:
  1448         if escQuote:
  2890 
       
  2891             self.pattern += (r'|(?:%s)' % re.escape(escQuote))
  1449             self.pattern += (r'|(?:%s)' % re.escape(escQuote))
  2892 
       
  2893         if escChar:
  1450         if escChar:
  2894 
       
  2895             self.pattern += (r'|(?:%s.)' % re.escape(escChar))
  1451             self.pattern += (r'|(?:%s.)' % re.escape(escChar))
  2896 
       
  2897             self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
  1452             self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
  2898 
       
  2899         self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
  1453         self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
  2900 
  1454         
  2901         
       
  2902 
       
  2903         try:
  1455         try:
  2904 
       
  2905             self.re = re.compile(self.pattern, self.flags)
  1456             self.re = re.compile(self.pattern, self.flags)
  2906 
       
  2907             self.reString = self.pattern
  1457             self.reString = self.pattern
  2908 
       
  2909         except sre_constants.error,e:
  1458         except sre_constants.error,e:
  2910 
       
  2911             warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 
  1459             warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 
  2912 
       
  2913                 SyntaxWarning, stacklevel=2)
  1460                 SyntaxWarning, stacklevel=2)
  2914 
       
  2915             raise
  1461             raise
  2916 
  1462 
  2917 
       
  2918 
       
  2919         self.name = _ustr(self)
  1463         self.name = _ustr(self)
  2920 
       
  2921         self.errmsg = "Expected " + self.name
  1464         self.errmsg = "Expected " + self.name
  2922 
       
  2923         self.myException.msg = self.errmsg
  1465         self.myException.msg = self.errmsg
  2924 
       
  2925         self.mayIndexError = False
  1466         self.mayIndexError = False
  2926 
       
  2927         self.mayReturnEmpty = True
  1467         self.mayReturnEmpty = True
  2928 
  1468     
  2929     
       
  2930 
       
  2931     def parseImpl( self, instring, loc, doActions=True ):
  1469     def parseImpl( self, instring, loc, doActions=True ):
  2932 
       
  2933         result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
  1470         result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
  2934 
       
  2935         if not result:
  1471         if not result:
  2936 
       
  2937             exc = self.myException
  1472             exc = self.myException
  2938 
       
  2939             exc.loc = loc
  1473             exc.loc = loc
  2940 
       
  2941             exc.pstr = instring
  1474             exc.pstr = instring
  2942 
       
  2943             raise exc
  1475             raise exc
  2944 
  1476         
  2945         
       
  2946 
       
  2947         loc = result.end()
  1477         loc = result.end()
  2948 
       
  2949         ret = result.group()
  1478         ret = result.group()
  2950 
  1479         
  2951         
       
  2952 
       
  2953         if self.unquoteResults:
  1480         if self.unquoteResults:
  2954 
       
  2955             
  1481             
  2956 
       
  2957             # strip off quotes
  1482             # strip off quotes
  2958 
       
  2959             ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
  1483             ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
  2960 
       
  2961                 
  1484                 
  2962 
       
  2963             if isinstance(ret,basestring):
  1485             if isinstance(ret,basestring):
  2964 
       
  2965                 # replace escaped characters
  1486                 # replace escaped characters
  2966 
       
  2967                 if self.escChar:
  1487                 if self.escChar:
  2968 
       
  2969                     ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)
  1488                     ret = re.sub(self.escCharReplacePattern,"\g<1>",ret)
  2970 
  1489 
  2971 
       
  2972 
       
  2973                 # replace escaped quotes
  1490                 # replace escaped quotes
  2974 
       
  2975                 if self.escQuote:
  1491                 if self.escQuote:
  2976 
       
  2977                     ret = ret.replace(self.escQuote, self.endQuoteChar)
  1492                     ret = ret.replace(self.escQuote, self.endQuoteChar)
  2978 
  1493 
  2979 
       
  2980 
       
  2981         return loc, ret
  1494         return loc, ret
  2982 
  1495     
  2983     
       
  2984 
       
  2985     def __str__( self ):
  1496     def __str__( self ):
  2986 
       
  2987         try:
  1497         try:
  2988 
       
  2989             return super(QuotedString,self).__str__()
  1498             return super(QuotedString,self).__str__()
  2990 
       
  2991         except:
  1499         except:
  2992 
       
  2993             pass
  1500             pass
  2994 
  1501         
  2995         
       
  2996 
       
  2997         if self.strRepr is None:
  1502         if self.strRepr is None:
  2998 
       
  2999             self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
  1503             self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
  3000 
  1504         
  3001         
       
  3002 
       
  3003         return self.strRepr
  1505         return self.strRepr
  3004 
  1506 
  3005 
  1507 
  3006 
       
  3007 
       
  3008 
       
  3009 class CharsNotIn(Token):
  1508 class CharsNotIn(Token):
  3010 
       
  3011     """Token for matching words composed of characters *not* in a given set.
  1509     """Token for matching words composed of characters *not* in a given set.
  3012 
       
  3013        Defined with string containing all disallowed characters, and an optional 
  1510        Defined with string containing all disallowed characters, and an optional 
  3014 
       
  3015        minimum, maximum, and/or exact length.
  1511        minimum, maximum, and/or exact length.
  3016 
       
  3017     """
  1512     """
  3018 
       
  3019     def __init__( self, notChars, min=1, max=0, exact=0 ):
  1513     def __init__( self, notChars, min=1, max=0, exact=0 ):
  3020 
       
  3021         super(CharsNotIn,self).__init__()
  1514         super(CharsNotIn,self).__init__()
  3022 
       
  3023         self.skipWhitespace = False
  1515         self.skipWhitespace = False
  3024 
       
  3025         self.notChars = notChars
  1516         self.notChars = notChars
  3026 
  1517         
  3027         
       
  3028 
       
  3029         self.minLen = min
  1518         self.minLen = min
  3030 
  1519 
  3031 
       
  3032 
       
  3033         if max > 0:
  1520         if max > 0:
  3034 
       
  3035             self.maxLen = max
  1521             self.maxLen = max
  3036 
  1522         else:
  3037         else:
       
  3038 
       
  3039             self.maxLen = sys.maxint
  1523             self.maxLen = sys.maxint
  3040 
  1524 
  3041 
       
  3042 
       
  3043         if exact > 0:
  1525         if exact > 0:
  3044 
       
  3045             self.maxLen = exact
  1526             self.maxLen = exact
  3046 
       
  3047             self.minLen = exact
  1527             self.minLen = exact
  3048 
  1528         
  3049         
       
  3050 
       
  3051         self.name = _ustr(self)
  1529         self.name = _ustr(self)
  3052 
       
  3053         self.errmsg = "Expected " + self.name
  1530         self.errmsg = "Expected " + self.name
  3054 
       
  3055         self.mayReturnEmpty = ( self.minLen == 0 )
  1531         self.mayReturnEmpty = ( self.minLen == 0 )
  3056 
       
  3057         self.myException.msg = self.errmsg
  1532         self.myException.msg = self.errmsg
  3058 
       
  3059         self.mayIndexError = False
  1533         self.mayIndexError = False
  3060 
  1534 
  3061 
       
  3062 
       
  3063     def parseImpl( self, instring, loc, doActions=True ):
  1535     def parseImpl( self, instring, loc, doActions=True ):
  3064 
       
  3065         if instring[loc] in self.notChars:
  1536         if instring[loc] in self.notChars:
  3066 
       
  3067             #~ raise ParseException( instring, loc, self.errmsg )
  1537             #~ raise ParseException( instring, loc, self.errmsg )
  3068 
       
  3069             exc = self.myException
  1538             exc = self.myException
  3070 
       
  3071             exc.loc = loc
  1539             exc.loc = loc
  3072 
       
  3073             exc.pstr = instring
  1540             exc.pstr = instring
  3074 
       
  3075             raise exc
  1541             raise exc
  3076 
       
  3077             
  1542             
  3078 
       
  3079         start = loc
  1543         start = loc
  3080 
       
  3081         loc += 1
  1544         loc += 1
  3082 
       
  3083         notchars = self.notChars
  1545         notchars = self.notChars
  3084 
       
  3085         maxlen = min( start+self.maxLen, len(instring) )
  1546         maxlen = min( start+self.maxLen, len(instring) )
  3086 
       
  3087         while loc < maxlen and (instring[loc] not in notchars):
  1547         while loc < maxlen and (instring[loc] not in notchars):
  3088 
       
  3089             loc += 1
  1548             loc += 1
  3090 
  1549 
  3091 
       
  3092 
       
  3093         if loc - start < self.minLen:
  1550         if loc - start < self.minLen:
  3094 
       
  3095             #~ raise ParseException( instring, loc, self.errmsg )
  1551             #~ raise ParseException( instring, loc, self.errmsg )
  3096 
       
  3097             exc = self.myException
  1552             exc = self.myException
  3098 
       
  3099             exc.loc = loc
  1553             exc.loc = loc
  3100 
       
  3101             exc.pstr = instring
  1554             exc.pstr = instring
  3102 
       
  3103             raise exc
  1555             raise exc
  3104 
  1556 
  3105 
       
  3106 
       
  3107         return loc, instring[start:loc]
  1557         return loc, instring[start:loc]
  3108 
  1558 
  3109 
       
  3110 
       
  3111     def __str__( self ):
  1559     def __str__( self ):
  3112 
       
  3113         try:
  1560         try:
  3114 
       
  3115             return super(CharsNotIn, self).__str__()
  1561             return super(CharsNotIn, self).__str__()
  3116 
       
  3117         except:
  1562         except:
  3118 
       
  3119             pass
  1563             pass
  3120 
  1564 
  3121 
       
  3122 
       
  3123         if self.strRepr is None:
  1565         if self.strRepr is None:
  3124 
       
  3125             if len(self.notChars) > 4:
  1566             if len(self.notChars) > 4:
  3126 
       
  3127                 self.strRepr = "!W:(%s...)" % self.notChars[:4]
  1567                 self.strRepr = "!W:(%s...)" % self.notChars[:4]
  3128 
       
  3129             else:
  1568             else:
  3130 
       
  3131                 self.strRepr = "!W:(%s)" % self.notChars
  1569                 self.strRepr = "!W:(%s)" % self.notChars
  3132 
  1570         
  3133         
       
  3134 
       
  3135         return self.strRepr
  1571         return self.strRepr
  3136 
  1572 
  3137 
       
  3138 
       
  3139 class White(Token):
  1573 class White(Token):
  3140 
       
  3141     """Special matching class for matching whitespace.  Normally, whitespace is ignored
  1574     """Special matching class for matching whitespace.  Normally, whitespace is ignored
  3142 
       
  3143        by pyparsing grammars.  This class is included when some whitespace structures
  1575        by pyparsing grammars.  This class is included when some whitespace structures
  3144 
       
  3145        are significant.  Define with a string containing the whitespace characters to be
  1576        are significant.  Define with a string containing the whitespace characters to be
  3146 
       
  3147        matched; default is " \\t\\n".  Also takes optional min, max, and exact arguments,
  1577        matched; default is " \\t\\n".  Also takes optional min, max, and exact arguments,
  3148 
       
  3149        as defined for the Word class."""
  1578        as defined for the Word class."""
  3150 
       
  3151     whiteStrs = {
  1579     whiteStrs = {
  3152 
       
  3153         " " : "<SPC>",
  1580         " " : "<SPC>",
  3154 
       
  3155         "\t": "<TAB>",
  1581         "\t": "<TAB>",
  3156 
       
  3157         "\n": "<LF>",
  1582         "\n": "<LF>",
  3158 
       
  3159         "\r": "<CR>",
  1583         "\r": "<CR>",
  3160 
       
  3161         "\f": "<FF>",
  1584         "\f": "<FF>",
  3162 
       
  3163         }
  1585         }
  3164 
       
  3165     def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
  1586     def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
  3166 
       
  3167         super(White,self).__init__()
  1587         super(White,self).__init__()
  3168 
       
  3169         self.matchWhite = ws
  1588         self.matchWhite = ws
  3170 
       
  3171         self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) )
  1589         self.setWhitespaceChars( "".join([c for c in self.whiteChars if c not in self.matchWhite]) )
  3172 
       
  3173         #~ self.leaveWhitespace()
  1590         #~ self.leaveWhitespace()
  3174 
       
  3175         self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
  1591         self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
  3176 
       
  3177         self.mayReturnEmpty = True
  1592         self.mayReturnEmpty = True
  3178 
       
  3179         self.errmsg = "Expected " + self.name
  1593         self.errmsg = "Expected " + self.name
  3180 
       
  3181         self.myException.msg = self.errmsg
  1594         self.myException.msg = self.errmsg
  3182 
  1595 
  3183 
       
  3184 
       
  3185         self.minLen = min
  1596         self.minLen = min
  3186 
  1597 
  3187 
       
  3188 
       
  3189         if max > 0:
  1598         if max > 0:
  3190 
       
  3191             self.maxLen = max
  1599             self.maxLen = max
  3192 
  1600         else:
  3193         else:
       
  3194 
       
  3195             self.maxLen = sys.maxint
  1601             self.maxLen = sys.maxint
  3196 
  1602 
  3197 
       
  3198 
       
  3199         if exact > 0:
  1603         if exact > 0:
  3200 
       
  3201             self.maxLen = exact
  1604             self.maxLen = exact
  3202 
       
  3203             self.minLen = exact
  1605             self.minLen = exact
  3204 
       
  3205             
  1606             
  3206 
       
  3207     def parseImpl( self, instring, loc, doActions=True ):
  1607     def parseImpl( self, instring, loc, doActions=True ):
  3208 
       
  3209         if not(instring[ loc ] in self.matchWhite):
  1608         if not(instring[ loc ] in self.matchWhite):
  3210 
       
  3211             #~ raise ParseException( instring, loc, self.errmsg )
  1609             #~ raise ParseException( instring, loc, self.errmsg )
  3212 
       
  3213             exc = self.myException
  1610             exc = self.myException
  3214 
       
  3215             exc.loc = loc
  1611             exc.loc = loc
  3216 
       
  3217             exc.pstr = instring
  1612             exc.pstr = instring
  3218 
       
  3219             raise exc
  1613             raise exc
  3220 
       
  3221         start = loc
  1614         start = loc
  3222 
       
  3223         loc += 1
  1615         loc += 1
  3224 
       
  3225         maxloc = start + self.maxLen
  1616         maxloc = start + self.maxLen
  3226 
       
  3227         maxloc = min( maxloc, len(instring) )
  1617         maxloc = min( maxloc, len(instring) )
  3228 
       
  3229         while loc < maxloc and instring[loc] in self.matchWhite:
  1618         while loc < maxloc and instring[loc] in self.matchWhite:
  3230 
       
  3231             loc += 1
  1619             loc += 1
  3232 
  1620 
  3233 
       
  3234 
       
  3235         if loc - start < self.minLen:
  1621         if loc - start < self.minLen:
  3236 
       
  3237             #~ raise ParseException( instring, loc, self.errmsg )
  1622             #~ raise ParseException( instring, loc, self.errmsg )
  3238 
       
  3239             exc = self.myException
  1623             exc = self.myException
  3240 
       
  3241             exc.loc = loc
  1624             exc.loc = loc
  3242 
       
  3243             exc.pstr = instring
  1625             exc.pstr = instring
  3244 
       
  3245             raise exc
  1626             raise exc
  3246 
  1627 
  3247 
       
  3248 
       
  3249         return loc, instring[start:loc]
  1628         return loc, instring[start:loc]
  3250 
  1629 
  3251 
  1630 
  3252 
       
  3253 
       
  3254 
       
  3255 class PositionToken(Token):
  1631 class PositionToken(Token):
  3256 
       
  3257     def __init__( self ):
  1632     def __init__( self ):
  3258 
       
  3259         super(PositionToken,self).__init__()
  1633         super(PositionToken,self).__init__()
  3260 
       
  3261         self.name=self.__class__.__name__
  1634         self.name=self.__class__.__name__
  3262 
       
  3263         self.mayReturnEmpty = True
  1635         self.mayReturnEmpty = True
  3264 
       
  3265         self.mayIndexError = False
  1636         self.mayIndexError = False
  3266 
  1637 
  3267 
       
  3268 
       
  3269 class GoToColumn(PositionToken):
  1638 class GoToColumn(PositionToken):
  3270 
       
  3271     """Token to advance to a specific column of input text; useful for tabular report scraping."""
  1639     """Token to advance to a specific column of input text; useful for tabular report scraping."""
  3272 
       
  3273     def __init__( self, colno ):
  1640     def __init__( self, colno ):
  3274 
       
  3275         super(GoToColumn,self).__init__()
  1641         super(GoToColumn,self).__init__()
  3276 
       
  3277         self.col = colno
  1642         self.col = colno
  3278 
  1643 
  3279 
       
  3280 
       
  3281     def preParse( self, instring, loc ):
  1644     def preParse( self, instring, loc ):
  3282 
       
  3283         if col(loc,instring) != self.col:
  1645         if col(loc,instring) != self.col:
  3284 
       
  3285             instrlen = len(instring)
  1646             instrlen = len(instring)
  3286 
       
  3287             if self.ignoreExprs:
  1647             if self.ignoreExprs:
  3288 
       
  3289                 loc = self.skipIgnorables( instring, loc )
  1648                 loc = self.skipIgnorables( instring, loc )
  3290 
       
  3291             while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
  1649             while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
  3292 
       
  3293                 loc += 1
  1650                 loc += 1
  3294 
       
  3295         return loc
  1651         return loc
  3296 
  1652 
  3297 
       
  3298 
       
  3299     def parseImpl( self, instring, loc, doActions=True ):
  1653     def parseImpl( self, instring, loc, doActions=True ):
  3300 
       
  3301         thiscol = col( loc, instring )
  1654         thiscol = col( loc, instring )
  3302 
       
  3303         if thiscol > self.col:
  1655         if thiscol > self.col:
  3304 
       
  3305             raise ParseException( instring, loc, "Text not in expected column", self )
  1656             raise ParseException( instring, loc, "Text not in expected column", self )
  3306 
       
  3307         newloc = loc + self.col - thiscol
  1657         newloc = loc + self.col - thiscol
  3308 
       
  3309         ret = instring[ loc: newloc ]
  1658         ret = instring[ loc: newloc ]
  3310 
       
  3311         return newloc, ret
  1659         return newloc, ret
  3312 
  1660 
  3313 
       
  3314 
       
  3315 class LineStart(PositionToken):
  1661 class LineStart(PositionToken):
  3316 
       
  3317     """Matches if current position is at the beginning of a line within the parse string"""
  1662     """Matches if current position is at the beginning of a line within the parse string"""
  3318 
       
  3319     def __init__( self ):
  1663     def __init__( self ):
  3320 
       
  3321         super(LineStart,self).__init__()
  1664         super(LineStart,self).__init__()
  3322 
       
  3323         self.setWhitespaceChars( " \t" )
  1665         self.setWhitespaceChars( " \t" )
  3324 
       
  3325         self.errmsg = "Expected start of line"
  1666         self.errmsg = "Expected start of line"
  3326 
       
  3327         self.myException.msg = self.errmsg
  1667         self.myException.msg = self.errmsg
  3328 
  1668 
  3329 
       
  3330 
       
  3331     def preParse( self, instring, loc ):
  1669     def preParse( self, instring, loc ):
  3332 
       
  3333         preloc = super(LineStart,self).preParse(instring,loc)
  1670         preloc = super(LineStart,self).preParse(instring,loc)
  3334 
       
  3335         if instring[preloc] == "\n":
  1671         if instring[preloc] == "\n":
  3336 
       
  3337             loc += 1
  1672             loc += 1
  3338 
       
  3339         return loc
  1673         return loc
  3340 
  1674 
  3341 
       
  3342 
       
  3343     def parseImpl( self, instring, loc, doActions=True ):
  1675     def parseImpl( self, instring, loc, doActions=True ):
  3344 
       
  3345         if not( loc==0 or
  1676         if not( loc==0 or
  3346 
       
  3347             (loc == self.preParse( instring, 0 )) or
  1677             (loc == self.preParse( instring, 0 )) or
  3348 
       
  3349             (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
  1678             (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
  3350 
       
  3351             #~ raise ParseException( instring, loc, "Expected start of line" )
  1679             #~ raise ParseException( instring, loc, "Expected start of line" )
  3352 
       
  3353             exc = self.myException
  1680             exc = self.myException
  3354 
       
  3355             exc.loc = loc
  1681             exc.loc = loc
  3356 
       
  3357             exc.pstr = instring
  1682             exc.pstr = instring
  3358 
       
  3359             raise exc
  1683             raise exc
  3360 
       
  3361         return loc, []
  1684         return loc, []
  3362 
  1685 
  3363 
       
  3364 
       
  3365 class LineEnd(PositionToken):
  1686 class LineEnd(PositionToken):
  3366 
       
  3367     """Matches if current position is at the end of a line within the parse string"""
  1687     """Matches if current position is at the end of a line within the parse string"""
  3368 
       
  3369     def __init__( self ):
  1688     def __init__( self ):
  3370 
       
  3371         super(LineEnd,self).__init__()
  1689         super(LineEnd,self).__init__()
  3372 
       
  3373         self.setWhitespaceChars( " \t" )
  1690         self.setWhitespaceChars( " \t" )
  3374 
       
  3375         self.errmsg = "Expected end of line"
  1691         self.errmsg = "Expected end of line"
  3376 
       
  3377         self.myException.msg = self.errmsg
  1692         self.myException.msg = self.errmsg
  3378 
  1693     
  3379     
       
  3380 
       
  3381     def parseImpl( self, instring, loc, doActions=True ):
  1694     def parseImpl( self, instring, loc, doActions=True ):
  3382 
       
  3383         if loc<len(instring):
  1695         if loc<len(instring):
  3384 
       
  3385             if instring[loc] == "\n":
  1696             if instring[loc] == "\n":
  3386 
       
  3387                 return loc+1, "\n"
  1697                 return loc+1, "\n"
  3388 
       
  3389             else:
  1698             else:
  3390 
       
  3391                 #~ raise ParseException( instring, loc, "Expected end of line" )
  1699                 #~ raise ParseException( instring, loc, "Expected end of line" )
  3392 
       
  3393                 exc = self.myException
  1700                 exc = self.myException
  3394 
       
  3395                 exc.loc = loc
  1701                 exc.loc = loc
  3396 
       
  3397                 exc.pstr = instring
  1702                 exc.pstr = instring
  3398 
       
  3399                 raise exc
  1703                 raise exc
  3400 
       
  3401         elif loc == len(instring):
  1704         elif loc == len(instring):
  3402 
       
  3403             return loc+1, []
  1705             return loc+1, []
  3404 
  1706         else:
  3405         else:
       
  3406 
       
  3407             exc = self.myException
  1707             exc = self.myException
  3408 
       
  3409             exc.loc = loc
  1708             exc.loc = loc
  3410 
       
  3411             exc.pstr = instring
  1709             exc.pstr = instring
  3412 
       
  3413             raise exc
  1710             raise exc
  3414 
  1711 
  3415 
       
  3416 
       
  3417 class StringStart(PositionToken):
  1712 class StringStart(PositionToken):
  3418 
       
  3419     """Matches if current position is at the beginning of the parse string"""
  1713     """Matches if current position is at the beginning of the parse string"""
  3420 
       
  3421     def __init__( self ):
  1714     def __init__( self ):
  3422 
       
  3423         super(StringStart,self).__init__()
  1715         super(StringStart,self).__init__()
  3424 
       
  3425         self.errmsg = "Expected start of text"
  1716         self.errmsg = "Expected start of text"
  3426 
       
  3427         self.myException.msg = self.errmsg
  1717         self.myException.msg = self.errmsg
  3428 
  1718     
  3429     
       
  3430 
       
  3431     def parseImpl( self, instring, loc, doActions=True ):
  1719     def parseImpl( self, instring, loc, doActions=True ):
  3432 
       
  3433         if loc != 0:
  1720         if loc != 0:
  3434 
       
  3435             # see if entire string up to here is just whitespace and ignoreables
  1721             # see if entire string up to here is just whitespace and ignoreables
  3436 
       
  3437             if loc != self.preParse( instring, 0 ):
  1722             if loc != self.preParse( instring, 0 ):
  3438 
       
  3439                 #~ raise ParseException( instring, loc, "Expected start of text" )
  1723                 #~ raise ParseException( instring, loc, "Expected start of text" )
  3440 
       
  3441                 exc = self.myException
  1724                 exc = self.myException
  3442 
       
  3443                 exc.loc = loc
  1725                 exc.loc = loc
  3444 
       
  3445                 exc.pstr = instring
  1726                 exc.pstr = instring
  3446 
       
  3447                 raise exc
  1727                 raise exc
  3448 
       
  3449         return loc, []
  1728         return loc, []
  3450 
  1729 
  3451 
       
  3452 
       
  3453 class StringEnd(PositionToken):
  1730 class StringEnd(PositionToken):
  3454 
       
  3455     """Matches if current position is at the end of the parse string"""
  1731     """Matches if current position is at the end of the parse string"""
  3456 
       
  3457     def __init__( self ):
  1732     def __init__( self ):
  3458 
       
  3459         super(StringEnd,self).__init__()
  1733         super(StringEnd,self).__init__()
  3460 
       
  3461         self.errmsg = "Expected end of text"
  1734         self.errmsg = "Expected end of text"
  3462 
       
  3463         self.myException.msg = self.errmsg
  1735         self.myException.msg = self.errmsg
  3464 
  1736     
  3465     
       
  3466 
       
  3467     def parseImpl( self, instring, loc, doActions=True ):
  1737     def parseImpl( self, instring, loc, doActions=True ):
  3468 
       
  3469         if loc < len(instring):
  1738         if loc < len(instring):
  3470 
       
  3471             #~ raise ParseException( instring, loc, "Expected end of text" )
  1739             #~ raise ParseException( instring, loc, "Expected end of text" )
  3472 
       
  3473             exc = self.myException
  1740             exc = self.myException
  3474 
       
  3475             exc.loc = loc
  1741             exc.loc = loc
  3476 
       
  3477             exc.pstr = instring
  1742             exc.pstr = instring
  3478 
       
  3479             raise exc
  1743             raise exc
  3480 
       
  3481         elif loc == len(instring):
  1744         elif loc == len(instring):
  3482 
       
  3483             return loc+1, []
  1745             return loc+1, []
  3484 
  1746         else:
  3485         else:
       
  3486 
       
  3487             exc = self.myException
  1747             exc = self.myException
  3488 
       
  3489             exc.loc = loc
  1748             exc.loc = loc
  3490 
       
  3491             exc.pstr = instring
  1749             exc.pstr = instring
  3492 
       
  3493             raise exc
  1750             raise exc
  3494 
  1751 
  3495 
  1752 
  3496 
       
  3497 
       
  3498 
       
  3499 class ParseExpression(ParserElement):
  1753 class ParseExpression(ParserElement):
  3500 
       
  3501     """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
  1754     """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
  3502 
       
  3503     def __init__( self, exprs, savelist = False ):
  1755     def __init__( self, exprs, savelist = False ):
  3504 
       
  3505         super(ParseExpression,self).__init__(savelist)
  1756         super(ParseExpression,self).__init__(savelist)
  3506 
       
  3507         if isinstance( exprs, list ):
  1757         if isinstance( exprs, list ):
  3508 
       
  3509             self.exprs = exprs
  1758             self.exprs = exprs
  3510 
       
  3511         elif isinstance( exprs, basestring ):
  1759         elif isinstance( exprs, basestring ):
  3512 
       
  3513             self.exprs = [ Literal( exprs ) ]
  1760             self.exprs = [ Literal( exprs ) ]
  3514 
  1761         else:
  3515         else:
       
  3516 
       
  3517             self.exprs = [ exprs ]
  1762             self.exprs = [ exprs ]
  3518 
  1763 
  3519 
       
  3520 
       
  3521     def __getitem__( self, i ):
  1764     def __getitem__( self, i ):
  3522 
       
  3523         return self.exprs[i]
  1765         return self.exprs[i]
  3524 
  1766 
  3525 
       
  3526 
       
  3527     def append( self, other ):
  1767     def append( self, other ):
  3528 
       
  3529         self.exprs.append( other )
  1768         self.exprs.append( other )
  3530 
       
  3531         self.strRepr = None
  1769         self.strRepr = None
  3532 
       
  3533         return self
  1770         return self
  3534 
  1771 
  3535 
       
  3536 
       
  3537     def leaveWhitespace( self ):
  1772     def leaveWhitespace( self ):
  3538 
       
  3539         """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
  1773         """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
  3540 
       
  3541            all contained expressions."""
  1774            all contained expressions."""
  3542 
       
  3543         self.skipWhitespace = False
  1775         self.skipWhitespace = False
  3544 
       
  3545         self.exprs = [ e.copy() for e in self.exprs ]
  1776         self.exprs = [ e.copy() for e in self.exprs ]
  3546 
       
  3547         for e in self.exprs:
  1777         for e in self.exprs:
  3548 
       
  3549             e.leaveWhitespace()
  1778             e.leaveWhitespace()
  3550 
       
  3551         return self
  1779         return self
  3552 
  1780 
  3553 
       
  3554 
       
  3555     def ignore( self, other ):
  1781     def ignore( self, other ):
  3556 
       
  3557         if isinstance( other, Suppress ):
  1782         if isinstance( other, Suppress ):
  3558 
       
  3559             if other not in self.ignoreExprs:
  1783             if other not in self.ignoreExprs:
  3560 
       
  3561                 super( ParseExpression, self).ignore( other )
  1784                 super( ParseExpression, self).ignore( other )
  3562 
       
  3563                 for e in self.exprs:
  1785                 for e in self.exprs:
  3564 
       
  3565                     e.ignore( self.ignoreExprs[-1] )
  1786                     e.ignore( self.ignoreExprs[-1] )
  3566 
  1787         else:
  3567         else:
       
  3568 
       
  3569             super( ParseExpression, self).ignore( other )
  1788             super( ParseExpression, self).ignore( other )
  3570 
       
  3571             for e in self.exprs:
  1789             for e in self.exprs:
  3572 
       
  3573                 e.ignore( self.ignoreExprs[-1] )
  1790                 e.ignore( self.ignoreExprs[-1] )
  3574 
       
  3575         return self
  1791         return self
  3576 
  1792 
  3577 
       
  3578 
       
  3579     def __str__( self ):
  1793     def __str__( self ):
  3580 
       
  3581         try:
  1794         try:
  3582 
       
  3583             return super(ParseExpression,self).__str__()
  1795             return super(ParseExpression,self).__str__()
  3584 
       
  3585         except:
  1796         except:
  3586 
       
  3587             pass
  1797             pass
  3588 
       
  3589             
  1798             
  3590 
       
  3591         if self.strRepr is None:
  1799         if self.strRepr is None:
  3592 
       
  3593             self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
  1800             self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
  3594 
       
  3595         return self.strRepr
  1801         return self.strRepr
  3596 
  1802 
  3597 
       
  3598 
       
  3599     def streamline( self ):
  1803     def streamline( self ):
  3600 
       
  3601         super(ParseExpression,self).streamline()
  1804         super(ParseExpression,self).streamline()
  3602 
  1805 
  3603 
       
  3604 
       
  3605         for e in self.exprs:
  1806         for e in self.exprs:
  3606 
       
  3607             e.streamline()
  1807             e.streamline()
  3608 
  1808 
  3609 
       
  3610 
       
  3611         # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
  1809         # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
  3612 
       
  3613         # but only if there are no parse actions or resultsNames on the nested And's
  1810         # but only if there are no parse actions or resultsNames on the nested And's
  3614 
       
  3615         # (likewise for Or's and MatchFirst's)
  1811         # (likewise for Or's and MatchFirst's)
  3616 
       
  3617         if ( len(self.exprs) == 2 ):
  1812         if ( len(self.exprs) == 2 ):
  3618 
       
  3619             other = self.exprs[0]
  1813             other = self.exprs[0]
  3620 
       
  3621             if ( isinstance( other, self.__class__ ) and
  1814             if ( isinstance( other, self.__class__ ) and
  3622 
       
  3623                   not(other.parseAction) and
  1815                   not(other.parseAction) and
  3624 
       
  3625                   other.resultsName is None and
  1816                   other.resultsName is None and
  3626 
       
  3627                   not other.debug ):
  1817                   not other.debug ):
  3628 
       
  3629                 self.exprs = other.exprs[:] + [ self.exprs[1] ]
  1818                 self.exprs = other.exprs[:] + [ self.exprs[1] ]
  3630 
       
  3631                 self.strRepr = None
  1819                 self.strRepr = None
  3632 
       
  3633                 self.mayReturnEmpty |= other.mayReturnEmpty
  1820                 self.mayReturnEmpty |= other.mayReturnEmpty
  3634 
       
  3635                 self.mayIndexError  |= other.mayIndexError
  1821                 self.mayIndexError  |= other.mayIndexError
  3636 
  1822 
  3637 
       
  3638 
       
  3639             other = self.exprs[-1]
  1823             other = self.exprs[-1]
  3640 
       
  3641             if ( isinstance( other, self.__class__ ) and
  1824             if ( isinstance( other, self.__class__ ) and
  3642 
       
  3643                   not(other.parseAction) and
  1825                   not(other.parseAction) and
  3644 
       
  3645                   other.resultsName is None and
  1826                   other.resultsName is None and
  3646 
       
  3647                   not other.debug ):
  1827                   not other.debug ):
  3648 
       
  3649                 self.exprs = self.exprs[:-1] + other.exprs[:]
  1828                 self.exprs = self.exprs[:-1] + other.exprs[:]
  3650 
       
  3651                 self.strRepr = None
  1829                 self.strRepr = None
  3652 
       
  3653                 self.mayReturnEmpty |= other.mayReturnEmpty
  1830                 self.mayReturnEmpty |= other.mayReturnEmpty
  3654 
       
  3655                 self.mayIndexError  |= other.mayIndexError
  1831                 self.mayIndexError  |= other.mayIndexError
  3656 
  1832 
  3657 
       
  3658 
       
  3659         return self
  1833         return self
  3660 
  1834 
  3661 
       
  3662 
       
  3663     def setResultsName( self, name, listAllMatches=False ):
  1835     def setResultsName( self, name, listAllMatches=False ):
  3664 
       
  3665         ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
  1836         ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
  3666 
       
  3667         return ret
  1837         return ret
  3668 
  1838     
  3669     
       
  3670 
       
  3671     def validate( self, validateTrace=[] ):
  1839     def validate( self, validateTrace=[] ):
  3672 
       
  3673         tmp = validateTrace[:]+[self]
  1840         tmp = validateTrace[:]+[self]
  3674 
       
  3675         for e in self.exprs:
  1841         for e in self.exprs:
  3676 
       
  3677             e.validate(tmp)
  1842             e.validate(tmp)
  3678 
       
  3679         self.checkRecursion( [] )
  1843         self.checkRecursion( [] )
  3680 
  1844 
  3681 
       
  3682 
       
  3683 class And(ParseExpression):
  1845 class And(ParseExpression):
  3684 
       
  3685     """Requires all given ParseExpressions to be found in the given order.
  1846     """Requires all given ParseExpressions to be found in the given order.
  3686 
       
  3687        Expressions may be separated by whitespace.
  1847        Expressions may be separated by whitespace.
  3688 
       
  3689        May be constructed using the '+' operator.
  1848        May be constructed using the '+' operator.
  3690 
       
  3691     """
  1849     """
  3692 
       
  3693     def __init__( self, exprs, savelist = True ):
  1850     def __init__( self, exprs, savelist = True ):
  3694 
       
  3695         super(And,self).__init__(exprs, savelist)
  1851         super(And,self).__init__(exprs, savelist)
  3696 
       
  3697         self.mayReturnEmpty = True
  1852         self.mayReturnEmpty = True
  3698 
       
  3699         for e in self.exprs:
  1853         for e in self.exprs:
  3700 
       
  3701             if not e.mayReturnEmpty:
  1854             if not e.mayReturnEmpty:
  3702 
       
  3703                 self.mayReturnEmpty = False
  1855                 self.mayReturnEmpty = False
  3704 
       
  3705                 break
  1856                 break
  3706 
       
  3707         self.setWhitespaceChars( exprs[0].whiteChars )
  1857         self.setWhitespaceChars( exprs[0].whiteChars )
  3708 
       
  3709         self.skipWhitespace = exprs[0].skipWhitespace
  1858         self.skipWhitespace = exprs[0].skipWhitespace
  3710 
  1859 
  3711 
       
  3712 
       
  3713     def parseImpl( self, instring, loc, doActions=True ):
  1860     def parseImpl( self, instring, loc, doActions=True ):
  3714 
       
  3715         # pass False as last arg to _parse for first element, since we already
  1861         # pass False as last arg to _parse for first element, since we already
  3716 
       
  3717         # pre-parsed the string as part of our And pre-parsing
  1862         # pre-parsed the string as part of our And pre-parsing
  3718 
       
  3719         loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
  1863         loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
  3720 
       
  3721         for e in self.exprs[1:]:
  1864         for e in self.exprs[1:]:
  3722 
       
  3723             loc, exprtokens = e._parse( instring, loc, doActions )
  1865             loc, exprtokens = e._parse( instring, loc, doActions )
  3724 
       
  3725             if exprtokens or exprtokens.keys():
  1866             if exprtokens or exprtokens.keys():
  3726 
       
  3727                 resultlist += exprtokens
  1867                 resultlist += exprtokens
  3728 
       
  3729         return loc, resultlist
  1868         return loc, resultlist
  3730 
  1869 
  3731 
       
  3732 
       
  3733     def __iadd__(self, other ):
  1870     def __iadd__(self, other ):
  3734 
       
  3735         if isinstance( other, basestring ):
  1871         if isinstance( other, basestring ):
  3736 
       
  3737             other = Literal( other )
  1872             other = Literal( other )
  3738 
       
  3739         return self.append( other ) #And( [ self, other ] )
  1873         return self.append( other ) #And( [ self, other ] )
  3740 
  1874         
  3741         
       
  3742 
       
  3743     def checkRecursion( self, parseElementList ):
  1875     def checkRecursion( self, parseElementList ):
  3744 
       
  3745         subRecCheckList = parseElementList[:] + [ self ]
  1876         subRecCheckList = parseElementList[:] + [ self ]
  3746 
       
  3747         for e in self.exprs:
  1877         for e in self.exprs:
  3748 
       
  3749             e.checkRecursion( subRecCheckList )
  1878             e.checkRecursion( subRecCheckList )
  3750 
       
  3751             if not e.mayReturnEmpty:
  1879             if not e.mayReturnEmpty:
  3752 
       
  3753                 break
  1880                 break
  3754 
       
  3755                 
  1881                 
  3756 
       
  3757     def __str__( self ):
  1882     def __str__( self ):
  3758 
       
  3759         if hasattr(self,"name"):
  1883         if hasattr(self,"name"):
  3760 
       
  3761             return self.name
  1884             return self.name
  3762 
       
  3763             
  1885             
  3764 
       
  3765         if self.strRepr is None:
  1886         if self.strRepr is None:
  3766 
       
  3767             self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
  1887             self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
  3768 
  1888         
  3769         
       
  3770 
       
  3771         return self.strRepr
  1889         return self.strRepr
  3772 
  1890     
  3773     
       
  3774 
       
  3775 
       
  3776 
  1891 
  3777 class Or(ParseExpression):
  1892 class Or(ParseExpression):
  3778 
       
  3779     """Requires that at least one ParseExpression is found.
  1893     """Requires that at least one ParseExpression is found.
  3780 
       
  3781        If two expressions match, the expression that matches the longest string will be used.
  1894        If two expressions match, the expression that matches the longest string will be used.
  3782 
       
  3783        May be constructed using the '^' operator.
  1895        May be constructed using the '^' operator.
  3784 
       
  3785     """
  1896     """
  3786 
       
  3787     def __init__( self, exprs, savelist = False ):
  1897     def __init__( self, exprs, savelist = False ):
  3788 
       
  3789         super(Or,self).__init__(exprs, savelist)
  1898         super(Or,self).__init__(exprs, savelist)
  3790 
       
  3791         self.mayReturnEmpty = False
  1899         self.mayReturnEmpty = False
  3792 
       
  3793         for e in self.exprs:
  1900         for e in self.exprs:
  3794 
       
  3795             if e.mayReturnEmpty:
  1901             if e.mayReturnEmpty:
  3796 
       
  3797                 self.mayReturnEmpty = True
  1902                 self.mayReturnEmpty = True
  3798 
       
  3799                 break
  1903                 break
  3800 
  1904     
  3801     
       
  3802 
       
  3803     def parseImpl( self, instring, loc, doActions=True ):
  1905     def parseImpl( self, instring, loc, doActions=True ):
  3804 
       
  3805         maxExcLoc = -1
  1906         maxExcLoc = -1
  3806 
       
  3807         maxMatchLoc = -1
  1907         maxMatchLoc = -1
  3808 
       
  3809         for e in self.exprs:
  1908         for e in self.exprs:
  3810 
       
  3811             try:
  1909             try:
  3812 
       
  3813                 loc2 = e.tryParse( instring, loc )
  1910                 loc2 = e.tryParse( instring, loc )
  3814 
       
  3815             except ParseException, err:
  1911             except ParseException, err:
  3816 
       
  3817                 if err.loc > maxExcLoc:
  1912                 if err.loc > maxExcLoc:
  3818 
       
  3819                     maxException = err
  1913                     maxException = err
  3820 
       
  3821                     maxExcLoc = err.loc
  1914                     maxExcLoc = err.loc
  3822 
       
  3823             except IndexError, err:
  1915             except IndexError, err:
  3824 
       
  3825                 if len(instring) > maxExcLoc:
  1916                 if len(instring) > maxExcLoc:
  3826 
       
  3827                     maxException = ParseException(instring,len(instring),e.errmsg,self)
  1917                     maxException = ParseException(instring,len(instring),e.errmsg,self)
  3828 
       
  3829                     maxExcLoc = len(instring)
  1918                     maxExcLoc = len(instring)
  3830 
       
  3831             else:
  1919             else:
  3832 
       
  3833                 if loc2 > maxMatchLoc:
  1920                 if loc2 > maxMatchLoc:
  3834 
       
  3835                     maxMatchLoc = loc2
  1921                     maxMatchLoc = loc2
  3836 
       
  3837                     maxMatchExp = e
  1922                     maxMatchExp = e
  3838 
  1923         
  3839         
       
  3840 
       
  3841         if maxMatchLoc < 0:
  1924         if maxMatchLoc < 0:
  3842 
       
  3843             if self.exprs:
  1925             if self.exprs:
  3844 
       
  3845                 raise maxException
  1926                 raise maxException
  3846 
       
  3847             else:
  1927             else:
  3848 
       
  3849                 raise ParseException(instring, loc, "no defined alternatives to match", self)
  1928                 raise ParseException(instring, loc, "no defined alternatives to match", self)
  3850 
  1929 
  3851 
       
  3852 
       
  3853         return maxMatchExp._parse( instring, loc, doActions )
  1930         return maxMatchExp._parse( instring, loc, doActions )
  3854 
  1931 
  3855 
       
  3856 
       
  3857     def __ixor__(self, other ):
  1932     def __ixor__(self, other ):
  3858 
       
  3859         if isinstance( other, basestring ):
  1933         if isinstance( other, basestring ):
  3860 
       
  3861             other = Literal( other )
  1934             other = Literal( other )
  3862 
       
  3863         return self.append( other ) #Or( [ self, other ] )
  1935         return self.append( other ) #Or( [ self, other ] )
  3864 
  1936 
  3865 
       
  3866 
       
  3867     def __str__( self ):
  1937     def __str__( self ):
  3868 
       
  3869         if hasattr(self,"name"):
  1938         if hasattr(self,"name"):
  3870 
       
  3871             return self.name
  1939             return self.name
  3872 
       
  3873             
  1940             
  3874 
       
  3875         if self.strRepr is None:
  1941         if self.strRepr is None:
  3876 
       
  3877             self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
  1942             self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
  3878 
  1943         
  3879         
       
  3880 
       
  3881         return self.strRepr
  1944         return self.strRepr
  3882 
  1945     
  3883     
       
  3884 
       
  3885     def checkRecursion( self, parseElementList ):
  1946     def checkRecursion( self, parseElementList ):
  3886 
       
  3887         subRecCheckList = parseElementList[:] + [ self ]
  1947         subRecCheckList = parseElementList[:] + [ self ]
  3888 
       
  3889         for e in self.exprs:
  1948         for e in self.exprs:
  3890 
       
  3891             e.checkRecursion( subRecCheckList )
  1949             e.checkRecursion( subRecCheckList )
  3892 
  1950 
  3893 
  1951 
  3894 
       
  3895 
       
  3896 
       
  3897 class MatchFirst(ParseExpression):
  1952 class MatchFirst(ParseExpression):
  3898 
       
  3899     """Requires that at least one ParseExpression is found.
  1953     """Requires that at least one ParseExpression is found.
  3900 
       
  3901        If two expressions match, the first one listed is the one that will match.
  1954        If two expressions match, the first one listed is the one that will match.
  3902 
       
  3903        May be constructed using the '|' operator.
  1955        May be constructed using the '|' operator.
  3904 
       
  3905     """
  1956     """
  3906 
       
  3907     def __init__( self, exprs, savelist = False ):
  1957     def __init__( self, exprs, savelist = False ):
  3908 
       
  3909         super(MatchFirst,self).__init__(exprs, savelist)
  1958         super(MatchFirst,self).__init__(exprs, savelist)
  3910 
       
  3911         if exprs:
  1959         if exprs:
  3912 
       
  3913             self.mayReturnEmpty = False
  1960             self.mayReturnEmpty = False
  3914 
       
  3915             for e in self.exprs:
  1961             for e in self.exprs:
  3916 
       
  3917                 if e.mayReturnEmpty:
  1962                 if e.mayReturnEmpty:
  3918 
       
  3919                     self.mayReturnEmpty = True
  1963                     self.mayReturnEmpty = True
  3920 
       
  3921                     break
  1964                     break
  3922 
  1965         else:
  3923         else:
       
  3924 
       
  3925             self.mayReturnEmpty = True
  1966             self.mayReturnEmpty = True
  3926 
  1967     
  3927     
       
  3928 
       
  3929     def parseImpl( self, instring, loc, doActions=True ):
  1968     def parseImpl( self, instring, loc, doActions=True ):
  3930 
       
  3931         maxExcLoc = -1
  1969         maxExcLoc = -1
  3932 
       
  3933         for e in self.exprs:
  1970         for e in self.exprs:
  3934 
       
  3935             try:
  1971             try:
  3936 
       
  3937                 ret = e._parse( instring, loc, doActions )
  1972                 ret = e._parse( instring, loc, doActions )
  3938 
       
  3939                 return ret
  1973                 return ret
  3940 
       
  3941             except ParseException, err:
  1974             except ParseException, err:
  3942 
       
  3943                 if err.loc > maxExcLoc:
  1975                 if err.loc > maxExcLoc:
  3944 
       
  3945                     maxException = err
  1976                     maxException = err
  3946 
       
  3947                     maxExcLoc = err.loc
  1977                     maxExcLoc = err.loc
  3948 
       
  3949             except IndexError, err:
  1978             except IndexError, err:
  3950 
       
  3951                 if len(instring) > maxExcLoc:
  1979                 if len(instring) > maxExcLoc:
  3952 
       
  3953                     maxException = ParseException(instring,len(instring),e.errmsg,self)
  1980                     maxException = ParseException(instring,len(instring),e.errmsg,self)
  3954 
       
  3955                     maxExcLoc = len(instring)
  1981                     maxExcLoc = len(instring)
  3956 
  1982 
  3957 
       
  3958 
       
  3959         # only got here if no expression matched, raise exception for match that made it the furthest
  1983         # only got here if no expression matched, raise exception for match that made it the furthest
  3960 
  1984         else:
  3961         else:
       
  3962 
       
  3963             if self.exprs:
  1985             if self.exprs:
  3964 
       
  3965                 raise maxException
  1986                 raise maxException
  3966 
       
  3967             else:
  1987             else:
  3968 
       
  3969                 raise ParseException(instring, loc, "no defined alternatives to match", self)
  1988                 raise ParseException(instring, loc, "no defined alternatives to match", self)
  3970 
  1989 
  3971 
       
  3972 
       
  3973     def __ior__(self, other ):
  1990     def __ior__(self, other ):
  3974 
       
  3975         if isinstance( other, basestring ):
  1991         if isinstance( other, basestring ):
  3976 
       
  3977             other = Literal( other )
  1992             other = Literal( other )
  3978 
       
  3979         return self.append( other ) #MatchFirst( [ self, other ] )
  1993         return self.append( other ) #MatchFirst( [ self, other ] )
  3980 
  1994 
  3981 
       
  3982 
       
  3983     def __str__( self ):
  1995     def __str__( self ):
  3984 
       
  3985         if hasattr(self,"name"):
  1996         if hasattr(self,"name"):
  3986 
       
  3987             return self.name
  1997             return self.name
  3988 
       
  3989             
  1998             
  3990 
       
  3991         if self.strRepr is None:
  1999         if self.strRepr is None:
  3992 
       
  3993             self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
  2000             self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
  3994 
  2001         
  3995         
       
  3996 
       
  3997         return self.strRepr
  2002         return self.strRepr
  3998 
  2003     
  3999     
       
  4000 
       
  4001     def checkRecursion( self, parseElementList ):
  2004     def checkRecursion( self, parseElementList ):
  4002 
       
  4003         subRecCheckList = parseElementList[:] + [ self ]
  2005         subRecCheckList = parseElementList[:] + [ self ]
  4004 
       
  4005         for e in self.exprs:
  2006         for e in self.exprs:
  4006 
       
  4007             e.checkRecursion( subRecCheckList )
  2007             e.checkRecursion( subRecCheckList )
  4008 
  2008 
  4009 
       
  4010 
       
  4011 class Each(ParseExpression):
  2009 class Each(ParseExpression):
  4012 
       
  4013     """Requires all given ParseExpressions to be found, but in any order.
  2010     """Requires all given ParseExpressions to be found, but in any order.
  4014 
       
  4015        Expressions may be separated by whitespace.
  2011        Expressions may be separated by whitespace.
  4016 
       
  4017        May be constructed using the '&' operator.
  2012        May be constructed using the '&' operator.
  4018 
       
  4019     """
  2013     """
  4020 
       
  4021     def __init__( self, exprs, savelist = True ):
  2014     def __init__( self, exprs, savelist = True ):
  4022 
       
  4023         super(Each,self).__init__(exprs, savelist)
  2015         super(Each,self).__init__(exprs, savelist)
  4024 
       
  4025         self.mayReturnEmpty = True
  2016         self.mayReturnEmpty = True
  4026 
       
  4027         for e in self.exprs:
  2017         for e in self.exprs:
  4028 
       
  4029             if not e.mayReturnEmpty:
  2018             if not e.mayReturnEmpty:
  4030 
       
  4031                 self.mayReturnEmpty = False
  2019                 self.mayReturnEmpty = False
  4032 
       
  4033                 break
  2020                 break
  4034 
       
  4035         self.skipWhitespace = True
  2021         self.skipWhitespace = True
  4036 
       
  4037         self.optionals = [ e.expr for e in exprs if isinstance(e,Optional) ]
  2022         self.optionals = [ e.expr for e in exprs if isinstance(e,Optional) ]
  4038 
       
  4039         self.multioptionals = [ e.expr for e in exprs if isinstance(e,ZeroOrMore) ]
  2023         self.multioptionals = [ e.expr for e in exprs if isinstance(e,ZeroOrMore) ]
  4040 
       
  4041         self.multirequired = [ e.expr for e in exprs if isinstance(e,OneOrMore) ]
  2024         self.multirequired = [ e.expr for e in exprs if isinstance(e,OneOrMore) ]
  4042 
       
  4043         self.required = [ e for e in exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
  2025         self.required = [ e for e in exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
  4044 
       
  4045         self.required += self.multirequired
  2026         self.required += self.multirequired
  4046 
  2027 
  4047 
       
  4048 
       
  4049     def parseImpl( self, instring, loc, doActions=True ):
  2028     def parseImpl( self, instring, loc, doActions=True ):
  4050 
       
  4051         tmpLoc = loc
  2029         tmpLoc = loc
  4052 
       
  4053         tmpReqd = self.required[:]
  2030         tmpReqd = self.required[:]
  4054 
       
  4055         tmpOpt  = self.optionals[:]
  2031         tmpOpt  = self.optionals[:]
  4056 
       
  4057         matchOrder = []
  2032         matchOrder = []
  4058 
  2033 
  4059 
       
  4060 
       
  4061         keepMatching = True
  2034         keepMatching = True
  4062 
       
  4063         while keepMatching:
  2035         while keepMatching:
  4064 
       
  4065             tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
  2036             tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
  4066 
       
  4067             failed = []
  2037             failed = []
  4068 
       
  4069             for e in tmpExprs:
  2038             for e in tmpExprs:
  4070 
       
  4071                 try:
  2039                 try:
  4072 
       
  4073                     tmpLoc = e.tryParse( instring, tmpLoc )
  2040                     tmpLoc = e.tryParse( instring, tmpLoc )
  4074 
       
  4075                 except ParseException:
  2041                 except ParseException:
  4076 
       
  4077                     failed.append(e)
  2042                     failed.append(e)
  4078 
       
  4079                 else:
  2043                 else:
  4080 
       
  4081                     matchOrder.append(e)
  2044                     matchOrder.append(e)
  4082 
       
  4083                     if e in tmpReqd:
  2045                     if e in tmpReqd:
  4084 
       
  4085                         tmpReqd.remove(e)
  2046                         tmpReqd.remove(e)
  4086 
       
  4087                     elif e in tmpOpt:
  2047                     elif e in tmpOpt:
  4088 
       
  4089                         tmpOpt.remove(e)
  2048                         tmpOpt.remove(e)
  4090 
       
  4091             if len(failed) == len(tmpExprs):
  2049             if len(failed) == len(tmpExprs):
  4092 
       
  4093                 keepMatching = False
  2050                 keepMatching = False
  4094 
  2051         
  4095         
       
  4096 
       
  4097         if tmpReqd:
  2052         if tmpReqd:
  4098 
       
  4099             missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
  2053             missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
  4100 
       
  4101             raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
  2054             raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )
  4102 
  2055 
  4103 
       
  4104 
       
  4105         resultlist = []
  2056         resultlist = []
  4106 
       
  4107         for e in matchOrder:
  2057         for e in matchOrder:
  4108 
       
  4109             loc,results = e._parse(instring,loc,doActions)
  2058             loc,results = e._parse(instring,loc,doActions)
  4110 
       
  4111             resultlist.append(results)
  2059             resultlist.append(results)
  4112 
       
  4113             
  2060             
  4114 
       
  4115         finalResults = ParseResults([])
  2061         finalResults = ParseResults([])
  4116 
       
  4117         for r in resultlist:
  2062         for r in resultlist:
  4118 
       
  4119             dups = {}
  2063             dups = {}
  4120 
       
  4121             for k in r.keys():
  2064             for k in r.keys():
  4122 
       
  4123                 if k in finalResults.keys():
  2065                 if k in finalResults.keys():
  4124 
       
  4125                     tmp = ParseResults(finalResults[k])
  2066                     tmp = ParseResults(finalResults[k])
  4126 
       
  4127                     tmp += ParseResults(r[k])
  2067                     tmp += ParseResults(r[k])
  4128 
       
  4129                     dups[k] = tmp
  2068                     dups[k] = tmp
  4130 
       
  4131             finalResults += ParseResults(r)
  2069             finalResults += ParseResults(r)
  4132 
       
  4133             for k,v in dups.items():
  2070             for k,v in dups.items():
  4134 
       
  4135                 finalResults[k] = v
  2071                 finalResults[k] = v
  4136 
       
  4137         return loc, finalResults
  2072         return loc, finalResults
  4138 
  2073 
  4139 
       
  4140 
       
  4141     def __str__( self ):
  2074     def __str__( self ):
  4142 
       
  4143         if hasattr(self,"name"):
  2075         if hasattr(self,"name"):
  4144 
       
  4145             return self.name
  2076             return self.name
  4146 
       
  4147             
  2077             
  4148 
       
  4149         if self.strRepr is None:
  2078         if self.strRepr is None:
  4150 
       
  4151             self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
  2079             self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
  4152 
  2080         
  4153         
       
  4154 
       
  4155         return self.strRepr
  2081         return self.strRepr
  4156 
  2082     
  4157     
       
  4158 
       
  4159     def checkRecursion( self, parseElementList ):
  2083     def checkRecursion( self, parseElementList ):
  4160 
       
  4161         subRecCheckList = parseElementList[:] + [ self ]
  2084         subRecCheckList = parseElementList[:] + [ self ]
  4162 
       
  4163         for e in self.exprs:
  2085         for e in self.exprs:
  4164 
       
  4165             e.checkRecursion( subRecCheckList )
  2086             e.checkRecursion( subRecCheckList )
  4166 
  2087 
  4167 
  2088 
  4168 
       
  4169 
       
  4170 
       
  4171 class ParseElementEnhance(ParserElement):
  2089 class ParseElementEnhance(ParserElement):
  4172 
       
  4173     """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
  2090     """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
  4174 
       
  4175     def __init__( self, expr, savelist=False ):
  2091     def __init__( self, expr, savelist=False ):
  4176 
       
  4177         super(ParseElementEnhance,self).__init__(savelist)
  2092         super(ParseElementEnhance,self).__init__(savelist)
  4178 
       
  4179         if isinstance( expr, basestring ):
  2093         if isinstance( expr, basestring ):
  4180 
       
  4181             expr = Literal(expr)
  2094             expr = Literal(expr)
  4182 
       
  4183         self.expr = expr
  2095         self.expr = expr
  4184 
       
  4185         self.strRepr = None
  2096         self.strRepr = None
  4186 
       
  4187         if expr is not None:
  2097         if expr is not None:
  4188 
       
  4189             self.mayIndexError = expr.mayIndexError
  2098             self.mayIndexError = expr.mayIndexError
  4190 
       
  4191             self.setWhitespaceChars( expr.whiteChars )
  2099             self.setWhitespaceChars( expr.whiteChars )
  4192 
       
  4193             self.skipWhitespace = expr.skipWhitespace
  2100             self.skipWhitespace = expr.skipWhitespace
  4194 
       
  4195             self.saveAsList = expr.saveAsList
  2101             self.saveAsList = expr.saveAsList
  4196 
  2102     
  4197     
       
  4198 
       
  4199     def parseImpl( self, instring, loc, doActions=True ):
  2103     def parseImpl( self, instring, loc, doActions=True ):
  4200 
       
  4201         if self.expr is not None:
  2104         if self.expr is not None:
  4202 
       
  4203             return self.expr._parse( instring, loc, doActions, callPreParse=False )
  2105             return self.expr._parse( instring, loc, doActions, callPreParse=False )
  4204 
  2106         else:
  4205         else:
       
  4206 
       
  4207             raise ParseException("",loc,self.errmsg,self)
  2107             raise ParseException("",loc,self.errmsg,self)
  4208 
       
  4209             
  2108             
  4210 
       
  4211     def leaveWhitespace( self ):
  2109     def leaveWhitespace( self ):
  4212 
       
  4213         self.skipWhitespace = False
  2110         self.skipWhitespace = False
  4214 
       
  4215         self.expr = self.expr.copy()
  2111         self.expr = self.expr.copy()
  4216 
       
  4217         if self.expr is not None:
  2112         if self.expr is not None:
  4218 
       
  4219             self.expr.leaveWhitespace()
  2113             self.expr.leaveWhitespace()
  4220 
       
  4221         return self
  2114         return self
  4222 
  2115 
  4223 
       
  4224 
       
  4225     def ignore( self, other ):
  2116     def ignore( self, other ):
  4226 
       
  4227         if isinstance( other, Suppress ):
  2117         if isinstance( other, Suppress ):
  4228 
       
  4229             if other not in self.ignoreExprs:
  2118             if other not in self.ignoreExprs:
  4230 
       
  4231                 super( ParseElementEnhance, self).ignore( other )
  2119                 super( ParseElementEnhance, self).ignore( other )
  4232 
       
  4233                 if self.expr is not None:
  2120                 if self.expr is not None:
  4234 
       
  4235                     self.expr.ignore( self.ignoreExprs[-1] )
  2121                     self.expr.ignore( self.ignoreExprs[-1] )
  4236 
  2122         else:
  4237         else:
       
  4238 
       
  4239             super( ParseElementEnhance, self).ignore( other )
  2123             super( ParseElementEnhance, self).ignore( other )
  4240 
       
  4241             if self.expr is not None:
  2124             if self.expr is not None:
  4242 
       
  4243                 self.expr.ignore( self.ignoreExprs[-1] )
  2125                 self.expr.ignore( self.ignoreExprs[-1] )
  4244 
       
  4245         return self
  2126         return self
  4246 
  2127 
  4247 
       
  4248 
       
  4249     def streamline( self ):
  2128     def streamline( self ):
  4250 
       
  4251         super(ParseElementEnhance,self).streamline()
  2129         super(ParseElementEnhance,self).streamline()
  4252 
       
  4253         if self.expr is not None:
  2130         if self.expr is not None:
  4254 
       
  4255             self.expr.streamline()
  2131             self.expr.streamline()
  4256 
       
  4257         return self
  2132         return self
  4258 
  2133 
  4259 
       
  4260 
       
  4261     def checkRecursion( self, parseElementList ):
  2134     def checkRecursion( self, parseElementList ):
  4262 
       
  4263         if self in parseElementList:
  2135         if self in parseElementList:
  4264 
       
  4265             raise RecursiveGrammarException( parseElementList+[self] )
  2136             raise RecursiveGrammarException( parseElementList+[self] )
  4266 
       
  4267         subRecCheckList = parseElementList[:] + [ self ]
  2137         subRecCheckList = parseElementList[:] + [ self ]
  4268 
       
  4269         if self.expr is not None:
  2138         if self.expr is not None:
  4270 
       
  4271             self.expr.checkRecursion( subRecCheckList )
  2139             self.expr.checkRecursion( subRecCheckList )
  4272 
  2140         
  4273         
       
  4274 
       
  4275     def validate( self, validateTrace=[] ):
  2141     def validate( self, validateTrace=[] ):
  4276 
       
  4277         tmp = validateTrace[:]+[self]
  2142         tmp = validateTrace[:]+[self]
  4278 
       
  4279         if self.expr is not None:
  2143         if self.expr is not None:
  4280 
       
  4281             self.expr.validate(tmp)
  2144             self.expr.validate(tmp)
  4282 
       
  4283         self.checkRecursion( [] )
  2145         self.checkRecursion( [] )
  4284 
  2146     
  4285     
       
  4286 
       
  4287     def __str__( self ):
  2147     def __str__( self ):
  4288 
       
  4289         try:
  2148         try:
  4290 
       
  4291             return super(ParseElementEnhance,self).__str__()
  2149             return super(ParseElementEnhance,self).__str__()
  4292 
       
  4293         except:
  2150         except:
  4294 
       
  4295             pass
  2151             pass
  4296 
       
  4297             
  2152             
  4298 
       
  4299         if self.strRepr is None and self.expr is not None:
  2153         if self.strRepr is None and self.expr is not None:
  4300 
       
  4301             self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
  2154             self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
  4302 
       
  4303         return self.strRepr
  2155         return self.strRepr
  4304 
  2156 
  4305 
  2157 
  4306 
       
  4307 
       
  4308 
       
  4309 class FollowedBy(ParseElementEnhance):
  2158 class FollowedBy(ParseElementEnhance):
  4310 
       
  4311     """Lookahead matching of the given parse expression.  FollowedBy
  2159     """Lookahead matching of the given parse expression.  FollowedBy
  4312 
       
  4313     does *not* advance the parsing position within the input string, it only 
  2160     does *not* advance the parsing position within the input string, it only 
  4314 
       
  4315     verifies that the specified parse expression matches at the current 
  2161     verifies that the specified parse expression matches at the current 
  4316 
       
  4317     position.  FollowedBy always returns a null token list."""
  2162     position.  FollowedBy always returns a null token list."""
  4318 
       
  4319     def __init__( self, expr ):
  2163     def __init__( self, expr ):
  4320 
       
  4321         super(FollowedBy,self).__init__(expr)
  2164         super(FollowedBy,self).__init__(expr)
  4322 
       
  4323         self.mayReturnEmpty = True
  2165         self.mayReturnEmpty = True
  4324 
  2166         
  4325         
       
  4326 
       
  4327     def parseImpl( self, instring, loc, doActions=True ):
  2167     def parseImpl( self, instring, loc, doActions=True ):
  4328 
       
  4329         self.expr.tryParse( instring, loc )
  2168         self.expr.tryParse( instring, loc )
  4330 
       
  4331         return loc, []
  2169         return loc, []
  4332 
  2170 
  4333 
  2171 
  4334 
       
  4335 
       
  4336 
       
  4337 class NotAny(ParseElementEnhance):
  2172 class NotAny(ParseElementEnhance):
  4338 
       
  4339     """Lookahead to disallow matching with the given parse expression.  NotAny
  2173     """Lookahead to disallow matching with the given parse expression.  NotAny
  4340 
       
  4341     does *not* advance the parsing position within the input string, it only 
  2174     does *not* advance the parsing position within the input string, it only 
  4342 
       
  4343     verifies that the specified parse expression does *not* match at the current 
  2175     verifies that the specified parse expression does *not* match at the current 
  4344 
       
  4345     position.  Also, NotAny does *not* skip over leading whitespace. NotAny 
  2176     position.  Also, NotAny does *not* skip over leading whitespace. NotAny 
  4346 
       
  4347     always returns a null token list.  May be constructed using the '~' operator."""
  2177     always returns a null token list.  May be constructed using the '~' operator."""
  4348 
       
  4349     def __init__( self, expr ):
  2178     def __init__( self, expr ):
  4350 
       
  4351         super(NotAny,self).__init__(expr)
  2179         super(NotAny,self).__init__(expr)
  4352 
       
  4353         #~ self.leaveWhitespace()
  2180         #~ self.leaveWhitespace()
  4354 
       
  4355         self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
  2181         self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
  4356 
       
  4357         self.mayReturnEmpty = True
  2182         self.mayReturnEmpty = True
  4358 
       
  4359         self.errmsg = "Found unwanted token, "+_ustr(self.expr)
  2183         self.errmsg = "Found unwanted token, "+_ustr(self.expr)
  4360 
       
  4361         self.myException = ParseException("",0,self.errmsg,self)
  2184         self.myException = ParseException("",0,self.errmsg,self)
  4362 
  2185         
  4363         
       
  4364 
       
  4365     def parseImpl( self, instring, loc, doActions=True ):
  2186     def parseImpl( self, instring, loc, doActions=True ):
  4366 
       
  4367         try:
  2187         try:
  4368 
       
  4369             self.expr.tryParse( instring, loc )
  2188             self.expr.tryParse( instring, loc )
  4370 
       
  4371         except (ParseException,IndexError):
  2189         except (ParseException,IndexError):
  4372 
       
  4373             pass
  2190             pass
  4374 
  2191         else:
  4375         else:
       
  4376 
       
  4377             #~ raise ParseException(instring, loc, self.errmsg )
  2192             #~ raise ParseException(instring, loc, self.errmsg )
  4378 
       
  4379             exc = self.myException
  2193             exc = self.myException
  4380 
       
  4381             exc.loc = loc
  2194             exc.loc = loc
  4382 
       
  4383             exc.pstr = instring
  2195             exc.pstr = instring
  4384 
       
  4385             raise exc
  2196             raise exc
  4386 
       
  4387         return loc, []
  2197         return loc, []
  4388 
  2198 
  4389 
       
  4390 
       
  4391     def __str__( self ):
  2199     def __str__( self ):
  4392 
       
  4393         if hasattr(self,"name"):
  2200         if hasattr(self,"name"):
  4394 
       
  4395             return self.name
  2201             return self.name
  4396 
       
  4397             
  2202             
  4398 
       
  4399         if self.strRepr is None:
  2203         if self.strRepr is None:
  4400 
       
  4401             self.strRepr = "~{" + _ustr(self.expr) + "}"
  2204             self.strRepr = "~{" + _ustr(self.expr) + "}"
  4402 
  2205         
  4403         
       
  4404 
       
  4405         return self.strRepr
  2206         return self.strRepr
  4406 
  2207 
  4407 
  2208 
  4408 
       
  4409 
       
  4410 
       
  4411 class ZeroOrMore(ParseElementEnhance):
  2209 class ZeroOrMore(ParseElementEnhance):
  4412 
       
  4413     """Optional repetition of zero or more of the given expression."""
  2210     """Optional repetition of zero or more of the given expression."""
  4414 
       
  4415     def __init__( self, expr ):
  2211     def __init__( self, expr ):
  4416 
       
  4417         super(ZeroOrMore,self).__init__(expr)
  2212         super(ZeroOrMore,self).__init__(expr)
  4418 
       
  4419         self.mayReturnEmpty = True
  2213         self.mayReturnEmpty = True
  4420 
  2214     
  4421     
       
  4422 
       
  4423     def parseImpl( self, instring, loc, doActions=True ):
  2215     def parseImpl( self, instring, loc, doActions=True ):
  4424 
       
  4425         tokens = []
  2216         tokens = []
  4426 
       
  4427         try:
  2217         try:
  4428 
       
  4429             loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  2218             loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  4430 
       
  4431             hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
  2219             hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
  4432 
       
  4433             while 1:
  2220             while 1:
  4434 
       
  4435                 if hasIgnoreExprs:
  2221                 if hasIgnoreExprs:
  4436 
       
  4437                     preloc = self.skipIgnorables( instring, loc )
  2222                     preloc = self.skipIgnorables( instring, loc )
  4438 
       
  4439                 else:
  2223                 else:
  4440 
       
  4441                     preloc = loc
  2224                     preloc = loc
  4442 
       
  4443                 loc, tmptokens = self.expr._parse( instring, preloc, doActions )
  2225                 loc, tmptokens = self.expr._parse( instring, preloc, doActions )
  4444 
       
  4445                 if tmptokens or tmptokens.keys():
  2226                 if tmptokens or tmptokens.keys():
  4446 
       
  4447                     tokens += tmptokens
  2227                     tokens += tmptokens
  4448 
       
  4449         except (ParseException,IndexError):
  2228         except (ParseException,IndexError):
  4450 
       
  4451             pass
  2229             pass
  4452 
  2230 
  4453 
       
  4454 
       
  4455         return loc, tokens
  2231         return loc, tokens
  4456 
  2232 
  4457 
       
  4458 
       
  4459     def __str__( self ):
  2233     def __str__( self ):
  4460 
       
  4461         if hasattr(self,"name"):
  2234         if hasattr(self,"name"):
  4462 
       
  4463             return self.name
  2235             return self.name
  4464 
       
  4465             
  2236             
  4466 
       
  4467         if self.strRepr is None:
  2237         if self.strRepr is None:
  4468 
       
  4469             self.strRepr = "[" + _ustr(self.expr) + "]..."
  2238             self.strRepr = "[" + _ustr(self.expr) + "]..."
  4470 
  2239         
  4471         
       
  4472 
       
  4473         return self.strRepr
  2240         return self.strRepr
  4474 
  2241     
  4475     
       
  4476 
       
  4477     def setResultsName( self, name, listAllMatches=False ):
  2242     def setResultsName( self, name, listAllMatches=False ):
  4478 
       
  4479         ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
  2243         ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
  4480 
       
  4481         ret.saveAsList = True
  2244         ret.saveAsList = True
  4482 
       
  4483         return ret
  2245         return ret
  4484 
  2246     
  4485     
       
  4486 
       
  4487 
       
  4488 
  2247 
  4489 class OneOrMore(ParseElementEnhance):
  2248 class OneOrMore(ParseElementEnhance):
  4490 
       
  4491     """Repetition of one or more of the given expression."""
  2249     """Repetition of one or more of the given expression."""
  4492 
       
  4493     def parseImpl( self, instring, loc, doActions=True ):
  2250     def parseImpl( self, instring, loc, doActions=True ):
  4494 
       
  4495         # must be at least one
  2251         # must be at least one
  4496 
       
  4497         loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  2252         loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  4498 
       
  4499         try:
  2253         try:
  4500 
       
  4501             hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
  2254             hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
  4502 
       
  4503             while 1:
  2255             while 1:
  4504 
       
  4505                 if hasIgnoreExprs:
  2256                 if hasIgnoreExprs:
  4506 
       
  4507                     preloc = self.skipIgnorables( instring, loc )
  2257                     preloc = self.skipIgnorables( instring, loc )
  4508 
       
  4509                 else:
  2258                 else:
  4510 
       
  4511                     preloc = loc
  2259                     preloc = loc
  4512 
       
  4513                 loc, tmptokens = self.expr._parse( instring, preloc, doActions )
  2260                 loc, tmptokens = self.expr._parse( instring, preloc, doActions )
  4514 
       
  4515                 if tmptokens or tmptokens.keys():
  2261                 if tmptokens or tmptokens.keys():
  4516 
       
  4517                     tokens += tmptokens
  2262                     tokens += tmptokens
  4518 
       
  4519         except (ParseException,IndexError):
  2263         except (ParseException,IndexError):
  4520 
       
  4521             pass
  2264             pass
  4522 
  2265 
  4523 
       
  4524 
       
  4525         return loc, tokens
  2266         return loc, tokens
  4526 
  2267 
  4527 
       
  4528 
       
  4529     def __str__( self ):
  2268     def __str__( self ):
  4530 
       
  4531         if hasattr(self,"name"):
  2269         if hasattr(self,"name"):
  4532 
       
  4533             return self.name
  2270             return self.name
  4534 
       
  4535             
  2271             
  4536 
       
  4537         if self.strRepr is None:
  2272         if self.strRepr is None:
  4538 
       
  4539             self.strRepr = "{" + _ustr(self.expr) + "}..."
  2273             self.strRepr = "{" + _ustr(self.expr) + "}..."
  4540 
  2274         
  4541         
       
  4542 
       
  4543         return self.strRepr
  2275         return self.strRepr
  4544 
  2276     
  4545     
       
  4546 
       
  4547     def setResultsName( self, name, listAllMatches=False ):
  2277     def setResultsName( self, name, listAllMatches=False ):
  4548 
       
  4549         ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
  2278         ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
  4550 
       
  4551         ret.saveAsList = True
  2279         ret.saveAsList = True
  4552 
       
  4553         return ret
  2280         return ret
  4554 
  2281 
  4555 
       
  4556 
       
  4557 class _NullToken(object):
  2282 class _NullToken(object):
  4558 
       
  4559     def __bool__(self):
  2283     def __bool__(self):
  4560 
       
  4561         return False
  2284         return False
  4562 
       
  4563     def __str__(self):
  2285     def __str__(self):
  4564 
       
  4565         return ""
  2286         return ""
  4566 
  2287 
  4567 
       
  4568 
       
  4569 _optionalNotMatched = _NullToken()
  2288 _optionalNotMatched = _NullToken()
  4570 
       
  4571 class Optional(ParseElementEnhance):
  2289 class Optional(ParseElementEnhance):
  4572 
       
  4573     """Optional matching of the given expression.
  2290     """Optional matching of the given expression.
  4574 
       
  4575        A default return string can also be specified, if the optional expression
  2291        A default return string can also be specified, if the optional expression
  4576 
       
  4577        is not found.
  2292        is not found.
  4578 
       
  4579     """
  2293     """
  4580 
       
  4581     def __init__( self, exprs, default=_optionalNotMatched ):
  2294     def __init__( self, exprs, default=_optionalNotMatched ):
  4582 
       
  4583         super(Optional,self).__init__( exprs, savelist=False )
  2295         super(Optional,self).__init__( exprs, savelist=False )
  4584 
       
  4585         self.defaultValue = default
  2296         self.defaultValue = default
  4586 
       
  4587         self.mayReturnEmpty = True
  2297         self.mayReturnEmpty = True
  4588 
  2298 
  4589 
       
  4590 
       
  4591     def parseImpl( self, instring, loc, doActions=True ):
  2299     def parseImpl( self, instring, loc, doActions=True ):
  4592 
       
  4593         try:
  2300         try:
  4594 
       
  4595             loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  2301             loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
  4596 
       
  4597         except (ParseException,IndexError):
  2302         except (ParseException,IndexError):
  4598 
       
  4599             if self.defaultValue is not _optionalNotMatched:
  2303             if self.defaultValue is not _optionalNotMatched:
  4600 
       
  4601                 tokens = [ self.defaultValue ]
  2304                 tokens = [ self.defaultValue ]
  4602 
       
  4603             else:
  2305             else:
  4604 
       
  4605                 tokens = []
  2306                 tokens = []
  4606 
       
  4607         return loc, tokens
  2307         return loc, tokens
  4608 
  2308 
  4609 
       
  4610 
       
  4611     def __str__( self ):
  2309     def __str__( self ):
  4612 
       
  4613         if hasattr(self,"name"):
  2310         if hasattr(self,"name"):
  4614 
       
  4615             return self.name
  2311             return self.name
  4616 
       
  4617             
  2312             
  4618 
       
  4619         if self.strRepr is None:
  2313         if self.strRepr is None:
  4620 
       
  4621             self.strRepr = "[" + _ustr(self.expr) + "]"
  2314             self.strRepr = "[" + _ustr(self.expr) + "]"
  4622 
  2315         
  4623         
       
  4624 
       
  4625         return self.strRepr
  2316         return self.strRepr
  4626 
  2317 
  4627 
  2318 
  4628 
       
  4629 
       
  4630 
       
  4631 class SkipTo(ParseElementEnhance):
  2319 class SkipTo(ParseElementEnhance):
  4632 
       
  4633     """Token for skipping over all undefined text until the matched expression is found.
  2320     """Token for skipping over all undefined text until the matched expression is found.
  4634 
       
  4635        If include is set to true, the matched expression is also consumed.  The ignore
  2321        If include is set to true, the matched expression is also consumed.  The ignore
  4636 
       
  4637        argument is used to define grammars (typically quoted strings and comments) that 
  2322        argument is used to define grammars (typically quoted strings and comments) that 
  4638 
       
  4639        might contain false matches.
  2323        might contain false matches.
  4640 
       
  4641     """
  2324     """
  4642 
       
  4643     def __init__( self, other, include=False, ignore=None ):
  2325     def __init__( self, other, include=False, ignore=None ):
  4644 
       
  4645         super( SkipTo, self ).__init__( other )
  2326         super( SkipTo, self ).__init__( other )
  4646 
       
  4647         if ignore is not None:
  2327         if ignore is not None:
  4648 
       
  4649             self.expr = self.expr.copy()
  2328             self.expr = self.expr.copy()
  4650 
       
  4651             self.expr.ignore(ignore)
  2329             self.expr.ignore(ignore)
  4652 
       
  4653         self.mayReturnEmpty = True
  2330         self.mayReturnEmpty = True
  4654 
       
  4655         self.mayIndexError = False
  2331         self.mayIndexError = False
  4656 
       
  4657         self.includeMatch = include
  2332         self.includeMatch = include
  4658 
       
  4659         self.asList = False
  2333         self.asList = False
  4660 
       
  4661         self.errmsg = "No match found for "+_ustr(self.expr)
  2334         self.errmsg = "No match found for "+_ustr(self.expr)
  4662 
       
  4663         self.myException = ParseException("",0,self.errmsg,self)
  2335         self.myException = ParseException("",0,self.errmsg,self)
  4664 
  2336 
  4665 
       
  4666 
       
  4667     def parseImpl( self, instring, loc, doActions=True ):
  2337     def parseImpl( self, instring, loc, doActions=True ):
  4668 
       
  4669         startLoc = loc
  2338         startLoc = loc
  4670 
       
  4671         instrlen = len(instring)
  2339         instrlen = len(instring)
  4672 
       
  4673         expr = self.expr
  2340         expr = self.expr
  4674 
       
  4675         while loc <= instrlen:
  2341         while loc <= instrlen:
  4676 
       
  4677             try:
  2342             try:
  4678 
       
  4679                 loc = expr.skipIgnorables( instring, loc )
  2343                 loc = expr.skipIgnorables( instring, loc )
  4680 
       
  4681                 expr._parse( instring, loc, doActions=False, callPreParse=False )
  2344                 expr._parse( instring, loc, doActions=False, callPreParse=False )
  4682 
       
  4683                 if self.includeMatch:
  2345                 if self.includeMatch:
  4684 
       
  4685                     skipText = instring[startLoc:loc]
  2346                     skipText = instring[startLoc:loc]
  4686 
       
  4687                     loc,mat = expr._parse(instring,loc)
  2347                     loc,mat = expr._parse(instring,loc)
  4688 
       
  4689                     if mat:
  2348                     if mat:
  4690 
       
  4691                         return loc, [ skipText, mat ]
  2349                         return loc, [ skipText, mat ]
  4692 
       
  4693                     else:
  2350                     else:
  4694 
       
  4695                         return loc, [ skipText ]
  2351                         return loc, [ skipText ]
  4696 
       
  4697                 else:
  2352                 else:
  4698 
       
  4699                     return loc, [ instring[startLoc:loc] ]
  2353                     return loc, [ instring[startLoc:loc] ]
  4700 
       
  4701             except (ParseException,IndexError):
  2354             except (ParseException,IndexError):
  4702 
       
  4703                 loc += 1
  2355                 loc += 1
  4704 
       
  4705         exc = self.myException
  2356         exc = self.myException
  4706 
       
  4707         exc.loc = loc
  2357         exc.loc = loc
  4708 
       
  4709         exc.pstr = instring
  2358         exc.pstr = instring
  4710 
       
  4711         raise exc
  2359         raise exc
  4712 
  2360 
  4713 
       
  4714 
       
  4715 class Forward(ParseElementEnhance):
  2361 class Forward(ParseElementEnhance):
  4716 
       
  4717     """Forward declaration of an expression to be defined later -
  2362     """Forward declaration of an expression to be defined later -
  4718 
       
  4719        used for recursive grammars, such as algebraic infix notation.
  2363        used for recursive grammars, such as algebraic infix notation.
  4720 
       
  4721        When the expression is known, it is assigned to the Forward variable using the '<<' operator.
  2364        When the expression is known, it is assigned to the Forward variable using the '<<' operator.
  4722 
       
  4723        
  2365        
  4724 
       
  4725        Note: take care when assigning to Forward not to overlook precedence of operators.
  2366        Note: take care when assigning to Forward not to overlook precedence of operators.
  4726 
       
  4727        Specifically, '|' has a lower precedence than '<<', so that::
  2367        Specifically, '|' has a lower precedence than '<<', so that::
  4728 
       
  4729           fwdExpr << a | b | c
  2368           fwdExpr << a | b | c
  4730 
       
  4731        will actually be evaluated as::
  2369        will actually be evaluated as::
  4732 
       
  4733           (fwdExpr << a) | b | c
  2370           (fwdExpr << a) | b | c
  4734 
       
  4735        thereby leaving b and c out as parseable alternatives.  It is recommended that you
  2371        thereby leaving b and c out as parseable alternatives.  It is recommended that you
  4736 
       
  4737        explicitly group the values inserted into the Forward::
  2372        explicitly group the values inserted into the Forward::
  4738 
       
  4739           fwdExpr << (a | b | c)
  2373           fwdExpr << (a | b | c)
  4740 
       
  4741     """
  2374     """
  4742 
       
  4743     def __init__( self, other=None ):
  2375     def __init__( self, other=None ):
  4744 
       
  4745         super(Forward,self).__init__( other, savelist=False )
  2376         super(Forward,self).__init__( other, savelist=False )
  4746 
  2377 
  4747 
       
  4748 
       
  4749     def __lshift__( self, other ):
  2378     def __lshift__( self, other ):
  4750 
       
  4751         if isinstance( other, basestring ):
  2379         if isinstance( other, basestring ):
  4752 
       
  4753             other = Literal(other)
  2380             other = Literal(other)
  4754 
       
  4755         self.expr = other
  2381         self.expr = other
  4756 
       
  4757         self.mayReturnEmpty = other.mayReturnEmpty
  2382         self.mayReturnEmpty = other.mayReturnEmpty
  4758 
       
  4759         self.strRepr = None
  2383         self.strRepr = None
  4760 
       
  4761         return self
  2384         return self
  4762 
  2385 
  4763 
       
  4764 
       
  4765     def leaveWhitespace( self ):
  2386     def leaveWhitespace( self ):
  4766 
       
  4767         self.skipWhitespace = False
  2387         self.skipWhitespace = False
  4768 
       
  4769         return self
  2388         return self
  4770 
  2389 
  4771 
       
  4772 
       
  4773     def streamline( self ):
  2390     def streamline( self ):
  4774 
       
  4775         if not self.streamlined:
  2391         if not self.streamlined:
  4776 
       
  4777             self.streamlined = True
  2392             self.streamlined = True
  4778 
       
  4779             if self.expr is not None: 
  2393             if self.expr is not None: 
  4780 
       
  4781                 self.expr.streamline()
  2394                 self.expr.streamline()
  4782 
       
  4783         return self
  2395         return self
  4784 
  2396 
  4785 
       
  4786 
       
  4787     def validate( self, validateTrace=[] ):
  2397     def validate( self, validateTrace=[] ):
  4788 
       
  4789         if self not in validateTrace:
  2398         if self not in validateTrace:
  4790 
       
  4791             tmp = validateTrace[:]+[self]
  2399             tmp = validateTrace[:]+[self]
  4792 
       
  4793             if self.expr is not None: 
  2400             if self.expr is not None: 
  4794 
       
  4795                 self.expr.validate(tmp)
  2401                 self.expr.validate(tmp)
  4796 
       
  4797         self.checkRecursion([])        
  2402         self.checkRecursion([])        
  4798 
  2403         
  4799         
       
  4800 
       
  4801     def __str__( self ):
  2404     def __str__( self ):
  4802 
       
  4803         if hasattr(self,"name"):
  2405         if hasattr(self,"name"):
  4804 
       
  4805             return self.name
  2406             return self.name
  4806 
  2407 
  4807 
       
  4808 
       
  4809         self.__class__ = _ForwardNoRecurse
  2408         self.__class__ = _ForwardNoRecurse
  4810 
       
  4811         try:
  2409         try:
  4812 
       
  4813             if self.expr is not None: 
  2410             if self.expr is not None: 
  4814 
       
  4815                 retString = _ustr(self.expr)
  2411                 retString = _ustr(self.expr)
  4816 
       
  4817             else:
  2412             else:
  4818 
       
  4819                 retString = "None"
  2413                 retString = "None"
  4820 
       
  4821         finally:
  2414         finally:
  4822 
       
  4823             self.__class__ = Forward
  2415             self.__class__ = Forward
  4824 
       
  4825         return "Forward: "+retString
  2416         return "Forward: "+retString
  4826 
  2417         
  4827         
       
  4828 
       
  4829     def copy(self):
  2418     def copy(self):
  4830 
       
  4831         if self.expr is not None:
  2419         if self.expr is not None:
  4832 
       
  4833             return super(Forward,self).copy()
  2420             return super(Forward,self).copy()
  4834 
  2421         else:
  4835         else:
       
  4836 
       
  4837             ret = Forward()
  2422             ret = Forward()
  4838 
       
  4839             ret << self
  2423             ret << self
  4840 
       
  4841             return ret
  2424             return ret
  4842 
  2425 
  4843 
       
  4844 
       
  4845 class _ForwardNoRecurse(Forward):
  2426 class _ForwardNoRecurse(Forward):
  4846 
       
  4847     def __str__( self ):
  2427     def __str__( self ):
  4848 
       
  4849         return "..."
  2428         return "..."
  4850 
  2429         
  4851         
       
  4852 
       
  4853 class TokenConverter(ParseElementEnhance):
  2430 class TokenConverter(ParseElementEnhance):
  4854 
       
  4855     """Abstract subclass of ParseExpression, for converting parsed results."""
  2431     """Abstract subclass of ParseExpression, for converting parsed results."""
  4856 
       
  4857     def __init__( self, expr, savelist=False ):
  2432     def __init__( self, expr, savelist=False ):
  4858 
       
  4859         super(TokenConverter,self).__init__( expr )#, savelist )
  2433         super(TokenConverter,self).__init__( expr )#, savelist )
  4860 
       
  4861         self.saveAsList = False
  2434         self.saveAsList = False
  4862 
  2435 
  4863 
  2436 
  4864 
       
  4865 
       
  4866 
       
  4867 class Upcase(TokenConverter):
  2437 class Upcase(TokenConverter):
  4868 
       
  4869     """Converter to upper case all matching tokens."""
  2438     """Converter to upper case all matching tokens."""
  4870 
       
  4871     def __init__(self, *args):
  2439     def __init__(self, *args):
  4872 
       
  4873         super(Upcase,self).__init__(*args)
  2440         super(Upcase,self).__init__(*args)
  4874 
       
  4875         warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", 
  2441         warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead", 
  4876 
       
  4877                        DeprecationWarning,stacklevel=2)
  2442                        DeprecationWarning,stacklevel=2)
  4878 
  2443     
  4879     
       
  4880 
       
  4881     def postParse( self, instring, loc, tokenlist ):
  2444     def postParse( self, instring, loc, tokenlist ):
  4882 
       
  4883         return map( string.upper, tokenlist )
  2445         return map( string.upper, tokenlist )
  4884 
  2446 
  4885 
  2447 
  4886 
       
  4887 
       
  4888 
       
  4889 class Combine(TokenConverter):
  2448 class Combine(TokenConverter):
  4890 
       
  4891     """Converter to concatenate all matching tokens to a single string.
  2449     """Converter to concatenate all matching tokens to a single string.
  4892 
       
  4893        By default, the matching patterns must also be contiguous in the input string;
  2450        By default, the matching patterns must also be contiguous in the input string;
  4894 
       
  4895        this can be disabled by specifying 'adjacent=False' in the constructor.
  2451        this can be disabled by specifying 'adjacent=False' in the constructor.
  4896 
       
  4897     """
  2452     """
  4898 
       
  4899     def __init__( self, expr, joinString="", adjacent=True ):
  2453     def __init__( self, expr, joinString="", adjacent=True ):
  4900 
       
  4901         super(Combine,self).__init__( expr )
  2454         super(Combine,self).__init__( expr )
  4902 
       
  4903         # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
  2455         # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
  4904 
       
  4905         if adjacent:
  2456         if adjacent:
  4906 
       
  4907             self.leaveWhitespace()
  2457             self.leaveWhitespace()
  4908 
       
  4909         self.adjacent = adjacent
  2458         self.adjacent = adjacent
  4910 
       
  4911         self.skipWhitespace = True
  2459         self.skipWhitespace = True
  4912 
       
  4913         self.joinString = joinString
  2460         self.joinString = joinString
  4914 
  2461 
  4915 
       
  4916 
       
  4917     def ignore( self, other ):
  2462     def ignore( self, other ):
  4918 
       
  4919         if self.adjacent:
  2463         if self.adjacent:
  4920 
       
  4921             ParserElement.ignore(self, other)
  2464             ParserElement.ignore(self, other)
  4922 
  2465         else:
  4923         else:
       
  4924 
       
  4925             super( Combine, self).ignore( other )
  2466             super( Combine, self).ignore( other )
  4926 
       
  4927         return self
  2467         return self
  4928 
  2468 
  4929 
       
  4930 
       
  4931     def postParse( self, instring, loc, tokenlist ):
  2469     def postParse( self, instring, loc, tokenlist ):
  4932 
       
  4933         retToks = tokenlist.copy()
  2470         retToks = tokenlist.copy()
  4934 
       
  4935         del retToks[:]
  2471         del retToks[:]
  4936 
       
  4937         retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
  2472         retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
  4938 
  2473 
  4939 
       
  4940 
       
  4941         if self.resultsName and len(retToks.keys())>0:
  2474         if self.resultsName and len(retToks.keys())>0:
  4942 
       
  4943             return [ retToks ]
  2475             return [ retToks ]
  4944 
  2476         else:
  4945         else:
       
  4946 
       
  4947             return retToks
  2477             return retToks
  4948 
  2478 
  4949 
       
  4950 
       
  4951 class Group(TokenConverter):
  2479 class Group(TokenConverter):
  4952 
       
  4953     """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
  2480     """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
  4954 
       
  4955     def __init__( self, expr ):
  2481     def __init__( self, expr ):
  4956 
       
  4957         super(Group,self).__init__( expr )
  2482         super(Group,self).__init__( expr )
  4958 
       
  4959         self.saveAsList = True
  2483         self.saveAsList = True
  4960 
  2484 
  4961 
       
  4962 
       
  4963     def postParse( self, instring, loc, tokenlist ):
  2485     def postParse( self, instring, loc, tokenlist ):
  4964 
       
  4965         return [ tokenlist ]
  2486         return [ tokenlist ]
  4966 
  2487         
  4967         
       
  4968 
       
  4969 class Dict(TokenConverter):
  2488 class Dict(TokenConverter):
  4970 
       
  4971     """Converter to return a repetitive expression as a list, but also as a dictionary.
  2489     """Converter to return a repetitive expression as a list, but also as a dictionary.
  4972 
       
  4973        Each element can also be referenced using the first token in the expression as its key.
  2490        Each element can also be referenced using the first token in the expression as its key.
  4974 
       
  4975        Useful for tabular report scraping when the first column can be used as a item key.
  2491        Useful for tabular report scraping when the first column can be used as a item key.
  4976 
       
  4977     """
  2492     """
  4978 
       
  4979     def __init__( self, exprs ):
  2493     def __init__( self, exprs ):
  4980 
       
  4981         super(Dict,self).__init__( exprs )
  2494         super(Dict,self).__init__( exprs )
  4982 
       
  4983         self.saveAsList = True
  2495         self.saveAsList = True
  4984 
  2496 
  4985 
       
  4986 
       
  4987     def postParse( self, instring, loc, tokenlist ):
  2497     def postParse( self, instring, loc, tokenlist ):
  4988 
       
  4989         for i,tok in enumerate(tokenlist):
  2498         for i,tok in enumerate(tokenlist):
  4990 
       
  4991             ikey = _ustr(tok[0]).strip()
  2499             ikey = _ustr(tok[0]).strip()
  4992 
       
  4993             if len(tok)==1:
  2500             if len(tok)==1:
  4994 
       
  4995                 tokenlist[ikey] = ("",i)
  2501                 tokenlist[ikey] = ("",i)
  4996 
       
  4997             elif len(tok)==2 and not isinstance(tok[1],ParseResults):
  2502             elif len(tok)==2 and not isinstance(tok[1],ParseResults):
  4998 
       
  4999                 tokenlist[ikey] = (tok[1],i)
  2503                 tokenlist[ikey] = (tok[1],i)
  5000 
       
  5001             else:
  2504             else:
  5002 
       
  5003                 dictvalue = tok.copy() #ParseResults(i)
  2505                 dictvalue = tok.copy() #ParseResults(i)
  5004 
       
  5005                 del dictvalue[0]
  2506                 del dictvalue[0]
  5006 
       
  5007                 if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
  2507                 if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
  5008 
       
  5009                     tokenlist[ikey] = (dictvalue,i)
  2508                     tokenlist[ikey] = (dictvalue,i)
  5010 
       
  5011                 else:
  2509                 else:
  5012 
       
  5013                     tokenlist[ikey] = (dictvalue[0],i)
  2510                     tokenlist[ikey] = (dictvalue[0],i)
  5014 
  2511 
  5015 
       
  5016 
       
  5017         if self.resultsName:
  2512         if self.resultsName:
  5018 
       
  5019             return [ tokenlist ]
  2513             return [ tokenlist ]
  5020 
  2514         else:
  5021         else:
       
  5022 
       
  5023             return tokenlist
  2515             return tokenlist
  5024 
  2516 
  5025 
  2517 
  5026 
       
  5027 
       
  5028 
       
  5029 class Suppress(TokenConverter):
  2518 class Suppress(TokenConverter):
  5030 
       
  5031     """Converter for ignoring the results of a parsed expression."""
  2519     """Converter for ignoring the results of a parsed expression."""
  5032 
       
  5033     def postParse( self, instring, loc, tokenlist ):
  2520     def postParse( self, instring, loc, tokenlist ):
  5034 
       
  5035         return []
  2521         return []
  5036 
  2522     
  5037     
       
  5038 
       
  5039     def suppress( self ):
  2523     def suppress( self ):
  5040 
       
  5041         return self
  2524         return self
  5042 
  2525 
  5043 
  2526 
  5044 
       
  5045 
       
  5046 
       
  5047 class OnlyOnce(object):
  2527 class OnlyOnce(object):
  5048 
       
  5049     """Wrapper for parse actions, to ensure they are only called once."""
  2528     """Wrapper for parse actions, to ensure they are only called once."""
  5050 
       
  5051     def __init__(self, methodCall):
  2529     def __init__(self, methodCall):
  5052 
       
  5053         self.callable = ParserElement.normalizeParseActionArgs(methodCall)
  2530         self.callable = ParserElement.normalizeParseActionArgs(methodCall)
  5054 
       
  5055         self.called = False
  2531         self.called = False
  5056 
       
  5057     def __call__(self,s,l,t):
  2532     def __call__(self,s,l,t):
  5058 
       
  5059         if not self.called:
  2533         if not self.called:
  5060 
       
  5061             results = self.callable(s,l,t)
  2534             results = self.callable(s,l,t)
  5062 
       
  5063             self.called = True
  2535             self.called = True
  5064 
       
  5065             return results
  2536             return results
  5066 
       
  5067         raise ParseException(s,l,"")
  2537         raise ParseException(s,l,"")
  5068 
       
  5069     def reset():
  2538     def reset():
  5070 
       
  5071         self.called = False
  2539         self.called = False
  5072 
  2540 
  5073 
       
  5074 
       
  5075 def traceParseAction(f):
  2541 def traceParseAction(f):
  5076 
       
  5077     """Decorator for debugging parse actions."""
  2542     """Decorator for debugging parse actions."""
  5078 
       
  5079     f = ParserElement.normalizeParseActionArgs(f)
  2543     f = ParserElement.normalizeParseActionArgs(f)
  5080 
       
  5081     def z(*paArgs):
  2544     def z(*paArgs):
  5082 
       
  5083         thisFunc = f.func_name
  2545         thisFunc = f.func_name
  5084 
       
  5085         s,l,t = paArgs[-3:]
  2546         s,l,t = paArgs[-3:]
  5086 
       
  5087         if len(paArgs)>3:
  2547         if len(paArgs)>3:
  5088 
       
  5089             thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
  2548             thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
  5090 
       
  5091         sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
  2549         sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
  5092 
       
  5093         try:
  2550         try:
  5094 
       
  5095             ret = f(*paArgs)
  2551             ret = f(*paArgs)
  5096 
       
  5097         except Exception, exc:
  2552         except Exception, exc:
  5098 
       
  5099             sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
  2553             sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
  5100 
       
  5101             raise
  2554             raise
  5102 
       
  5103         sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
  2555         sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
  5104 
       
  5105         return ret
  2556         return ret
  5106 
       
  5107     return z
  2557     return z
  5108 
  2558         
  5109         
       
  5110 
       
  5111 #
  2559 #
  5112 
       
  5113 # global helpers
  2560 # global helpers
  5114 
       
  5115 #
  2561 #
  5116 
       
  5117 def delimitedList( expr, delim=",", combine=False ):
  2562 def delimitedList( expr, delim=",", combine=False ):
  5118 
       
  5119     """Helper to define a delimited list of expressions - the delimiter defaults to ','.
  2563     """Helper to define a delimited list of expressions - the delimiter defaults to ','.
  5120 
       
  5121        By default, the list elements and delimiters can have intervening whitespace, and 
  2564        By default, the list elements and delimiters can have intervening whitespace, and 
  5122 
       
  5123        comments, but this can be overridden by passing 'combine=True' in the constructor.
  2565        comments, but this can be overridden by passing 'combine=True' in the constructor.
  5124 
       
  5125        If combine is set to True, the matching tokens are returned as a single token
  2566        If combine is set to True, the matching tokens are returned as a single token
  5126 
       
  5127        string, with the delimiters included; otherwise, the matching tokens are returned
  2567        string, with the delimiters included; otherwise, the matching tokens are returned
  5128 
       
  5129        as a list of tokens, with the delimiters suppressed.
  2568        as a list of tokens, with the delimiters suppressed.
  5130 
       
  5131     """
  2569     """
  5132 
       
  5133     dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
  2570     dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
  5134 
       
  5135     if combine:
  2571     if combine:
  5136 
       
  5137         return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
  2572         return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
  5138 
       
  5139     else:
  2573     else:
  5140 
       
  5141         return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
  2574         return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
  5142 
  2575 
  5143 
       
  5144 
       
  5145 def countedArray( expr ):
  2576 def countedArray( expr ):
  5146 
       
  5147     """Helper to define a counted list of expressions.
  2577     """Helper to define a counted list of expressions.
  5148 
       
  5149        This helper defines a pattern of the form::
  2578        This helper defines a pattern of the form::
  5150 
       
  5151            integer expr expr expr...
  2579            integer expr expr expr...
  5152 
       
  5153        where the leading integer tells how many expr expressions follow.
  2580        where the leading integer tells how many expr expressions follow.
  5154 
       
  5155        The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
  2581        The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
  5156 
       
  5157     """
  2582     """
  5158 
       
  5159     arrayExpr = Forward()
  2583     arrayExpr = Forward()
  5160 
       
  5161     def countFieldParseAction(s,l,t):
  2584     def countFieldParseAction(s,l,t):
  5162 
       
  5163         n = int(t[0])
  2585         n = int(t[0])
  5164 
       
  5165         arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
  2586         arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
  5166 
       
  5167         return []
  2587         return []
  5168 
       
  5169     return ( Word(nums).setParseAction(countFieldParseAction) + arrayExpr )
  2588     return ( Word(nums).setParseAction(countFieldParseAction) + arrayExpr )
  5170 
  2589 
  5171 
       
  5172 
       
  5173 def _flatten(L):
  2590 def _flatten(L):
  5174 
       
  5175     if type(L) is not list: return [L]
  2591     if type(L) is not list: return [L]
  5176 
       
  5177     if L == []: return L
  2592     if L == []: return L
  5178 
       
  5179     return _flatten(L[0]) + _flatten(L[1:])
  2593     return _flatten(L[0]) + _flatten(L[1:])
  5180 
  2594 
  5181 
       
  5182 
       
  5183 def matchPreviousLiteral(expr):
  2595 def matchPreviousLiteral(expr):
  5184 
       
  5185     """Helper to define an expression that is indirectly defined from
  2596     """Helper to define an expression that is indirectly defined from
  5186 
       
  5187        the tokens matched in a previous expression, that is, it looks
  2597        the tokens matched in a previous expression, that is, it looks
  5188 
       
  5189        for a 'repeat' of a previous expression.  For example::
  2598        for a 'repeat' of a previous expression.  For example::
  5190 
       
  5191            first = Word(nums)
  2599            first = Word(nums)
  5192 
       
  5193            second = matchPreviousLiteral(first)
  2600            second = matchPreviousLiteral(first)
  5194 
       
  5195            matchExpr = first + ":" + second
  2601            matchExpr = first + ":" + second
  5196 
       
  5197        will match "1:1", but not "1:2".  Because this matches a 
  2602        will match "1:1", but not "1:2".  Because this matches a 
  5198 
       
  5199        previous literal, will also match the leading "1:1" in "1:10".  
  2603        previous literal, will also match the leading "1:1" in "1:10".  
  5200 
       
  5201        If this is not desired, use matchPreviousExpr.
  2604        If this is not desired, use matchPreviousExpr.
  5202 
       
  5203        Do *not* use with packrat parsing enabled.
  2605        Do *not* use with packrat parsing enabled.
  5204 
       
  5205     """
  2606     """
  5206 
       
  5207     rep = Forward()
  2607     rep = Forward()
  5208 
       
  5209     def copyTokenToRepeater(s,l,t):
  2608     def copyTokenToRepeater(s,l,t):
  5210 
       
  5211         if t:
  2609         if t:
  5212 
       
  5213             if len(t) == 1:
  2610             if len(t) == 1:
  5214 
       
  5215                 rep << t[0]
  2611                 rep << t[0]
  5216 
       
  5217             else:
  2612             else:
  5218 
       
  5219                 # flatten t tokens
  2613                 # flatten t tokens
  5220 
       
  5221                 tflat = _flatten(t.asList())
  2614                 tflat = _flatten(t.asList())
  5222 
       
  5223                 rep << And( [ Literal(tt) for tt in tflat ] )
  2615                 rep << And( [ Literal(tt) for tt in tflat ] )
  5224 
  2616         else:
  5225         else:
       
  5226 
       
  5227             rep << Empty()
  2617             rep << Empty()
  5228 
       
  5229     expr.addParseAction(copyTokenToRepeater)
  2618     expr.addParseAction(copyTokenToRepeater)
  5230 
       
  5231     return rep
  2619     return rep
  5232 
  2620     
  5233     
       
  5234 
       
  5235 def matchPreviousExpr(expr):
  2621 def matchPreviousExpr(expr):
  5236 
       
  5237     """Helper to define an expression that is indirectly defined from
  2622     """Helper to define an expression that is indirectly defined from
  5238 
       
  5239        the tokens matched in a previous expression, that is, it looks
  2623        the tokens matched in a previous expression, that is, it looks
  5240 
       
  5241        for a 'repeat' of a previous expression.  For example::
  2624        for a 'repeat' of a previous expression.  For example::
  5242 
       
  5243            first = Word(nums)
  2625            first = Word(nums)
  5244 
       
  5245            second = matchPreviousExpr(first)
  2626            second = matchPreviousExpr(first)
  5246 
       
  5247            matchExpr = first + ":" + second
  2627            matchExpr = first + ":" + second
  5248 
       
  5249        will match "1:1", but not "1:2".  Because this matches by
  2628        will match "1:1", but not "1:2".  Because this matches by
  5250 
       
  5251        expressions, will *not* match the leading "1:1" in "1:10";
  2629        expressions, will *not* match the leading "1:1" in "1:10";
  5252 
       
  5253        the expressions are evaluated first, and then compared, so
  2630        the expressions are evaluated first, and then compared, so
  5254 
       
  5255        "1" is compared with "10".
  2631        "1" is compared with "10".
  5256 
       
  5257        Do *not* use with packrat parsing enabled.
  2632        Do *not* use with packrat parsing enabled.
  5258 
       
  5259     """
  2633     """
  5260 
       
  5261     rep = Forward()
  2634     rep = Forward()
  5262 
       
  5263     e2 = expr.copy()
  2635     e2 = expr.copy()
  5264 
       
  5265     rep << e2
  2636     rep << e2
  5266 
       
  5267     def copyTokenToRepeater(s,l,t):
  2637     def copyTokenToRepeater(s,l,t):
  5268 
       
  5269         matchTokens = _flatten(t.asList())
  2638         matchTokens = _flatten(t.asList())
  5270 
       
  5271         def mustMatchTheseTokens(s,l,t):
  2639         def mustMatchTheseTokens(s,l,t):
  5272 
       
  5273             theseTokens = _flatten(t.asList())
  2640             theseTokens = _flatten(t.asList())
  5274 
       
  5275             if  theseTokens != matchTokens:
  2641             if  theseTokens != matchTokens:
  5276 
       
  5277                 raise ParseException("",0,"")
  2642                 raise ParseException("",0,"")
  5278 
       
  5279         rep.setParseAction( mustMatchTheseTokens )
  2643         rep.setParseAction( mustMatchTheseTokens )
  5280 
       
  5281     expr.addParseAction(copyTokenToRepeater)
  2644     expr.addParseAction(copyTokenToRepeater)
  5282 
       
  5283     return rep
  2645     return rep
  5284 
  2646     
  5285     
       
  5286 
       
  5287 def _escapeRegexRangeChars(s):
  2647 def _escapeRegexRangeChars(s):
  5288 
       
  5289     #~  escape these chars: ^-]
  2648     #~  escape these chars: ^-]
  5290 
       
  5291     for c in r"\^-]":
  2649     for c in r"\^-]":
  5292 
       
  5293         s = s.replace(c,"\\"+c)
  2650         s = s.replace(c,"\\"+c)
  5294 
       
  5295     s = s.replace("\n",r"\n")
  2651     s = s.replace("\n",r"\n")
  5296 
       
  5297     s = s.replace("\t",r"\t")
  2652     s = s.replace("\t",r"\t")
  5298 
       
  5299     return _ustr(s)
  2653     return _ustr(s)
  5300 
  2654     
  5301     
       
  5302 
       
  5303 def oneOf( strs, caseless=False, useRegex=True ):
  2655 def oneOf( strs, caseless=False, useRegex=True ):
  5304 
       
  5305     """Helper to quickly define a set of alternative Literals, and makes sure to do 
  2656     """Helper to quickly define a set of alternative Literals, and makes sure to do 
  5306 
       
  5307        longest-first testing when there is a conflict, regardless of the input order, 
  2657        longest-first testing when there is a conflict, regardless of the input order, 
  5308 
       
  5309        but returns a MatchFirst for best performance.  
  2658        but returns a MatchFirst for best performance.  
  5310 
       
  5311        
  2659        
  5312 
       
  5313        Parameters:
  2660        Parameters:
  5314 
       
  5315         - strs - a string of space-delimited literals, or a list of string literals
  2661         - strs - a string of space-delimited literals, or a list of string literals
  5316 
       
  5317         - caseless - (default=False) - treat all literals as caseless
  2662         - caseless - (default=False) - treat all literals as caseless
  5318 
       
  5319         - useRegex - (default=True) - as an optimization, will generate a Regex
  2663         - useRegex - (default=True) - as an optimization, will generate a Regex
  5320 
       
  5321           object; otherwise, will generate a MatchFirst object (if caseless=True, or
  2664           object; otherwise, will generate a MatchFirst object (if caseless=True, or
  5322 
       
  5323           if creating a Regex raises an exception)
  2665           if creating a Regex raises an exception)
  5324 
       
  5325     """
  2666     """
  5326 
       
  5327     if caseless:
  2667     if caseless:
  5328 
       
  5329         isequal = ( lambda a,b: a.upper() == b.upper() )
  2668         isequal = ( lambda a,b: a.upper() == b.upper() )
  5330 
       
  5331         masks = ( lambda a,b: b.upper().startswith(a.upper()) )
  2669         masks = ( lambda a,b: b.upper().startswith(a.upper()) )
  5332 
       
  5333         parseElementClass = CaselessLiteral
  2670         parseElementClass = CaselessLiteral
  5334 
       
  5335     else:
  2671     else:
  5336 
       
  5337         isequal = ( lambda a,b: a == b )
  2672         isequal = ( lambda a,b: a == b )
  5338 
       
  5339         masks = ( lambda a,b: b.startswith(a) )
  2673         masks = ( lambda a,b: b.startswith(a) )
  5340 
       
  5341         parseElementClass = Literal
  2674         parseElementClass = Literal
  5342 
  2675     
  5343     
       
  5344 
       
  5345     if isinstance(strs,(list,tuple)):
  2676     if isinstance(strs,(list,tuple)):
  5346 
       
  5347         symbols = strs[:]
  2677         symbols = strs[:]
  5348 
       
  5349     elif isinstance(strs,basestring):
  2678     elif isinstance(strs,basestring):
  5350 
       
  5351         symbols = strs.split()
  2679         symbols = strs.split()
  5352 
       
  5353     else:
  2680     else:
  5354 
       
  5355         warnings.warn("Invalid argument to oneOf, expected string or list",
  2681         warnings.warn("Invalid argument to oneOf, expected string or list",
  5356 
       
  5357                 SyntaxWarning, stacklevel=2)
  2682                 SyntaxWarning, stacklevel=2)
  5358 
  2683         
  5359         
       
  5360 
       
  5361     i = 0
  2684     i = 0
  5362 
       
  5363     while i < len(symbols)-1:
  2685     while i < len(symbols)-1:
  5364 
       
  5365         cur = symbols[i]
  2686         cur = symbols[i]
  5366 
       
  5367         for j,other in enumerate(symbols[i+1:]):
  2687         for j,other in enumerate(symbols[i+1:]):
  5368 
       
  5369             if ( isequal(other, cur) ):
  2688             if ( isequal(other, cur) ):
  5370 
       
  5371                 del symbols[i+j+1]
  2689                 del symbols[i+j+1]
  5372 
       
  5373                 break
  2690                 break
  5374 
       
  5375             elif ( masks(cur, other) ):
  2691             elif ( masks(cur, other) ):
  5376 
       
  5377                 del symbols[i+j+1]
  2692                 del symbols[i+j+1]
  5378 
       
  5379                 symbols.insert(i,other)
  2693                 symbols.insert(i,other)
  5380 
       
  5381                 cur = other
  2694                 cur = other
  5382 
       
  5383                 break
  2695                 break
  5384 
  2696         else:
  5385         else:
       
  5386 
       
  5387             i += 1
  2697             i += 1
  5388 
  2698 
  5389 
       
  5390 
       
  5391     if not caseless and useRegex:
  2699     if not caseless and useRegex:
  5392 
       
  5393         #~ print strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )
  2700         #~ print strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )
  5394 
       
  5395         try:
  2701         try:
  5396 
       
  5397             if len(symbols)==len("".join(symbols)):
  2702             if len(symbols)==len("".join(symbols)):
  5398 
       
  5399                 return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
  2703                 return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
  5400 
       
  5401             else:
  2704             else:
  5402 
       
  5403                 return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
  2705                 return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
  5404 
       
  5405         except:
  2706         except:
  5406 
       
  5407             warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
  2707             warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
  5408 
       
  5409                     SyntaxWarning, stacklevel=2)
  2708                     SyntaxWarning, stacklevel=2)
  5410 
  2709 
  5411 
  2710 
  5412 
       
  5413 
       
  5414 
       
  5415     # last resort, just use MatchFirst
  2711     # last resort, just use MatchFirst
  5416 
       
  5417     return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
  2712     return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
  5418 
  2713 
  5419 
       
  5420 
       
  5421 def dictOf( key, value ):
  2714 def dictOf( key, value ):
  5422 
       
  5423     """Helper to easily and clearly define a dictionary by specifying the respective patterns
  2715     """Helper to easily and clearly define a dictionary by specifying the respective patterns
  5424 
       
  5425        for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
  2716        for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
  5426 
       
  5427        in the proper order.  The key pattern can include delimiting markers or punctuation,
  2717        in the proper order.  The key pattern can include delimiting markers or punctuation,
  5428 
       
  5429        as long as they are suppressed, thereby leaving the significant key text.  The value
  2718        as long as they are suppressed, thereby leaving the significant key text.  The value
  5430 
       
  5431        pattern can include named results, so that the Dict results can include named token 
  2719        pattern can include named results, so that the Dict results can include named token 
  5432 
       
  5433        fields.
  2720        fields.
  5434 
       
  5435     """
  2721     """
  5436 
       
  5437     return Dict( ZeroOrMore( Group ( key + value ) ) )
  2722     return Dict( ZeroOrMore( Group ( key + value ) ) )
  5438 
  2723 
  5439 
       
  5440 
       
# a single backslash character, used to build the escape expressions below
_bslash = "\\"
# every character of string.printable that is not in string.whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# --- grammar pieces for the bracket expressions parsed by srange() ---
# a backslash followed by one of the listed punctuation characters (exactly two
# characters in total); the parse action keeps only the second character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# the printable characters except backslash and ']'
_printables_less_backslash = "".join([ c for c in printables if c not in  r"\]" ])
# backslash + "0x" + hex digits, converted to the character with that code (base 16)
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0" + octal digits, converted to the character with that code (base 8)
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
# any one character: an escape form, or a single unescaped printable
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# two single characters separated by a dash; the dash is suppressed and the pair is grouped
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# "[" + optional "^" (results name "negate") + one or more ranges or single
# characters (results name "body") + "]"
_reBracketExpr = "[" + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# a grouped range (a ParseResults pair) becomes the string of every character from
# the first to the last code point inclusive; anything else is returned unchanged.
# NOTE: because of the and/or form, a range that expands to an empty string also
# falls through to "or p" and is returned as the ParseResults itself.
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
  5478 
  2743         
  5479         
       
  5480 
       
  5481 def srange(s):
  2744 def srange(s):
  5482 
       
  5483     r"""Helper to easily define string ranges for use in Word construction.  Borrows
  2745     r"""Helper to easily define string ranges for use in Word construction.  Borrows
  5484 
       
  5485        syntax from regexp '[]' string range definitions::
  2746        syntax from regexp '[]' string range definitions::
  5486 
       
  5487           srange("[0-9]")   -> "0123456789"
  2747           srange("[0-9]")   -> "0123456789"
  5488 
       
  5489           srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
  2748           srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
  5490 
       
  5491           srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
  2749           srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
  5492 
       
  5493        The input string must be enclosed in []'s, and the returned string is the expanded 
  2750        The input string must be enclosed in []'s, and the returned string is the expanded 
  5494 
       
  5495        character set joined into a single string.
  2751        character set joined into a single string.
  5496 
       
  5497        The values enclosed in the []'s may be::
  2752        The values enclosed in the []'s may be::
  5498 
       
  5499           a single character
  2753           a single character
  5500 
       
  5501           an escaped character with a leading backslash (such as \- or \])
  2754           an escaped character with a leading backslash (such as \- or \])
  5502 
       
  5503           an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
  2755           an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
  5504 
       
  5505           an escaped octal character with a leading '\0' (\041, which is a '!' character)
  2756           an escaped octal character with a leading '\0' (\041, which is a '!' character)
  5506 
       
  5507           a range of any of the above, separated by a dash ('a-z', etc.)
  2757           a range of any of the above, separated by a dash ('a-z', etc.)
  5508 
       
  5509           any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
  2758           any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
  5510 
       
  5511     """
  2759     """
  5512 
       
  5513     try:
  2760     try:
  5514 
       
  5515         return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
  2761         return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
  5516 
       
  5517     except:
  2762     except:
  5518 
       
  5519         return ""
  2763         return ""
  5520 
  2764 
  5521 
       
  5522 
       
  5523 def replaceWith(replStr):
  2765 def replaceWith(replStr):
  5524 
       
  5525     """Helper method for common parse actions that simply return a literal value.  Especially 
  2766     """Helper method for common parse actions that simply return a literal value.  Especially 
  5526 
       
  5527        useful when used with transformString().
  2767        useful when used with transformString().
  5528 
       
  5529     """
  2768     """
  5530 
       
  5531     def _replFunc(*args):
  2769     def _replFunc(*args):
  5532 
       
  5533         return [replStr]
  2770         return [replStr]
  5534 
       
  5535     return _replFunc
  2771     return _replFunc
  5536 
  2772 
  5537 
       
  5538 
       
  5539 def removeQuotes(s,l,t):
  2773 def removeQuotes(s,l,t):
  5540 
       
  5541     """Helper parse action for removing quotation marks from parsed quoted strings.
  2774     """Helper parse action for removing quotation marks from parsed quoted strings.
  5542 
       
  5543        To use, add this parse action to quoted string using::
  2775        To use, add this parse action to quoted string using::
  5544 
       
  5545          quotedString.setParseAction( removeQuotes )
  2776          quotedString.setParseAction( removeQuotes )
  5546 
       
  5547     """
  2777     """
  5548 
       
  5549     return t[0][1:-1]
  2778     return t[0][1:-1]
  5550 
  2779 
  5551 
       
  5552 
       
  5553 def upcaseTokens(s,l,t):
  2780 def upcaseTokens(s,l,t):
  5554 
       
  5555     """Helper parse action to convert tokens to upper case."""
  2781     """Helper parse action to convert tokens to upper case."""
  5556 
       
  5557     return [ str(tt).upper() for tt in t ]
  2782     return [ str(tt).upper() for tt in t ]
  5558 
  2783 
  5559 
       
  5560 
       
  5561 def downcaseTokens(s,l,t):
  2784 def downcaseTokens(s,l,t):
  5562 
       
  5563     """Helper parse action to convert tokens to lower case."""
  2785     """Helper parse action to convert tokens to lower case."""
  5564 
       
  5565     return [ str(tt).lower() for tt in t ]
  2786     return [ str(tt).lower() for tt in t ]
  5566 
  2787 
  5567 
       
  5568 
       
  5569 def keepOriginalText(s,startLoc,t):
  2788 def keepOriginalText(s,startLoc,t):
  5570 
       
  5571     import inspect
  2789     import inspect
  5572 
       
  5573     """Helper parse action to preserve original parsed text,
  2790     """Helper parse action to preserve original parsed text,
  5574 
       
  5575        overriding any nested parse actions."""
  2791        overriding any nested parse actions."""
  5576 
       
  5577     f = inspect.stack()[1][0]
  2792     f = inspect.stack()[1][0]
  5578 
       
  5579     try:
  2793     try:
  5580 
       
  5581         endloc = f.f_locals["loc"]
  2794         endloc = f.f_locals["loc"]
  5582 
       
  5583     finally:
  2795     finally:
  5584 
       
  5585         del f
  2796         del f
  5586 
       
  5587     return s[startLoc:endloc]
  2797     return s[startLoc:endloc]
  5588 
  2798         
  5589         
       
  5590 
       
  5591 def _makeTags(tagStr, xml):
  2799 def _makeTags(tagStr, xml):
  5592 
       
  5593     """Internal helper to construct opening and closing tag expressions, given a tag name"""
  2800     """Internal helper to construct opening and closing tag expressions, given a tag name"""
  5594 
       
  5595     if isinstance(tagStr,basestring):
  2801     if isinstance(tagStr,basestring):
  5596 
       
  5597         resname = tagStr
  2802         resname = tagStr
  5598 
       
  5599         tagStr = Keyword(tagStr, caseless=not xml)
  2803         tagStr = Keyword(tagStr, caseless=not xml)
  5600 
       
  5601     else:
  2804     else:
  5602 
       
  5603         resname = tagStr.name
  2805         resname = tagStr.name
  5604 
  2806         
  5605         
       
  5606 
       
  5607     tagAttrName = Word(alphas,alphanums+"_-")
  2807     tagAttrName = Word(alphas,alphanums+"_-")
  5608 
       
  5609     if (xml):
  2808     if (xml):
  5610 
       
  5611         tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
  2809         tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
  5612 
       
  5613         openTag = Suppress("<") + tagStr + \
  2810         openTag = Suppress("<") + tagStr + \
  5614                 Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
  2811                 Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
  5615                 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  2812                 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  5616 
       
  5617     else:
  2813     else:
  5618 
       
  5619         printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
  2814         printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
  5620 
       
  5621         tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
  2815         tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
  5622 
       
  5623         openTag = Suppress("<") + tagStr + \
  2816         openTag = Suppress("<") + tagStr + \
  5624                 Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
  2817                 Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
  5625 
       
  5626                 Suppress("=") + tagAttrValue ))) + \
  2818                 Suppress("=") + tagAttrValue ))) + \
  5627                 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  2819                 Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  5628 
       
  5629     closeTag = Combine("</" + tagStr + ">")
  2820     closeTag = Combine("</" + tagStr + ">")
  5630 
  2821     
  5631     
       
  5632 
       
  5633     openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
  2822     openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % tagStr)
  5634 
       
  5635     closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr)
  2823     closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % tagStr)
  5636 
  2824     
  5637     
       
  5638 
       
  5639     return openTag, closeTag
  2825     return openTag, closeTag
  5640 
  2826 
  5641 
       
  5642 
       
  5643 def makeHTMLTags(tagStr):
  2827 def makeHTMLTags(tagStr):
  5644 
       
  5645     """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
  2828     """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
  5646 
       
  5647     return _makeTags( tagStr, False )
  2829     return _makeTags( tagStr, False )
  5648 
  2830 
  5649 
       
  5650 
       
  5651 def makeXMLTags(tagStr):
  2831 def makeXMLTags(tagStr):
  5652 
       
  5653     """Helper to construct opening and closing tag expressions for XML, given a tag name"""
  2832     """Helper to construct opening and closing tag expressions for XML, given a tag name"""
  5654 
       
  5655     return _makeTags( tagStr, True )
  2833     return _makeTags( tagStr, True )
  5656 
  2834 
  5657 
       
  5658 
       
  5659 opAssoc = _Constants()
  2835 opAssoc = _Constants()
  5660 
       
  5661 opAssoc.LEFT = object()
  2836 opAssoc.LEFT = object()
  5662 
       
  5663 opAssoc.RIGHT = object()
  2837 opAssoc.RIGHT = object()
  5664 
  2838 
  5665 
       
  5666 
       
  5667 def operatorPrecedence( baseExpr, opList ):
  2839 def operatorPrecedence( baseExpr, opList ):
  5668 
       
  5669     """Helper method for constructing grammars of expressions made up of 
  2840     """Helper method for constructing grammars of expressions made up of 
  5670 
       
  5671        operators working in a precedence hierarchy.  Operators may be unary or
  2841        operators working in a precedence hierarchy.  Operators may be unary or
  5672 
       
  5673        binary, left- or right-associative.  Parse actions can also be attached
  2842        binary, left- or right-associative.  Parse actions can also be attached
  5674 
       
  5675        to operator expressions.
  2843        to operator expressions.
  5676 
  2844         
  5677         
       
  5678 
       
  5679        Parameters:
  2845        Parameters:
  5680 
       
  5681         - baseExpr - expression representing the most basic element for the nested 
  2846         - baseExpr - expression representing the most basic element for the nested 
  5682 
       
  5683         - opList - list of tuples, one for each operator precedence level in the expression grammar; each tuple is of the form
  2847         - opList - list of tuples, one for each operator precedence level in the expression grammar; each tuple is of the form
  5684 
       
  5685           (opExpr, numTerms, rightLeftAssoc, parseAction), where:
  2848           (opExpr, numTerms, rightLeftAssoc, parseAction), where:
  5686 
       
  5687            - opExpr is the pyparsing expression for the operator;
  2849            - opExpr is the pyparsing expression for the operator;
  5688 
       
  5689               may also be a string, which will be converted to a Literal
  2850               may also be a string, which will be converted to a Literal
  5690 
       
  5691            - numTerms is the number of terms for this operator (must
  2851            - numTerms is the number of terms for this operator (must
  5692 
       
  5693               be 1 or 2)
  2852               be 1 or 2)
  5694 
       
  5695            - rightLeftAssoc is the indicator whether the operator is
  2853            - rightLeftAssoc is the indicator whether the operator is
  5696 
       
  5697               right or left associative, using the pyparsing-defined
  2854               right or left associative, using the pyparsing-defined
  5698 
       
  5699               constants opAssoc.RIGHT and opAssoc.LEFT.
  2855               constants opAssoc.RIGHT and opAssoc.LEFT.
  5700 
       
  5701            - parseAction is the parse action to be associated with 
  2856            - parseAction is the parse action to be associated with 
  5702 
       
  5703               expressions matching this operator expression (the
  2857               expressions matching this operator expression (the
  5704 
       
  5705               parse action tuple member may be omitted)
  2858               parse action tuple member may be omitted)
  5706 
       
  5707     """
  2859     """
  5708 
       
  5709     ret = Forward()
  2860     ret = Forward()
  5710 
       
  5711     lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
  2861     lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
  5712 
       
  5713     for i,operDef in enumerate(opList):
  2862     for i,operDef in enumerate(opList):
  5714 
       
  5715         opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
  2863         opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
  5716 
       
  5717         thisExpr = Forward().setName("expr%d" % i)
  2864         thisExpr = Forward().setName("expr%d" % i)
  5718 
       
  5719         if rightLeftAssoc == opAssoc.LEFT:
  2865         if rightLeftAssoc == opAssoc.LEFT:
  5720 
       
  5721             if arity == 1:
  2866             if arity == 1:
  5722 
       
  5723                 matchExpr = Group( lastExpr + opExpr )
  2867                 matchExpr = Group( lastExpr + opExpr )
  5724 
       
  5725             elif arity == 2:
  2868             elif arity == 2:
  5726 
       
  5727                 matchExpr = Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
  2869                 matchExpr = Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
  5728 
       
  5729             else:
  2870             else:
  5730 
       
  5731                 raise ValueError, "operator must be unary (1) or binary (2)"
  2871                 raise ValueError, "operator must be unary (1) or binary (2)"
  5732 
       
  5733         elif rightLeftAssoc == opAssoc.RIGHT:
  2872         elif rightLeftAssoc == opAssoc.RIGHT:
  5734 
       
  5735             if arity == 1:
  2873             if arity == 1:
  5736 
       
  5737                 # try to avoid LR with this extra test
  2874                 # try to avoid LR with this extra test
  5738 
       
  5739                 if not isinstance(opExpr, Optional):
  2875                 if not isinstance(opExpr, Optional):
  5740 
       
  5741                     opExpr = Optional(opExpr)
  2876                     opExpr = Optional(opExpr)
  5742 
       
  5743                 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) 
  2877                 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) 
  5744 
       
  5745             elif arity == 2:
  2878             elif arity == 2:
  5746 
       
  5747                 matchExpr = Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
  2879                 matchExpr = Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
  5748 
       
  5749             else:
  2880             else:
  5750 
       
  5751                 raise ValueError, "operator must be unary (1) or binary (2)"
  2881                 raise ValueError, "operator must be unary (1) or binary (2)"
  5752 
  2882         else:
  5753         else:
       
  5754 
       
  5755             raise ValueError, "operator must indicate right or left associativity"
  2883             raise ValueError, "operator must indicate right or left associativity"
  5756 
       
  5757         if pa:
  2884         if pa:
  5758 
       
  5759             matchExpr.setParseAction( pa )
  2885             matchExpr.setParseAction( pa )
  5760 
       
  5761         thisExpr << ( matchExpr | lastExpr )
  2886         thisExpr << ( matchExpr | lastExpr )
  5762 
       
  5763         lastExpr = thisExpr
  2887         lastExpr = thisExpr
  5764 
       
  5765     ret << lastExpr
  2888     ret << lastExpr
  5766 
       
  5767     return ret
  2889     return ret
  5768 
  2890 
  5769 
       
  5770 
       
# characters 0xc0-0xff, leaving out 0xd7 and 0xf7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
# characters 0xa1-0xbf, plus 0xd7 and 0xf7
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Quoted strings.  Between the quotes the patterns accept: any character other
# than the quote itself, newline, carriage return or backslash; a doubled quote
# character; or a backslash followed by any one character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\.))*')''').setName("quotedString using single or double quotes")

# opening/closing HTML tag expressions whose tag name is any word starting with
# a letter and continuing with letters, digits or "_"
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_"))
# "&" + one of gt/lt/amp/nbsp/quot + ";"; the name in the middle is available
# under the results name "entity"
commonHTMLEntity = Combine("&" + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
  5788 
       
  5789 _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
  2900 _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),"><& '"))
  5790 
       
  5791 replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
  2901 replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
  5792 
  2902     
  5793     
       
  5794 
       
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# "/*" up to the first following "*/"
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# "<!--" up to the first following "-->"; [\s\S] lets the body span lines
htmlComment = Regex(r"<!--[\s\S]*?-->")
# whatever is left on the current line; leading whitespace is not skipped
restOfLine = Regex(r".*").leaveWhitespace()
# "//" to the end of the line; a backslash directly before the newline continues the comment
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a "/* ... */" comment or a "//" comment
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same expression object as cppStyleComment, under a second name
javaStyleComment = cppStyleComment
# "#" to the end of the line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# the printable characters except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
# one item of a comma separated list: runs of non-comma printables, keeping
# embedded spaces/tabs that are not directly followed by a comma or the line end
_commasepitem = Combine(OneOrMore(Word(_noncomma) + 
                                  Optional( Word(" \t") + 
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# delimited list of quoted strings or plain items; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
  5824 
  2918 
  5825 
  2919 
  5826 
       
  5827 
       
  5828 
       
  5829 if __name__ == "__main__":
  2920 if __name__ == "__main__":
  5830 
  2921 
  5831 
       
  5832 
       
  5833     def test( teststring ):
  2922     def test( teststring ):
  5834 
       
  5835         print teststring,"->",
  2923         print teststring,"->",
  5836 
       
  5837         try:
  2924         try:
  5838 
       
  5839             tokens = simpleSQL.parseString( teststring )
  2925             tokens = simpleSQL.parseString( teststring )
  5840 
       
  5841             tokenlist = tokens.asList()
  2926             tokenlist = tokens.asList()
  5842 
       
  5843             print tokenlist
  2927             print tokenlist
  5844 
       
  5845             print "tokens = ",        tokens
  2928             print "tokens = ",        tokens
  5846 
       
  5847             print "tokens.columns =", tokens.columns
  2929             print "tokens.columns =", tokens.columns
  5848 
       
  5849             print "tokens.tables =",  tokens.tables
  2930             print "tokens.tables =",  tokens.tables
  5850 
       
  5851             print tokens.asXML("SQL",True)
  2931             print tokens.asXML("SQL",True)
  5852 
       
  5853         except ParseException, err:
  2932         except ParseException, err:
  5854 
       
  5855             print err.line
  2933             print err.line
  5856 
       
  5857             print " "*(err.column-1) + "^"
  2934             print " "*(err.column-1) + "^"
  5858 
       
  5859             print err
  2935             print err
  5860 
       
  5861         print
  2936         print
  5862 
  2937 
  5863 
       
  5864 
       
  5865     selectToken    = CaselessLiteral( "select" )
  2938     selectToken    = CaselessLiteral( "select" )
  5866 
       
  5867     fromToken      = CaselessLiteral( "from" )
  2939     fromToken      = CaselessLiteral( "from" )
  5868 
  2940 
  5869 
       
  5870 
       
  5871     ident          = Word( alphas, alphanums + "_$" )
  2941     ident          = Word( alphas, alphanums + "_$" )
  5872 
       
  5873     columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
  2942     columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
  5874 
       
  5875     columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
  2943     columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
  5876 
       
  5877     tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
  2944     tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
  5878 
       
  5879     tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
  2945     tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
  5880 
       
  5881     simpleSQL      = ( selectToken + \
  2946     simpleSQL      = ( selectToken + \
  5882 
       
  5883                      ( '*' | columnNameList ).setResultsName( "columns" ) + \
  2947                      ( '*' | columnNameList ).setResultsName( "columns" ) + \
  5884 
       
  5885                      fromToken + \
  2948                      fromToken + \
  5886 
       
  5887                      tableNameList.setResultsName( "tables" ) )
  2949                      tableNameList.setResultsName( "tables" ) )
  5888 
  2950     
  5889     
       
  5890 
       
  5891     test( "SELECT * from XYZZY, ABC" )
  2951     test( "SELECT * from XYZZY, ABC" )
  5892 
       
  5893     test( "select * from SYS.XYZZY" )
  2952     test( "select * from SYS.XYZZY" )
  5894 
       
  5895     test( "Select A from Sys.dual" )
  2953     test( "Select A from Sys.dual" )
  5896 
       
  5897     test( "Select AA,BB,CC from Sys.dual" )
  2954     test( "Select AA,BB,CC from Sys.dual" )
  5898 
       
  5899     test( "Select A, B, C from Sys.dual" )
  2955     test( "Select A, B, C from Sys.dual" )
  5900 
       
  5901     test( "Select A, B, C from Sys.dual" )
  2956     test( "Select A, B, C from Sys.dual" )
  5902 
       
  5903     test( "Xelect A, B, C from Sys.dual" )
  2957     test( "Xelect A, B, C from Sys.dual" )
  5904 
       
  5905     test( "Select A, B, C frox Sys.dual" )
  2958     test( "Select A, B, C frox Sys.dual" )
  5906 
       
  5907     test( "Select" )
  2959     test( "Select" )
  5908 
       
  5909     test( "Select ^^^ frox Sys.dual" )
  2960     test( "Select ^^^ frox Sys.dual" )
  5910 
       
  5911     test( "Select A, B, C from Sys.dual, Table2   " )
  2961     test( "Select A, B, C from Sys.dual, Table2   " )
  5912