3
+ − 1
# module pyparsing.py
+ − 2
#
+ − 3
# Copyright (c) 2003-2006 Paul T. McGuire
+ − 4
#
+ − 5
# Permission is hereby granted, free of charge, to any person obtaining
+ − 6
# a copy of this software and associated documentation files (the
+ − 7
# "Software"), to deal in the Software without restriction, including
+ − 8
# without limitation the rights to use, copy, modify, merge, publish,
+ − 9
# distribute, sublicense, and/or sell copies of the Software, and to
+ − 10
# permit persons to whom the Software is furnished to do so, subject to
+ − 11
# the following conditions:
+ − 12
#
+ − 13
# The above copyright notice and this permission notice shall be
+ − 14
# included in all copies or substantial portions of the Software.
+ − 15
#
+ − 16
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ − 17
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ − 18
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ − 19
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ − 20
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ − 21
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ − 22
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ − 23
#
+ − 24
#from __future__ import generators
+ − 25
+ − 26
__doc__ = \
+ − 27
"""
+ − 28
pyparsing module - Classes and methods to define and execute parsing grammars
+ − 29
+ − 30
The pyparsing module is an alternative approach to creating and executing simple grammars,
+ − 31
vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
+ − 32
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
+ − 33
provides a library of classes that you use to construct the grammar directly in Python.
+ − 34
+ − 35
Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
+ − 36
+ − 37
from pyparsing import Word, alphas
+ − 38
+ − 39
# define grammar of a greeting
+ − 40
greet = Word( alphas ) + "," + Word( alphas ) + "!"
+ − 41
+ − 42
hello = "Hello, World!"
+ − 43
print hello, "->", greet.parseString( hello )
+ − 44
+ − 45
The program outputs the following::
+ − 46
+ − 47
Hello, World! -> ['Hello', ',', 'World', '!']
+ − 48
+ − 49
The Python representation of the grammar is quite readable, owing to the self-explanatory
+ − 50
class names, and the use of '+', '|' and '^' operators.
+ − 51
+ − 52
The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
+ − 53
object with named attributes.
+ − 54
+ − 55
The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
+ − 56
- extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
+ − 57
- quoted strings
+ − 58
- embedded comments
+ − 59
"""
+ − 60
__version__ = "1.4.5"
+ − 61
__versionTime__ = "16 December 2006 07:20"
+ − 62
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
+ − 63
+ − 64
import string
+ − 65
import copy,sys
+ − 66
import warnings
+ − 67
import re
+ − 68
import sre_constants
+ − 69
import xml.sax.saxutils
+ − 70
#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
+ − 71
+ − 72
def _ustr(obj):
+ − 73
"""Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
+ − 74
str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
+ − 75
then < returns the unicode object | encodes it with the default encoding | ... >.
+ − 76
"""
+ − 77
try:
+ − 78
# If this works, then _ustr(obj) has the same behaviour as str(obj), so
+ − 79
# it won't break any existing code.
+ − 80
return str(obj)
+ − 81
+ − 82
except UnicodeEncodeError, e:
+ − 83
# The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
+ − 84
# state that "The return value must be a string object". However, does a
+ − 85
# unicode object (being a subclass of basestring) count as a "string
+ − 86
# object"?
+ − 87
# If so, then return a unicode object:
+ − 88
return unicode(obj)
+ − 89
# Else encode it... but how? There are many choices... :)
+ − 90
# Replace unprintables with escape codes?
+ − 91
#return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
+ − 92
# Replace unprintables with question marks?
+ − 93
#return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
+ − 94
# ...
+ − 95
+ − 96
def _str2dict(strg):
+ − 97
return dict( [(c,0) for c in strg] )
+ − 98
#~ return set( [c for c in strg] )
+ − 99
+ − 100
class _Constants(object):
+ − 101
pass
+ − 102
+ − 103
# Ready-made character sets (the module docstring shows one passed to Word).
# alphas: the lowercase letters followed by the uppercase letters
alphas = string.lowercase + string.uppercase
# nums: the decimal digits
nums = string.digits
# hexnums: the digits plus the hex letters in both cases
hexnums = nums + "ABCDEFabcdef"
# alphanums: letters followed by digits
alphanums = alphas + nums
+ − 107
+ − 108
class ParseBaseException(Exception):
    """Common base class for every exception raised while parsing.

    Stored attributes:
     - pstr - the string that was being parsed
     - loc - character offset in pstr at which the problem was detected
     - msg - description of the problem
     - parserElement - the expression that raised, or None
    Attributes computed on request (see __getattr__):
     - lineno, col (alias column), line
    """
    __slots__ = ( "loc","msg","pstr","parserElement" )

    # Performance tuning: a *lot* of these get constructed, so the
    # constructor does nothing beyond storing its arguments.
    def __init__( self, pstr, loc, msg, elem=None ):
        self.pstr, self.loc = pstr, loc
        self.msg, self.parserElement = msg, elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname == "col" or aname == "column":
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        """Return the message followed by the character, line and column position."""
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        """Same text as str(self), obtained through _ustr."""
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           An empty markerString returns the line unmarked.  The result has
           leading and trailing whitespace stripped.
        """
        text = self.line
        if markerString:
            cut = self.column - 1
            text = "".join( [ text[:cut], markerString, text[cut:] ] )
        return text.strip()
+ − 147
+ − 148
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class

       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
+ − 156
+ − 157
class ParseFatalException(ParseBaseException):
    """User-throwable exception for inconsistent parse content.

       Raise it (for instance from a fail action) when parsing should stop
       immediately rather than continue with other alternatives.
    """
+ − 161
+ − 162
class ReparseException(ParseBaseException):
    """Exception carrying replacement text and the location to restart from.

       Attributes (in addition to those of ParseBaseException):
        - newParseText - the replacement string
        - reparseLoc - the location at which to restart

       NOTE(review): the only reference visible in this part of the file is a
       commented-out handler, so the intended consumer is presumably not yet
       wired up - confirm before relying on it.
    """
    # The constructor used to be spelled "__init_" (one trailing underscore),
    # so it never ran and the two attributes below were never set.  msg and
    # elem are optional so that both the intended two-argument form and the
    # inherited (pstr, loc, msg, elem) form keep working.
    def __init__( self, newstring, restartLoc, msg="", elem=None ):
        ParseBaseException.__init__( self, newstring, restartLoc, msg, elem )
        self.newParseText = newstring
        self.reparseLoc = restartLoc
+ − 166
+ − 167
+ − 168
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive

       parseElementTrace holds the list of parse elements passed to the
       constructor.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        """Return the class name followed by the stored element trace."""
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
+ − 175
+ − 176
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

       Internally the tokens are kept in a plain list, and named results in a
       dict that maps each name to a list of (value, position) tuples.
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        """Return toklist itself if it is already a ParseResults, otherwise a
        new, not yet initialised instance (flagged through __doinit)."""
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        """Initialise the results and optionally register them under name.

        - toklist - a list of tokens (copied), or a single token (wrapped in a list)
        - name - results name under which the tokens are also stored, if given
        - asList - if true the named value is stored as a nested ParseResults,
          otherwise as the first token (or toklist itself if it cannot be indexed)
        - modal - if false, name is recorded as accumulating, so lookups by
          that name return every stored value rather than only the latest
        """
        # __init__ runs again when __new__ returned an existing instance;
        # __doinit makes sure the containers are only created once.
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        # this line is related to debugging the asXML bug
        #~ asList = False

        if name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = (toklist.copy(),-1)
                    else:
                        self[name] = (ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Index or slice the token list, or look up a named result.

        A name registered as accumulating returns a ParseResults of all its
        values; any other name returns the most recently stored value.
        """
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        """Store v.

        - a tuple v is appended as-is to the entries for name k
        - an int k replaces the token at that index
        - otherwise (v, 0) is appended to the entries for name k
        A ParseResults value gets this object as its parent.
        """
        if isinstance(v,tuple):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = self

    def __delitem__( self, i ):
        """Delete a token by index/slice, or all entries stored under a name."""
        if isinstance(i,(int,slice)):
            del self.__toklist[i]
        else:
            # This used to read "self._tokdict" (single underscore), which
            # fell through to __getattr__ and could never delete anything.
            del self.__tokdict[i]

    def __contains__( self, k ):
        """True if k is a results name held by this object."""
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __nonzero__( self ): return len( self.__toklist ) > 0
    def __iter__( self ): return iter( self.__toklist )
    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict.keys()]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Attribute-style access to named results.

        Only called when normal attribute lookup fails.  Returns the named
        result if there is one (same rules as __getitem__), "" for any other
        name, and None for a name listed in __slots__.
        """
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        """Return a copy of self with other merged in (see __iadd__)."""
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        """Merge other's tokens, named results and accumulating names into self."""
        if other.__tokdict:
            offset = len(self.__toklist)
            # and/or idiom: a negative position becomes offset (or is left
            # unchanged when offset is 0); any other position is shifted by
            # the number of tokens already held.
            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
            otheritems = other.__tokdict.items()
            otherdictitems = [(k,(v[0],addoffset(v[1])) ) for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = self
        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        """Return the repr of the token list and of the names dict, as a pair."""
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        """Return a list-like rendering; nested results are rendered recursively."""
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        """Return the tokens flattened into a list of strings, with sep (if
        non-empty) inserted between this level's items."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.

        Nested ParseResults become nested lists; other tokens are returned
        unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object.

        The token list and the names dict are copied one level deep; the
        values they hold are shared with the original."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

        - doctag - tag for the enclosing element; defaults to this object's
          results name, or "ITEM" when there is none
        - namedItemsOnly - if true, tokens without a results name are left out
        - indent - prefix written before each line of this level
        - formatted - if false, newlines and indentation are suppressed
        """
        nl = "\n"
        out = []
        # position in the token list -> results name stored for that position
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items() for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i], namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
                else:
                    out += [ res.asXML(None, namedItemsOnly and doctag is None, nextLevelIndent,formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">", xmlBodyText, "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        """Return the name under which the object sub is stored (identity
        comparison), or None."""
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression.

        Tried in order: this object's own name; the name under which the
        parent holds this object; the single name of a one-token result whose
        stored position is 0 or -1.  Returns None if none applies."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               self.__tokdict.values()[0][0][1] in (0,-1)):
            return self.__tokdict.keys()[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+str(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(str(v))
            else:
                out.append(str(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        """Return (token list, (names dict copy, parent, accumulating names, name))."""
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        """Restore the state produced by __getstate__."""
        self.__toklist = state[0]
        self.__tokdict, \
        self.__parent, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
+ − 469
+ − 470
+ − 471
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
       The first column is number 1.

       A loc that sits exactly on a newline character reports column 1.
    """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives -1 when no newline precedes loc, which makes the first
    # character of the string column 1
    return loc - strg.rfind("\n", 0, loc)
+ − 476
+ − 477
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
       The first line is number 1.
    """
    newlinesBefore = strg.count("\n",0,loc)
    return newlinesBefore + 1
+ − 482
+ − 483
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

       The returned text does not include the terminating newline.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "not found" as -1, so index 0 is a genuine hit: a string
    # that starts with a newline has an empty first line.  (The test used to
    # be "> 0", which returned the whole remainder of the string instead.)
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
+ − 492
+ − 493
def _defaultStartDebugAction( instring, loc, expr ):
+ − 494
print "Match",expr,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
+ − 495
+ − 496
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
+ − 497
print "Matched",expr,"->",toks.asList()
+ − 498
+ − 499
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
+ − 500
print "Exception raised:", exc
+ − 501
+ − 502
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments and returns None."""
    return None
+ − 505
+ − 506
class ParserElement(object):
+ − 507
"""Abstract base level parser element class."""
+ − 508
DEFAULT_WHITE_CHARS = " \n\t\r"
+ − 509
+ − 510
def setDefaultWhitespaceChars( chars ):
+ − 511
"""Overrides the default whitespace chars
+ − 512
"""
+ − 513
ParserElement.DEFAULT_WHITE_CHARS = chars
+ − 514
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
+ − 515
+ − 516
def __init__( self, savelist=False ):
+ − 517
self.parseAction = list()
+ − 518
self.failAction = None
+ − 519
#~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
+ − 520
self.strRepr = None
+ − 521
self.resultsName = None
+ − 522
self.saveAsList = savelist
+ − 523
self.skipWhitespace = True
+ − 524
self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+ − 525
self.copyDefaultWhiteChars = True
+ − 526
self.mayReturnEmpty = False
+ − 527
self.keepTabs = False
+ − 528
self.ignoreExprs = list()
+ − 529
self.debug = False
+ − 530
self.streamlined = False
+ − 531
self.mayIndexError = True
+ − 532
self.errmsg = ""
+ − 533
self.modalResults = True
+ − 534
self.debugActions = ( None, None, None )
+ − 535
self.re = None
+ − 536
+ − 537
def copy( self ):
+ − 538
"""Make a copy of this ParserElement. Useful for defining different parse actions
+ − 539
for the same parsing pattern, using copies of the original parse element."""
+ − 540
cpy = copy.copy( self )
+ − 541
cpy.parseAction = self.parseAction[:]
+ − 542
cpy.ignoreExprs = self.ignoreExprs[:]
+ − 543
if self.copyDefaultWhiteChars:
+ − 544
cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
+ − 545
return cpy
+ − 546
+ − 547
def setName( self, name ):
+ − 548
"""Define name for this expression, for use in debugging."""
+ − 549
self.name = name
+ − 550
self.errmsg = "Expected " + self.name
+ − 551
return self
+ − 552
+ − 553
def setResultsName( self, name, listAllMatches=False ):
+ − 554
"""Define name for referencing matching tokens as a nested attribute
+ − 555
of the returned parse results.
+ − 556
NOTE: this returns a *copy* of the original ParserElement object;
+ − 557
this is so that the client can define a basic element, such as an
+ − 558
integer, and reference it in multiple places with different names.
+ − 559
"""
+ − 560
newself = self.copy()
+ − 561
newself.resultsName = name
+ − 562
newself.modalResults = not listAllMatches
+ − 563
return newself
+ − 564
+ − 565
def normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       Returns f unchanged when it already takes 3 arguments or has the
       STAR_ARGS flag set; otherwise returns a wrapper taking (s,l,t) that
       forwards only the trailing arguments f accepts."""
    # Bit tested in a code object's co_flags.  Presumably CPython's
    # CO_VARARGS ("accepts *args") - TODO confirm against the inspect docs.
    STAR_ARGS = 4

    try:
        restore = None
        if isinstance(f,type):
            # f is a class: inspect its __init__, remember the class itself
            restore = f
            f = f.__init__
        # NOTE(review): if f was a class, this early return hands back
        # f.__init__ rather than the class - confirm that is intended.
        if f.func_code.co_flags & STAR_ARGS:
            return f
        numargs = f.func_code.co_argcount
        if hasattr(f,"im_self"):
            # method object: do not count the self parameter
            numargs -= 1
        if restore:
            f = restore
    except AttributeError:
        # NOTE(review): if f was replaced by f.__init__ above, it has not
        # been restored when execution arrives here.
        try:
            # not a function, must be a callable object, get info from the
            # im_func binding of its bound __call__ method
            if f.__call__.im_func.func_code.co_flags & STAR_ARGS:
                return f
            numargs = f.__call__.im_func.func_code.co_argcount
            if hasattr(f.__call__,"im_self"):
                numargs -= 1
        except AttributeError:
            # not a bound method, get info directly from __call__ method
            if f.__call__.func_code.co_flags & STAR_ARGS:
                return f
            numargs = f.__call__.func_code.co_argcount
            if hasattr(f.__call__,"im_self"):
                numargs -= 1

    #~ print "adding function %s with %d args" % (f.func_name,numargs)
    if numargs == 3:
        return f
    else:
        # wrap f so that it can be called with the full (s,l,t) signature
        if numargs == 2:
            def tmp(s,l,t):
                return f(l,t)
        elif numargs == 1:
            def tmp(s,l,t):
                return f(t)
        else: #~ numargs == 0:
            def tmp(s,l,t):
                return f()
        return tmp
normalizeParseActionArgs = staticmethod(normalizeParseActionArgs)
+ − 614
+ − 615
def setParseAction( self, *fns ):
+ − 616
"""Define action to perform when successfully matching parse element definition.
+ − 617
Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
+ − 618
fn(loc,toks), fn(toks), or just fn(), where:
+ − 619
- s = the original string being parsed
+ − 620
- loc = the location of the matching substring
+ − 621
- toks = a list of the matched tokens, packaged as a ParseResults object
+ − 622
If the functions in fns modify the tokens, they can return them as the return
+ − 623
value from fn, and the modified list of tokens will replace the original.
+ − 624
Otherwise, fn does not need to return any value."""
+ − 625
self.parseAction = map(self.normalizeParseActionArgs, list(fns))
+ − 626
return self
+ − 627
+ − 628
def addParseAction( self, *fns ):
+ − 629
"""Add parse action to expression's list of parse actions. See setParseAction_."""
+ − 630
self.parseAction += map(self.normalizeParseActionArgs, list(fns))
+ − 631
return self
+ − 632
+ − 633
def setFailAction( self, fn ):
+ − 634
"""Define action to perform if parsing fails at this expression.
+ − 635
Fail acton fn is a callable function that takes the arguments
+ − 636
fn(s,loc,expr,err) where:
+ − 637
- s = string being parsed
+ − 638
- loc = location where expression match was attempted and failed
+ − 639
- expr = the parse expression that failed
+ − 640
- err = the exception thrown
+ − 641
The function returns no value. It may throw ParseFatalException
+ − 642
if it is desired to stop parsing immediately."""
+ − 643
self.failAction = fn
+ − 644
return self
+ − 645
+ − 646
def skipIgnorables( self, instring, loc ):
+ − 647
exprsFound = True
+ − 648
while exprsFound:
+ − 649
exprsFound = False
+ − 650
for e in self.ignoreExprs:
+ − 651
try:
+ − 652
while 1:
+ − 653
loc,dummy = e._parse( instring, loc )
+ − 654
exprsFound = True
+ − 655
except ParseException:
+ − 656
pass
+ − 657
return loc
+ − 658
+ − 659
def preParse( self, instring, loc ):
+ − 660
if self.ignoreExprs:
+ − 661
loc = self.skipIgnorables( instring, loc )
+ − 662
+ − 663
if self.skipWhitespace:
+ − 664
wt = self.whiteChars
+ − 665
instrlen = len(instring)
+ − 666
while loc < instrlen and instring[loc] in wt:
+ − 667
loc += 1
+ − 668
+ − 669
return loc
+ − 670
+ − 671
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation of the matching step: consumes nothing.

    Returns loc unchanged together with an empty token list."""
    noTokens = []
    return loc, noTokens
+ − 673
+ − 674
def postParse( self, instring, loc, tokenlist ):
    """Hook applied to the tokens returned by parseImpl; this base version
    returns tokenlist unchanged."""
    return tokenlist
+ − 676
+ − 677
#~ @profile
+ − 678
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at loc and return (end location, ParseResults).

    Steps: optional preParse, parseImpl, postParse, packaging of the tokens
    into a ParseResults, then (if doActions) the parse actions.  Raises
    ParseException when the element does not match; an IndexError coming
    out of parseImpl is converted to a ParseException located at the end of
    the string.  The debug actions and the fail action are only consulted
    on the first (slower) path below."""
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: debug / fail callbacks may need to be invoked
        #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # the location before preParse is what callbacks and parse actions get
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
            #~ except ReparseException, retryEx:
                #~ pass
        except ParseException, err:
            #~ print "Exception raised:", err
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no callbacks to honour
        if callPreParse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        # only pay for the try/except when an IndexError is possible
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and doActions:
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action that returns something replaces the results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseException, err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a parse action that returns something replaces the results
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print "Matched",self,"->",retTokens.asList()
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
+ − 751
+ − 752
def tryParse( self, instring, loc ):
+ − 753
return self._parse( instring, loc, doActions=False )[0]
+ − 754
+ − 755
# this method gets repeatedly called during backtracking with the same arguments -
+ − 756
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
+ − 757
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
+ − 758
if doActions and self.parseAction:
+ − 759
return self._parseNoCache( instring, loc, doActions, callPreParse )
+ − 760
lookup = (self,instring,loc,callPreParse)
+ − 761
if lookup in ParserElement._exprArgCache:
+ − 762
value = ParserElement._exprArgCache[ lookup ]
+ − 763
if isinstance(value,Exception):
+ − 764
if isinstance(value,ParseBaseException):
+ − 765
value.loc = loc
+ − 766
raise value
+ − 767
return value
+ − 768
else:
+ − 769
try:
+ − 770
ParserElement._exprArgCache[ lookup ] = \
+ − 771
value = self._parseNoCache( instring, loc, doActions, callPreParse )
+ − 772
return value
+ − 773
except ParseBaseException, pe:
+ − 774
ParserElement._exprArgCache[ lookup ] = pe
+ − 775
raise
+ − 776
+ − 777
_parse = _parseNoCache
+ − 778
+ − 779
# argument cache for optimizing repeated calls when backtracking through recursive expressions
+ − 780
_exprArgCache = {}
+ − 781
def resetCache():
+ − 782
ParserElement._exprArgCache.clear()
+ − 783
resetCache = staticmethod(resetCache)
+ − 784
+ − 785
_packratEnabled = False
+ − 786
def enablePackrat():
+ − 787
"""Enables "packrat" parsing, which adds memoizing to the parsing logic.
+ − 788
Repeated parse attempts at the same string location (which happens
+ − 789
often in many complex grammars) can immediately return a cached value,
+ − 790
instead of re-executing parsing/validating code. Memoizing is done of
+ − 791
both valid results and parsing exceptions.
+ − 792
+ − 793
This speedup may break existing programs that use parse actions that
+ − 794
have side-effects. For this reason, packrat parsing is disabled when
+ − 795
you first import pyparsing. To activate the packrat feature, your
+ − 796
program must call the class method ParserElement.enablePackrat(). If
+ − 797
your program uses psyco to "compile as you go", you must call
+ − 798
enablePackrat before calling psyco.full(). If you do not do this,
+ − 799
Python will crash. For best results, call enablePackrat() immediately
+ − 800
after importing pyparsing.
+ − 801
"""
+ − 802
if not ParserElement._packratEnabled:
+ − 803
ParserElement._packratEnabled = True
+ − 804
ParserElement._parse = ParserElement._parseCache
+ − 805
enablePackrat = staticmethod(enablePackrat)
+ − 806
+ − 807
def parseString( self, instring ):
+ − 808
"""Execute the parse expression with the given string.
+ − 809
This is the main interface to the client code, once the complete
+ − 810
expression has been built.
+ − 811
"""
+ − 812
ParserElement.resetCache()
+ − 813
if not self.streamlined:
+ − 814
self.streamline()
+ − 815
#~ self.saveAsList = True
+ − 816
for e in self.ignoreExprs:
+ − 817
e.streamline()
+ − 818
if self.keepTabs:
+ − 819
loc, tokens = self._parse( instring, 0 )
+ − 820
else:
+ − 821
loc, tokens = self._parse( instring.expandtabs(), 0 )
+ − 822
return tokens
+ − 823
+ − 824
def scanString( self, instring, maxMatches=sys.maxint ):
+ − 825
"""Scan the input string for expression matches. Each match will return the
+ − 826
matching tokens, start location, and end location. May be called with optional
+ − 827
maxMatches argument, to clip scanning after 'n' matches are found."""
+ − 828
if not self.streamlined:
+ − 829
self.streamline()
+ − 830
for e in self.ignoreExprs:
+ − 831
e.streamline()
+ − 832
+ − 833
if not self.keepTabs:
+ − 834
instring = instring.expandtabs()
+ − 835
instrlen = len(instring)
+ − 836
loc = 0
+ − 837
preparseFn = self.preParse
+ − 838
parseFn = self._parse
+ − 839
ParserElement.resetCache()
+ − 840
matches = 0
+ − 841
while loc <= instrlen and matches < maxMatches:
+ − 842
try:
+ − 843
preloc = preparseFn( instring, loc )
+ − 844
nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
+ − 845
except ParseException:
+ − 846
loc = preloc+1
+ − 847
else:
+ − 848
matches += 1
+ − 849
yield tokens, preloc, nextLoc
+ − 850
loc = nextLoc
+ − 851
+ − 852
def transformString( self, instring ):
    """Return instring with every match replaced by the match's tokens.

    Built on scanString: attach a parse action that modifies the returned
    token list, then call transformString() on the target text.  Text
    between matches is copied through unchanged; each match is replaced by
    its (possibly modified) tokens, and the pieces are joined into the
    resulting string.

    Note: this sets keepTabs on the expression, and leaves it set.
    """
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    for tokens, start, end in self.scanString( instring ):
        # unmatched text between the previous match and this one
        pieces.append( instring[prevEnd:start] )
        if tokens:
            if isinstance(tokens,ParseResults):
                pieces.extend( tokens.asList() )
            elif isinstance(tokens,list):
                pieces.extend( tokens )
            else:
                pieces.append(tokens)
        prevEnd = end
    # trailing text after the last match
    pieces.append(instring[prevEnd:])
    return "".join(pieces)
+ − 876
+ − 877
def searchString( self, instring, maxMatches=sys.maxint ):
    """Collect the tokens of every match found by scanString.

    Returns a ParseResults built from the list of per-match tokens; the
    match locations are discarded.  The optional maxMatches argument clips
    searching after that many matches have been found.
    """
    found = []
    for tokens, start, end in self.scanString( instring, maxMatches ):
        found.append( tokens )
    return ParseResults( found )
+ − 883
+ − 884
def __add__(self, other ):
    """Implementation of + operator - returns And.

    A plain string operand is wrapped in a Literal first.  Any other
    non-ParserElement operand only triggers a SyntaxWarning.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if not isinstance( rhs, ParserElement ):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(rhs),
                      SyntaxWarning, stacklevel=2)
    return And( [ self, rhs ] )
+ − 892
+ − 893
def __radd__(self, other ):
    """Implementation of the reflected + operator (other + self).

    Invoked when the left operand is not a ParserElement.  A plain string
    is wrapped in a Literal and combined with self.  For any other type a
    SyntaxWarning is issued and None is returned.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if not isinstance( other, ParserElement ):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
        # evaluating "other + self" here would dispatch straight back into
        # this method and recurse without bound
        return None
    return other + self
+ − 901
+ − 902
def __or__(self, other ):
    """Implementation of | operator - returns MatchFirst.

    A plain string operand is wrapped in a Literal first.  Any other
    non-ParserElement operand only triggers a SyntaxWarning.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if not isinstance( rhs, ParserElement ):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(rhs),
                      SyntaxWarning, stacklevel=2)
    return MatchFirst( [ self, rhs ] )
+ − 910
+ − 911
def __ror__(self, other ):
    """Implementation of the reflected | operator (other | self).

    Invoked when the left operand is not a ParserElement.  A plain string
    is wrapped in a Literal and combined with self.  For any other type a
    SyntaxWarning is issued and None is returned.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if not isinstance( other, ParserElement ):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
        # evaluating "other | self" here would dispatch straight back into
        # this method and recurse without bound
        return None
    return other | self
+ − 919
+ − 920
def __xor__(self, other ):
    """Implementation of ^ operator - returns Or.

    A plain string operand is wrapped in a Literal first.  Any other
    non-ParserElement operand only triggers a SyntaxWarning.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if not isinstance( rhs, ParserElement ):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(rhs),
                      SyntaxWarning, stacklevel=2)
    return Or( [ self, rhs ] )
+ − 928
+ − 929
def __rxor__(self, other ):
    """Implementation of the reflected ^ operator (other ^ self).

    Invoked when the left operand is not a ParserElement.  A plain string
    is wrapped in a Literal and combined with self.  For any other type a
    SyntaxWarning is issued and None is returned.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if not isinstance( other, ParserElement ):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
        # evaluating "other ^ self" here would dispatch straight back into
        # this method and recurse without bound
        return None
    return other ^ self
+ − 937
+ − 938
def __and__(self, other ):
    """Implementation of & operator - returns Each.

    A plain string operand is wrapped in a Literal first.  Any other
    non-ParserElement operand only triggers a SyntaxWarning.
    """
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if not isinstance( rhs, ParserElement ):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(rhs),
                      SyntaxWarning, stacklevel=2)
    return Each( [ self, rhs ] )
+ − 946
+ − 947
def __rand__(self, other ):
    """Implementation of the reflected & operator (other & self).

    Invoked when the left operand is not a ParserElement.  A plain string
    is wrapped in a Literal and combined with self.  For any other type a
    SyntaxWarning is issued and None is returned.
    """
    if isinstance( other, basestring ):
        other = Literal( other )
    if not isinstance( other, ParserElement ):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
        # evaluating "other & self" here would dispatch straight back into
        # this method and recurse without bound
        return None
    return other & self
+ − 955
+ − 956
def __invert__( self ):
    """Implementation of ~ operator - wraps this expression in a NotAny."""
    negated = NotAny( self )
    return negated
+ − 959
+ − 960
def suppress( self ):
    """Return this ParserElement wrapped in a Suppress, so that its output
       is left out of the results; useful to keep punctuation from
       cluttering up returned output.
    """
    suppressed = Suppress( self )
    return suppressed
+ − 965
+ − 966
def leaveWhitespace( self ):
    """Turn off whitespace skipping for this element and return self.

    With skipping disabled, leading whitespace is no longer skipped before
    the element's pattern is matched.  Normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive
    grammars.
    """
    setattr( self, "skipWhitespace", False )
    return self
+ − 973
+ − 974
def setWhitespaceChars( self, chars ):
    """Replace the default whitespace characters for this element with chars.

    Also re-enables whitespace skipping and clears copyDefaultWhiteChars.
    Returns self so calls can be chained.
    """
    self.whiteChars = chars
    self.copyDefaultWhiteChars = False
    self.skipWhitespace = True
    return self
+ − 981
+ − 982
def parseWithTabs( self ):
    """Keep <TAB> characters in the input instead of expanding them to spaces.

    By default the input string has its <TAB>s expanded before parsing.
    Call this before parseString when the grammar contains elements that
    match <TAB> characters.  Returns self so calls can be chained.
    """
    setattr( self, "keepTabs", True )
    return self
+ − 988
+ − 989
def ignore( self, other ):
    """Register an expression to be ignored (e.g., comments) while doing
       pattern matching; may be called repeatedly to register several
       ignorable patterns.

       An expression that is not already a Suppress is wrapped in one.  A
       Suppress instance is only added if it is not already registered.
       Returns self so calls can be chained.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other )
    return self
+ − 1000
+ − 1001
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Turn on debugging for this element with the given callbacks.

    Any action passed as a false value (e.g. None) is replaced by the
    corresponding module default.  Returns self so calls can be chained.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
+ − 1008
+ − 1009
def setDebug( self, flag=True ):
    """Switch debugging for this element on (with the default debug
       actions) or off, depending on flag.  Returns self.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
+ − 1016
+ − 1017
def __str__( self ):
    """Return the name assigned to this element."""
    label = self.name
    return label
+ − 1019
+ − 1020
def __repr__( self ):
    """Return the same text as the string conversion, via _ustr."""
    text = _ustr(self)
    return text
+ − 1022
+ − 1023
def streamline( self ):
    """Mark this element as streamlined, discard any cached string
       representation, and return self.
    """
    self.strRepr = None
    self.streamlined = True
    return self
+ − 1027
+ − 1028
def checkRecursion( self, parseElementList ):
    """Recursion check hook; this implementation does nothing."""
    return None
+ − 1030
+ − 1031
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    validateTrace is accepted but not used here; the check is delegated to
    checkRecursion, started with a new empty list.
    """
    seen = []
    self.checkRecursion( seen )
+ − 1034
+ − 1035
def parseFile( self, file_or_filename ):
    """Execute the parse expression on the given file or filename.

    If the argument has a read() method it is read as-is and is not
    closed here.  Otherwise it is treated as a filename: the file is
    opened in binary mode, read completely, and closed before parsing.
    Returns whatever parseString returns for the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method - treat the argument as a filename
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # close the handle even if reading fails
            f.close()
    return self.parseString(file_contents)
+ − 1047
+ − 1048
+ − 1049
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )
        # one exception object per token; the parseImpl methods update its
        # location and input string before raising it
        self.myException = ParseException("",0,"",self)

    def setName(self, name):
        """Assign a name via the base class and refresh the error message
           carried by the token's exception to match it.
        """
        named = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        named.myException.msg = self.errmsg
        return named
+ − 1060
+ − 1061
+ − 1062
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
+ − 1069
+ − 1070
+ − 1071
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.errmsg = "Unmatchable token"
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise the token's exception, updated to the
           current location and input string.
        """
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
+ − 1086
+ − 1087
+ − 1088
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, match string) if the literal is found at
           loc; otherwise raise the token's exception.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen==1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
+ − 1119
+ − 1120
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare in upper case, so store upper-cased forms
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring that the characters just
           before and just after it (if any) are not identifier characters.
           Returns (end location, keyword string) or raises the token's
           exception.
        """
        matchEnd = loc+self.matchLen
        if self.caseless:
            matched = (
                instring[ loc:matchEnd ].upper() == self.caselessmatch and
                (loc >= len(instring)-self.matchLen or instring[matchEnd].upper() not in self.identChars) and
                (loc == 0 or instring[loc-1].upper() not in self.identChars) )
        else:
            matched = (
                instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[matchEnd] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) )
        if matched:
            return matchEnd, self.match
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure

    def copy(self):
        """Copy this keyword; the copy's identChars is set to the current
           class-level default characters.
        """
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
+ − 1180
+ − 1181
+ − 1182
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base Literal holds the upper-cased form used for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice with the stored match
           string; on success return (end location, defining literal),
           otherwise raise the token's exception.
        """
        matchEnd = loc+self.matchLen
        if instring[ loc:matchEnd ].upper() == self.match:
            return matchEnd, self.returnString
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
+ − 1203
+ − 1204
class CaselessKeyword(Keyword):
    """Case-insensitive version of Keyword.

       The matched result is always the keyword as given to the
       constructor, not the text found in the input.
    """
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, ignoring case.  The characters just
           before and just after the match (if any) must not be identifier
           characters - the same boundary rule that Keyword.parseImpl
           applies.  Returns (end location, keyword string) or raises the
           token's exception.
        """
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             # also reject a match that starts in the middle of an identifier
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
+ − 1217
+ − 1218
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = sys.maxint

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False

        # Fast path: with default length limits and no blank in either
        # character set, the word can be matched by a compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a single initial character can be matched literally; testing
                # the body length here instead would turn several initial
                # characters into a literal sequence
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort only - fall back to the character loop in parseImpl
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc and return (end location, matched text), or
           raise the token's exception.  Uses the compiled regex when one
           is available, otherwise scans character by character.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that keeps going past it is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the inherited string form if that works; otherwise build
           (and cache in strRepr) a "W:(...)" description from the
           character sets, each abbreviated to its first four characters.
        """
        try:
            return super(Word,self).__str__()
        except Exception:
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
+ − 1333
+ − 1334
+ − 1335
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

        An empty pattern only produces a SyntaxWarning.  A pattern that
        fails to compile produces a SyntaxWarning and the compile error is
        re-raised.
        """
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the compiled pattern at loc.  Returns (end location,
           ParseResults) where the results hold the matched text and one
           named entry per named group of the pattern; raises the token's
           exception when the pattern does not match.
        """
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = ParseResults(result.group())
        # expose named groups as named results
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc,ret

    def __str__( self ):
        """Return the inherited string form if that works; otherwise build
           (and cache in strRepr) a "Re:(...)" description of the pattern.
        """
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
+ − 1390
+ − 1391
+ − 1392
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
           - quoteChar - string of one or more characters defining the quote delimiting string
           - escChar - character to escape quotes, typically backslash (default=None)
           - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
           - multiline - boolean indicating whether quotes can span multiple lines (default=False)
           - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
           - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) if quoteChar or
           endQuoteChar is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # the escape character (if any) is excluded from the plain-character class
        escClass = ''
        if escChar is not None:
            escClass = _escapeRegexRangeChars(escChar) or ''

        # opening quote, then a group of alternatives; the first alternative
        # is any character that cannot start the end quote (or a line break,
        # unless multiline)
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escClass )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escClass )
        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote that are
            # not followed by the rest of it
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc.  Returns (end location, text);
           with unquoteResults the delimiters are stripped and escape
           sequences are replaced.  Raises the token's exception when there
           is no match.
        """
        # cheap first-character test before running the regex
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the inherited string form if that works; otherwise build
           (and cache in strRepr) a description naming the delimiters.
        """
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
+ − 1506
+ − 1507
+ − 1508
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        self.minLen = min
        self.maxLen = sys.maxint
        if max > 0:
            self.maxLen = max

        # an exact length overrides both min and max
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from loc for as long as they are not in
           notChars (up to maxLen).  Returns (end location, matched text),
           or raises the token's exception if the first character is
           disallowed or fewer than minLen characters were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure

        start = loc
        loc = start + 1
        limit = min( start+self.maxLen, len(instring) )
        while loc < limit and not (instring[loc] in excluded):
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the inherited string form if that works; otherwise build
           (and cache in strRepr) a "!W:(...)" description showing at most
           the first four disallowed characters.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr

        if len(self.notChars) > 4:
            self.strRepr = "!W:(%s...)" % self.notChars[:4]
        else:
            self.strRepr = "!W:(%s)" % self.notChars
        return self.strRepr
+ − 1572
+ − 1573
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # display names used to build the element's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # keep skipping only those whitespace chars this token does not match
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite]))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

        self.minLen = min
        self.maxLen = sys.maxint
        if max > 0:
            self.maxLen = max

        # an exact length overrides both min and max
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters starting
           at loc (up to maxLen).  Returns (end location, matched text), or
           raises the token's exception if the first character does not
           match or the run is shorter than minLen.
        """
        if instring[ loc ] not in self.matchWhite:
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure
        start = loc
        loc = start + 1
        limit = min( start + self.maxLen, len(instring) )
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure

        return loc, instring[start:loc]
+ − 1629
+ − 1630
+ − 1631
class PositionToken(Token):
    """Base class for the position-matching tokens; the element is named
       after its own class.
    """
    def __init__( self ):
        super(PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name=self.__class__.__name__
+ − 1637
+ − 1638
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """If loc is not yet at the target column, skip ignorable
           expressions and then whitespace until the column is reached or
           a non-space character is found.  Returns the new location.
        """
        if col(loc,instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self.skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (location of the target column, text skipped to get
           there); raise ParseException if loc is already past the column.
        """
        thiscol = col( loc, instring )
        if thiscol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - thiscol
        skipped = instring[ loc: newloc ]
        return newloc, skipped
+ − 1660
+ − 1661
class LineStart(PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # only blanks and tabs are skippable, so newlines stay visible
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected start of line"
        self.myException.msg = self.errmsg

    def preParse( self, instring, loc ):
        """Return loc advanced by one if, after the inherited preParse
           skipping, the next character is a newline; otherwise return loc
           unchanged.
        """
        preloc = super(LineStart,self).preParse(instring,loc)
        # bounds-check first so a position at or past the end of the input
        # cannot raise IndexError
        if preloc < len(instring) and instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens if loc is 0, equals the location preParse
           yields from position 0, or directly follows a newline; otherwise
           raise the token's exception.
        """
        if not( loc==0 or
            (loc == self.preParse( instring, 0 )) or
            (instring[loc-1] == "\n") ):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
+ − 1685
+ − 1686
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the parse string"""

    def __init__(self):
        """Restrict skippable whitespace to blanks and tabs and set the error message."""
        super(LineEnd, self).__init__()
        # newlines must not be skipped as whitespace, they are what this token matches
        self.setWhitespaceChars(" \t")
        self.errmsg = "Expected end of line"
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at loc, or the end of the input string.

        Returns (loc+1, "\\n") for a newline and (loc+1, []) when loc is
        exactly len(instring).  In every other case the shared exception
        instance is updated with loc and instring and raised.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
+ − 1711
+ − 1712
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse string"""

    def __init__(self):
        """Set the error message reported when the match fails."""
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list when loc is at the start of the text.

        Position 0 always matches.  Any other position matches only if it
        is what preParse returns for position 0, i.e. everything before it
        is whitespace and ignorables.  Otherwise the shared exception
        instance is updated with loc and instring and raised.
        """
        if loc != 0 and loc != self.preParse(instring, 0):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
+ − 1729
+ − 1730
class StringEnd(PositionToken):
    """Matches if current position is at the end of the parse string"""

    def __init__(self):
        """Set the error message reported when the match fails."""
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed only when loc is exactly len(instring).

        Returns (loc+1, []) on success.  For any position before or past
        the end, the shared exception instance is updated with loc and
        instring and raised.
        """
        if loc == len(instring):
            return loc + 1, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
+ − 1751
+ − 1752
+ − 1753
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

    def __init__(self, exprs, savelist=False):
        """Store the contained expressions in self.exprs.

        A list is kept as-is (the same list object), a string is wrapped as
        a single Literal, and any other value becomes a one-element list.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, list):
            contained = exprs
        elif isinstance(exprs, basestring):
            contained = [Literal(exprs)]
        else:
            contained = [exprs]
        self.exprs = contained

    def __getitem__(self, i):
        """Return the contained expression at index i."""
        return self.exprs[i]

    def append(self, other):
        """Add other to the contained expressions and return self."""
        self.exprs.append(other)
        # the cached string form no longer describes the contents
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the originals keep their own whitespace settings
        copies = []
        for sub in self.exprs:
            copies.append(sub.copy())
        self.exprs = copies
        for sub in copies:
            sub.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable here and on every contained expression.

        A Suppress that is already in self.ignoreExprs is not registered
        again.  Returns self.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseExpression, self).ignore(other)
        # pass on the entry the base class just stored as the last item
        for sub in self.exprs:
            sub.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        """Return the base-class string form, or a cached "ClassName:(exprs)" fallback."""
        try:
            return super(ParseExpression, self).__str__()
        except:
            # any failure in the base class falls through to the generic form
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline all contained expressions and flatten nested two-element expressions.

        Returns self.
        """
        super(ParseExpression, self).streamline()

        for sub in self.exprs:
            sub.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) != 2:
            return self

        left = self.exprs[0]
        if (isinstance(left, self.__class__)
                and not left.parseAction
                and left.resultsName is None
                and not left.debug):
            self.exprs = left.exprs[:] + [self.exprs[1]]
            self.strRepr = None
            self.mayReturnEmpty |= left.mayReturnEmpty
            self.mayIndexError |= left.mayIndexError

        right = self.exprs[-1]
        if (isinstance(right, self.__class__)
                and not right.parseAction
                and right.resultsName is None
                and not right.debug):
            self.exprs = self.exprs[:-1] + right.exprs[:]
            self.strRepr = None
            self.mayReturnEmpty |= right.mayReturnEmpty
            self.mayIndexError |= right.mayIndexError

        return self

    def setResultsName(self, name, listAllMatches=False):
        """Delegate to the base-class setResultsName and return its result."""
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate(self, validateTrace=[]):
        """Validate every contained expression, then check this one for recursion."""
        # validateTrace is only copied, never modified
        trace = validateTrace[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self.checkRecursion([])
+ − 1844
+ − 1845
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Build the sequence and take whitespace handling from its first element."""
        super(And,self).__init__(exprs, savelist)
        # the sequence can only match empty input if every element can
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Use the normalized self.exprs, not the raw argument: the base class
        # accepts a string or a single element, for which exprs[0] is not the
        # first contained expression.
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in order and return (loc, accumulated tokens)."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        for e in self.exprs[1:]:
            loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that carry tokens or named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append other (a string is wrapped as a Literal) to this sequence in place."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, up to the first that cannot match empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if any, else a cached "{a b c}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
+ − 1890
+ − 1891
+ − 1892
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """

    def __init__(self, exprs, savelist=False):
        """Build the alternation; it may match empty input if any alternative may."""
        super(Or, self).__init__(exprs, savelist)
        self.mayReturnEmpty = False
        for alt in self.exprs:
            if alt.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and re-parse with the one that got furthest.

        Each alternative is first tried with tryParse.  If none matches,
        the exception with the greatest location is raised, or a
        "no defined alternatives" ParseException when there are no
        alternatives at all.
        """
        furthestFailLoc = -1
        furthestMatchLoc = -1
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse(instring, loc)
            except ParseException:
                # fetched via sys.exc_info() rather than the version-specific
                # "except X, name" binding syntax
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the end of the input counts as a failure at the end
                inputLen = len(instring)
                if inputLen > furthestFailLoc:
                    furthestFailure = ParseException(instring, inputLen, alt.errmsg, self)
                    furthestFailLoc = inputLen
            else:
                if endLoc > furthestMatchLoc:
                    furthestMatchLoc = endLoc
                    bestAlt = alt

        if furthestMatchLoc >= 0:
            return bestAlt._parse(instring, loc, doActions)

        if self.exprs:
            raise furthestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Append other (a string is wrapped as a Literal) to the alternatives in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        """Return the assigned name if any, else a cached "{a ^ b ^ c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(alt) for alt in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
+ − 1950
+ − 1951
+ − 1952
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """

    def __init__(self, exprs, savelist=False):
        """Build the alternation; it may match empty input if any alternative may.

        When the exprs argument is falsy, mayReturnEmpty is set to True.
        """
        super(MatchFirst, self).__init__(exprs, savelist)
        if not exprs:
            self.mayReturnEmpty = True
            return
        self.mayReturnEmpty = False
        for alt in self.exprs:
            if alt.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        If none matches, the exception with the greatest location is
        raised, or a "no defined alternatives" ParseException when there
        are no alternatives at all.
        """
        furthestFailLoc = -1
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException:
                # fetched via sys.exc_info() rather than the version-specific
                # "except X, name" binding syntax
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the end of the input counts as a failure at the end
                inputLen = len(instring)
                if inputLen > furthestFailLoc:
                    furthestFailure = ParseException(instring, inputLen, alt.errmsg, self)
                    furthestFailLoc = inputLen

        # only got here if no expression matched, raise exception for match that made it the furthest
        if self.exprs:
            raise furthestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Append other (a string is wrapped as a Literal) to the alternatives in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        """Return the assigned name if any, else a cached "{a | b | c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(alt) for alt in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
+ − 2008
+ − 2009
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Build the expression and sort its elements into required/optional groups."""
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # Classify the normalized self.exprs, not the raw argument: the base
        # class accepts a string or a single element, and iterating those
        # directly would not yield the contained expressions.
        members = self.exprs
        self.optionals = [ e.expr for e in members if isinstance(e,Optional) ]
        self.multioptionals = [ e.expr for e in members if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in members if isinstance(e,OneOrMore) ]
        self.required = [ e for e in members if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        # OneOrMore contents must match at least once, so they are required too
        self.required += self.multirequired

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the elements in whatever order they occur and merge their results.

        A first pass uses tryParse repeatedly to discover the order in which
        elements match; it stops after a round in which nothing matched.  If
        required elements are still unmatched a ParseException is raised.
        A second pass re-parses in the discovered order and combines the
        results, accumulating values that share a results name.
        """
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # single-use elements are removed once matched
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # second pass: parse for real, in the order found above
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # for names already present, build the combined value before merging
            # and store it again afterwards
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if any, else a cached "{a & b & c}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every contained expression for recursion."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
+ − 2087
+ − 2088
+ − 2089
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap expr (a string is converted to a Literal); expr may be None.

        When expr is given, its mayIndexError, whitespace characters,
        skipWhitespace and saveAsList settings are copied onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the wrapped expression.

        Raises ParseException when no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            # report against the actual input so the exception carries the parsed text
            raise ParseException(instring,loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a copy of the wrapped expression."""
        self.skipWhitespace = False
        # guard before copying: expr is None until it has been assigned
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and on the wrapped expression.

        A Suppress that is already in self.ignoreExprs is not registered
        again.  Returns self.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression; returns self."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if self is already in parseElementList,
        otherwise continue the check in the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then check this element for recursion."""
        # validateTrace is only copied, never modified
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string form, or a cached "ClassName:(expr)" fallback."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except:
            # any failure in the base class falls through to the generic form
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
+ − 2156
+ − 2157
+ − 2158
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""

    def __init__(self, expr):
        """Wrap expr as a lookahead; a lookahead may always match empty input."""
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Verify that the wrapped expression matches at loc without consuming input."""
        # tryParse raises on a mismatch; its end location is deliberately discarded
        self.expr.tryParse(instring, loc)
        return loc, []
+ − 2170
+ − 2171
+ − 2172
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""

    def __init__(self, expr):
        """Wrap expr as a negative lookahead and prepare its failure exception."""
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)
        self.myException = ParseException("", 0, self.errmsg, self)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list only if the wrapped expression fails at loc.

        When the wrapped expression does match, the shared exception
        instance is updated with loc and instring and raised.
        """
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # the unwanted expression is absent, which is the success case
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__(self):
        """Return the assigned name if any, else a cached "~{expr}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
+ − 2207
+ − 2208
+ − 2209
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""

    def __init__(self, expr):
        """Wrap expr; zero repetitions are allowed, so empty input may match."""
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression repeatedly until it fails.

        Returns (loc, tokens) for everything matched so far; when not even
        the first attempt matches, loc is returned unchanged with an empty
        list.
        """
        tokens = []
        try:
            # the first repetition skips preParse, later ones do not
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            hasIgnoreExprs = (len(self.ignoreExprs) > 0)
            while True:
                preloc = loc
                if hasIgnoreExprs:
                    preloc = self.skipIgnorables(instring, loc)
                loc, more = self.expr._parse(instring, preloc, doActions)
                # keep results that carry tokens or named entries
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failure ends the repetition
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name if any, else a cached "[expr]..." form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and mark the result as saved-as-list."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
+ − 2246
+ − 2247
+ − 2248
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression once, then repeatedly until it fails.

        A failure of the first match propagates to the caller; failures of
        later repetitions just end the loop.  Returns (loc, tokens).
        """
        # must be at least one
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            hasIgnoreExprs = (len(self.ignoreExprs) > 0)
            while True:
                preloc = loc
                if hasIgnoreExprs:
                    preloc = self.skipIgnorables(instring, loc)
                loc, more = self.expr._parse(instring, preloc, doActions)
                # keep results that carry tokens or named entries
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failure ends the repetition
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name if any, else a cached "{expr}..." form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and mark the result as saved-as-list."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
+ − 2281
+ − 2282
class _NullToken(object):
+ − 2283
def __bool__(self):
+ − 2284
return False
+ − 2285
def __str__(self):
+ − 2286
return ""
+ − 2287
+ − 2288
_optionalNotMatched = _NullToken()
+ − 2289
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """

    def __init__(self, exprs, default=_optionalNotMatched):
        """Wrap exprs and remember the value to return when it does not match."""
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, falling back to the default on failure.

        On failure loc is returned unchanged, with [defaultValue] if a
        default was supplied and an empty list otherwise.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # identity test against the sentinel, so any supplied default counts
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        """Return the assigned name if any, else a cached "[expr]" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
+ − 2317
+ − 2318
+ − 2319
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        """Wrap the target expression.

        When ignore is given, the target is copied and the ignore expression
        is registered on the copy.
        """
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.errmsg = "No match found for "+_ustr(self.expr)
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from loc until the target expression matches.

        Returns (loc, [skippedText]) or, with includeMatch set, the location
        after the target and [skippedText, targetTokens] (targetTokens is
        left out when empty).  If the target never matches, the shared
        exception instance is updated and raised.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        while loc <= instrlen:
            try:
                loc = expr.skipIgnorables( instring, loc )
                # trial match only: never run parse actions here
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                if self.includeMatch:
                    skipText = instring[startLoc:loc]
                    # honour the caller's doActions so that lookahead parses
                    # do not trigger the target's parse actions
                    loc,mat = expr._parse(instring,loc,doActions)
                    if mat:
                        return loc, [ skipText, mat ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ instring[startLoc:loc] ]
            except (ParseException,IndexError):
                # no match here, retry one character further on
                loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
+ − 2360
+ − 2361
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        """Create the placeholder, optionally with its expression already set."""
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Assign other (a string is wrapped as a Literal) as the expression; returns self."""
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the contained expression is not touched."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; returns self."""
        # set the flag first so that a recursive grammar does not re-enter here
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression unless self is already in validateTrace,
        then check for recursion."""
        # validateTrace is only copied, never modified
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if any, else "Forward: " plus the contained expression."""
        if hasattr(self,"name"):
            return self.name

        # Temporarily switch class so that a recursive reference to this
        # element prints as "..." instead of recursing without end.  Restore
        # the instance's own class afterwards (not hard-coded Forward) so
        # that subclass instances keep their type.
        savedClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = savedClass
        return "Forward: "+retString

    def copy(self):
        """Return a copy; an undefined Forward is copied as a new Forward that refers back to this one."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret
+ − 2425
+ − 2426
class _ForwardNoRecurse(Forward):
    """Variant of Forward whose string form is always "..."."""

    def __str__(self):
        return "..."
+ − 2429
+ − 2430
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""

    def __init__(self, expr, savelist=False):
        """Wrap expr; the savelist argument is accepted but not passed on."""
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
+ − 2435
+ − 2436
+ − 2437
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""

    def __init__(self, *args):
        """Construct the converter and emit a DeprecationWarning."""
        super(Upcase, self).__init__(*args)
        message = "Upcase class is deprecated, use upcaseTokens parse action instead"
        # stacklevel=2 attributes the warning to the code constructing the Upcase
        warnings.warn(message, DeprecationWarning, stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return a list with string.upper applied to every token."""
        return [string.upper(tok) for tok in tokenlist]
+ − 2446
+ − 2447
+ − 2448
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """

    def __init__(self, expr, joinString="", adjacent=True):
        """Wrap expr, remembering the join string and the adjacency requirement."""
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString

    def ignore(self, other):
        """Register an ignore expression; returns self.

        With adjacent set, ParserElement.ignore is called directly, which
        bypasses the ignore override of the intermediate base classes.
        """
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens by a single concatenated string token.

        The result is built from a copy of tokenlist that is emptied of its
        list items first.  It is wrapped in a list when this element has a
        results name and the result carries keys.
        """
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined
+ − 2478
+ − 2479
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""

    def __init__(self, expr):
        """Wrap expr and mark its results as saved-as-list."""
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Return the matched tokens nested inside a one-element list."""
        return [tokenlist]
+ − 2487
+ − 2488
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """

    def __init__(self, exprs):
        """Wrap exprs and mark its results as saved-as-list."""
        super(Dict, self).__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Store each entry of tokenlist under a key taken from its first token.

        The key is the stripped string form of the entry's first token and
        the stored item is a (value, index) pair.  The value is "" for a
        one-token entry, the second token for a two-token entry whose second
        token is not a ParseResults, and otherwise the remaining tokens
        (unwrapped when exactly one remains and it carries no keys).
        Returns tokenlist, wrapped in a list when this element has a
        results name.
        """
        for idx, tok in enumerate(tokenlist):
            key = _ustr(tok[0]).strip()
            size = len(tok)
            if size == 1:
                value = ""
            elif size == 2 and not isinstance(tok[1], ParseResults):
                value = tok[1]
            else:
                # work on a copy so the original entry keeps its key token
                rest = tok.copy()
                del rest[0]
                if len(rest) != 1 or (isinstance(rest, ParseResults) and rest.keys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[key] = (value, idx)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
+ − 2516
+ − 2517
+ − 2518
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        """Discard the matched tokens by returning an empty list."""
        return []

    def suppress(self):
        """Return self; the expression is already suppressed."""
        return self
+ − 2525
+ − 2526
+ − 2527
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        """Wrap methodCall after normalizing it with ParserElement.normalizeParseActionArgs."""
        self.callable = ParserElement.normalizeParseActionArgs(methodCall)
        self.called = False
    def __call__(self,s,l,t):
        """Invoke the wrapped action on first use; raise ParseException on later calls.

        The called flag is only set after the wrapped action returns, so an
        action that raises can be attempted again.
        """
        if not self.called:
            results = self.callable(s,l,t)
            self.called = True
            return results
        raise ParseException(s,l,"")
    def reset(self):
        """Allow the wrapped action to be called once more."""
        # the original definition lacked the self parameter, so calling
        # reset() on an instance raised TypeError
        self.called = False
+ − 2540
+ − 2541
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to sys.stderr before
    calling the action and a "leaving" line afterwards, showing either the
    return value or the exception, which is then re-raised.
    """
    f = ParserElement.normalizeParseActionArgs(f)

    def z(*paArgs):
        # the last three arguments are always (s, l, t); an extra leading
        # argument is used to prefix the name with its class name
        s, l, t = paArgs[-3:]
        label = f.func_name
        if len(paArgs) > 3:
            label = paArgs[0].__class__.__name__ + '.' + label
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (label, line(l, s), l, t))
        try:
            outcome = f(*paArgs)
        except Exception:
            # fetched via sys.exc_info() rather than the version-specific
            # "except X, name" binding syntax
            failure = sys.exc_info()[1]
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (label, failure))
            raise
        sys.stderr.write("<<leaving %s (ret: %s)\n" % (label, outcome))
        return outcome

    return z
+ − 2558
+ − 2559
#
+ − 2560
# global helpers
+ − 2561
#
+ − 2562
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.

       With combine=False (the default) the result is 'expr' followed by zero
       or more suppressed delimiters each followed by 'expr', so only the list
       elements are returned as tokens.  With combine=True the whole sequence,
       delimiters included, is wrapped in Combine and returned as a single
       token string.
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."
    if not combine:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
+ − 2575
+ − 2576
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The count token itself is dropped; the expr tokens are returned
       together in one group.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        count = int(t[0])
        # Once the count is known, define the placeholder to expect exactly
        # that many exprs (or an empty group for a zero count).
        if count:
            body = Group(And([expr]*count))
        else:
            body = Group(empty)
        arrayExpr << body
        return []
    countField = Word(nums).setParseAction(countFieldParseAction)
    return ( countField + arrayExpr )
+ − 2589
+ − 2590
def _flatten(L):
+ − 2591
if type(L) is not list: return [L]
+ − 2592
if L == []: return L
+ − 2593
return _flatten(L[0]) + _flatten(L[1:])
+ − 2594
+ − 2595
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # Each time expr matches, redefine the repeater from what it matched.
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: require each one, in order
            literals = [ Literal(tt) for tt in _flatten(t.asList()) ]
            rep << And( literals )
    expr.addParseAction(copyTokenToRepeater)
    return rep
+ − 2620
+ − 2621
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # The repeater parses with a copy of expr, taken before the copier
    # action below is attached to expr itself.
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # Compare the repeater's own tokens with those expr matched earlier.
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens )
    expr.addParseAction(copyTokenToRepeater)
    return rep
+ − 2646
+ − 2647
def _escapeRegexRangeChars(s):
    """Escape s for use inside a regex [] character class."""
    # Backslash must be handled first so the escapes added afterwards are
    # not themselves escaped again; newline and tab become their two-character
    # escape sequences.
    substitutions = (
        ( "\\", "\\\\" ),
        ( "^",  "\\^"  ),
        ( "-",  "\\-"  ),
        ( "]",  "\\]"  ),
        ( "\n", r"\n"  ),
        ( "\t", r"\t"  ),
        )
    for plain,escaped in substitutions:
        s = s.replace(plain,escaped)
    return _ustr(s)
+ − 2654
+ − 2655
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list (or tuple) of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       Any other type of argument for strs produces a SyntaxWarning and is
       treated as an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # Always work on a real list: the reordering below deletes and
        # inserts in place, which a sliced tuple cannot support.
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # Previously 'symbols' was left unbound here and the next statement
        # failed with a NameError right after the warning.
        symbols = []

    # Drop duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix ahead of that prefix so the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing after position i conflicted with it; move on
            i += 1

    if not caseless and useRegex:
        #~ print strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: one character class suffices
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # best effort only - fall through to the MatchFirst form below
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
+ − 2713
+ − 2714
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Builds Dict( ZeroOrMore( Group( key + value ) ) ) so the
       caller does not have to nest those three in the right order.  The key pattern
       can include delimiting markers or punctuation, as long as they are suppressed,
       thereby leaving the significant key text.  The value pattern can include named
       results, so that the Dict results can include named token fields.
    """
    entry = Group ( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
+ − 2723
+ − 2724
# a single backslash character
_bslash = "\\"
# every character of string.printable that is not whitespace
printables = "".join( [ ch for ch in string.printable if ch not in string.whitespace ] )

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")
+ − 2733
+ − 2734
# Grammar used by srange() to read a regex-style "[...]" character set.

# backslash + punctuation character -> the punctuation character itself
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# "\0x" + hex digits and "\0" + octal digits -> the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# "a-z" style range, grouped as the pair [first, last]
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = "[" + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

def _expanded(p):
    """Expand a parsed [first, last] range into its characters; pass anything else through."""
    if isinstance(p,ParseResults):
        chars = ''.join([ unichr(code) for code in range(ord(p[0]),ord(p[1])+1) ])
        if chars:
            return chars
        # an empty (reversed) range falls through and is returned unexpanded
    return p
+ − 2743
+ − 2744
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Deliberately best-effort: any failure yields "".  Catching Exception
        # rather than using a bare except keeps interpreter-exit signals
        # from being swallowed here.
        return ""
+ − 2764
+ − 2765
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with transformString().

       Returns a parse action that ignores whatever arguments it is given and
       always yields the one-element list [replStr].
    """
    return lambda *args: [replStr]
+ − 2772
+ − 2773
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )

       Only the first token is used; its first and last characters are dropped.
    """
    quoted = t[0]
    return quoted[1:-1]
+ − 2779
+ − 2780
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.

       Returns a new list holding the upper-cased text of every token in t.
    """
    # _ustr is the module's string converter, used here in place of str(),
    # which raises UnicodeEncodeError for unicode tokens containing
    # non-ASCII characters.
    return [ _ustr(tt).upper() for tt in t ]
+ − 2783
+ − 2784
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.

       Returns a new list holding the lower-cased text of every token in t.
    """
    # _ustr is the module's string converter, used here in place of str(),
    # which raises UnicodeEncodeError for unicode tokens containing
    # non-ASCII characters.
    return [ _ustr(tt).lower() for tt in t ]
+ − 2787
+ − 2788
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       Returns the slice of s from startLoc up to the value of the local
       variable named "loc" in the calling frame, so it only works when the
       direct caller has such a local (raises KeyError otherwise).
    """
    # The docstring must come first in the body to be a docstring at all;
    # previously this import preceded it and __doc__ was left as None.
    import inspect
    f = inspect.stack()[1][0]
    try:
        endloc = f.f_locals["loc"]
    finally:
        # drop the frame reference promptly to avoid a reference cycle
        del f
    return s[startLoc:endloc]
+ − 2798
+ − 2799
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       tagStr may be a string (turned into a Keyword, caseless unless xml is
       true) or an existing expression, whose .name is then used to build the
       results names.  Returns the pair (openTag, closeTag).
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-")
    if (xml):
        # XML: attribute values must be double-quoted
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
    else:
        # HTML: values may use either quote style or be unquoted up to '>',
        # and attribute names are lower-cased
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttrName = tagAttrName.setParseAction(downcaseTokens)

    attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    # optional "/" before ">" is reported as a boolean under the name "empty"
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + \
            Dict(ZeroOrMore(attribute)) + \
            selfClosing + Suppress(">")
    closeTag = Combine("</" + tagStr + ">")

    # results names: "start"/"end" + tag name in title case, with ':' and spaces removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
+ − 2826
+ − 2827
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.

       Returns the (openTag, closeTag) pair built by _makeTags with xml off.
    """
    return _makeTags( tagStr, xml=False )
+ − 2830
+ − 2831
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.

       Returns the (openTag, closeTag) pair built by _makeTags with xml on.
    """
    return _makeTags( tagStr, xml=True )
+ − 2834
+ − 2835
# Associativity markers accepted by operatorPrecedence(); each is a distinct
# sentinel object.
opAssoc = _Constants()
opAssoc.RIGHT = object()
opAssoc.LEFT = object()
+ − 2838
+ − 2839
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element of the
          nested expression; a parenthesized full expression is also accepted
          wherever baseExpr is
        - opList - list of tuples, one for each operator precedence level in the expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal
           - numTerms is the number of terms for this operator (must
              be 1 or 2)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError if numTerms is not 1 or 2, or if rightLeftAssoc is
       neither of the opAssoc constants.
    """
    ret = Forward()
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for level,operDef in enumerate(opList):
        # pad with None so the parse action member may be left out
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        thisExpr = Forward().setName("expr%d" % level)

        isLeft = ( rightLeftAssoc == opAssoc.LEFT )
        if not isLeft and not ( rightLeftAssoc == opAssoc.RIGHT ):
            raise ValueError("operator must indicate right or left associativity")

        if arity == 1:
            if isLeft:
                matchExpr = Group( lastExpr + opExpr )
            else:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
        elif arity == 2:
            # a right-associative operator recurses into this same level on
            # its right-hand side; a left-associative one does not
            if isLeft:
                rightOperand = lastExpr
            else:
                rightOperand = thisExpr
            matchExpr = Group( lastExpr + OneOrMore( opExpr + rightOperand ) )
        else:
            raise ValueError("operator must be unary (1) or binary (2)")

        if pa:
            matchExpr.setParseAction( pa )
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
+ − 2890
+ − 2891
# character sets built from code points 0xa1-0xff via srange()
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit   = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Quoted strings on a single line; inside the quotes a doubled quote
# character or a backslash followed by any character is accepted.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\.))*')''').setName("quotedString using single or double quotes")
+ − 2897
+ − 2898
# open/close tag expressions matching any tag name made of letters, digits and '_'
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_"))
# "&name;" for the five entity names below; the name is available as .entity
commonHTMLEntity = Combine("&" + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
# entity name -> replacement character.  "quot" is the double quote
# character; it was previously mapped to an apostrophe by mistake.
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: the replacement character for t.entity, or None if unknown
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
+ − 2902
+ − 2903
# Ready-made comment expressions - these structures are very common and easy
# to get wrong, so they are provided here.
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# Java comments use the same expression object as C++ comments
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# One item of a comma-separated list: runs of non-comma printables, which may
# be separated by spaces/tabs as long as that whitespace is not directly
# followed by a comma or the end of the line.
_noncomma = "".join( [ ch for ch in printables if ch != "," ] )
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# items may be quoted strings or bare items; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
+ − 2918
+ − 2919
+ − 2920
if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with simpleSQL and write the outcome to stdout."""
        write = sys.stdout.write

        def say( *items ):
            # items separated by single spaces, then a newline
            write( " ".join( [ str(item) for item in items ] ) + "\n" )

        # echo the input; the result (or the offending line) follows on the same line
        write( str(teststring) + " " + "->" )
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            write( " " )
            say( tokenlist )
            say( "tokens = ", tokens )
            say( "tokens.columns =", tokens.columns )
            say( "tokens.tables =", tokens.tables )
            say( tokens.asXML("SQL",True) )
        except ParseException:
            err = sys.exc_info()[1]
            write( " " )
            say( err.line )
            # caret under the column where parsing failed
            say( " "*(err.column-1) + "^" )
            say( err )
        say()

    # a minimal "SELECT columns FROM tables" grammar to exercise the module
    selectToken    = CaselessLiteral( "select" )
    fromToken      = CaselessLiteral( "from" )

    ident          = Word( alphas, alphanums + "_$" )
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL      = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    # the statements after the sixth are malformed on purpose, to show error reporting
    for statement in (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
        ):
        test( statement )